def run_inception(images,
                  graph_def=None,
                  default_graph_def_fn=_default_graph_def_fn,
                  image_size=INCEPTION_DEFAULT_IMAGE_SIZE,
                  input_tensor=INCEPTION_INPUT,
                  output_tensor=INCEPTION_OUTPUT):
  """Run images through a pretrained Inception classifier.

  Args:
    images: Input tensors. Must be [batch, height, width, channels]. Values
      must be in [-1, 1], which can be achieved using `preprocess_image`.
    graph_def: A GraphDef proto of a pretrained Inception graph. If `None`,
      call `default_graph_def_fn` to get GraphDef.
    default_graph_def_fn: A function that returns a GraphDef. Used if
      `graph_def` is `None`. By default, returns a pretrained InceptionV3 graph.
    image_size: Required image width and height. See unit tests for the default
      values.
    input_tensor: Name of input Tensor.
    output_tensor: Name or list of output Tensors. This function will compute
      activations at the specified layer. Examples include INCEPTION_V3_OUTPUT
      and INCEPTION_V3_FINAL_POOL which would result in this function computing
      the final logits or the penultimate pooling layer.

  Returns:
    Tensor or Tensors corresponding to computed `output_tensor`.

  Raises:
    ValueError: If images are not the correct size.
    ValueError: If neither `graph_def` nor `default_graph_def_fn` are provided.
  """
  images = _validate_images(images, image_size)

  if graph_def is None:
    if default_graph_def_fn is None:
      raise ValueError('If `graph_def` is `None`, must provide '
                       '`default_graph_def_fn`.')
    graph_def = default_graph_def_fn()

  activations = run_image_classifier(images, graph_def, input_tensor,
                                     output_tensor)
  if isinstance(activations, list):
    for i, activation in enumerate(activations):
      # `array_ops.rank` returns a Tensor, so check the static rank instead.
      if activation.shape.ndims != 2:
        activations[i] = layers.flatten(activation)
  else:
    if activations.shape.ndims != 2:
      activations = layers.flatten(activations)

  return activations
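# Minimal usage sketch for `run_inception` (not part of the original source):
# it assumes this module's `preprocess_image` helper; the placeholder shape is
# illustrative only.
import tensorflow as tf

def _example_run_inception():
  raw_images = tf.placeholder(tf.float32, [None, 256, 256, 3])  # values in [0, 255]
  # `preprocess_image` resizes and rescales pixel values into [-1, 1].
  images = preprocess_image(raw_images)
  logits = run_inception(images)  # defaults to the final logits
  return logits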
Example #3
def condition_tensor(tensor, conditioning):
  """Condition the value of a tensor.

  Conditioning scheme based on https://arxiv.org/abs/1609.03499.

  Args:
    tensor: A minibatch tensor to be conditioned.
    conditioning: A minibatch Tensor to condition on. Must be 2D, with first
      dimension the same as `tensor`.

  Returns:
    `tensor` conditioned on `conditioning`.

  Raises:
    ValueError: If the non-batch dimensions of `tensor` aren't fully defined.
    ValueError: If `conditioning` isn't at least 2D.
    ValueError: If the batch dimensions of the input Tensors don't match.
  """
  tensor.shape[1:].assert_is_fully_defined()
  num_features = tensor.shape[1:].num_elements()
  if conditioning.shape.ndims < 2:
    raise ValueError('conditioning must be at least 2D, but saw shape: %s'
                     % conditioning.shape)

  mapped_conditioning = layers.linear(
      layers.flatten(conditioning), num_features)
  if not mapped_conditioning.shape.is_compatible_with(tensor.shape):
    mapped_conditioning = array_ops.reshape(
        mapped_conditioning, _get_shape(tensor))
  return tensor + mapped_conditioning
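# Usage sketch for `condition_tensor` (illustrative shapes, not from the
# original source): condition [batch, features] activations on one-hot labels.
import tensorflow as tf

def _example_condition_tensor():
  net = tf.placeholder(tf.float32, [16, 128])            # minibatch activations
  one_hot_labels = tf.placeholder(tf.float32, [16, 10])  # conditioning input
  # Linearly maps the labels to 128 features and adds them to `net`.
  return condition_tensor(net, one_hot_labels)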
Example #5
def _embedding_alexnet(is_training, images, params):
    with tf.variable_scope('Siamese', 'CFCASiamese', [images], reuse=tf.AUTO_REUSE):
        with arg_scope(
                [layers.conv2d], activation_fn=tf.nn.relu):
            net = layers.conv2d(
                images, 96, [11, 11], 4, padding='VALID', scope='conv1')
            # net = layers.batch_norm(net, decay=0.9, epsilon=1e-06, is_training=is_training)
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 256, [5, 5], scope='conv2')
            # net = layers.batch_norm(net, decay=0.9, epsilon=1e-06, is_training=is_training)
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers_lib.dropout(
                net, keep_prob=0.7, is_training=is_training)
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 256, [3, 3], scope='conv4')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')
            net = layers_lib.dropout(
                net, keep_prob=0.7, is_training=is_training)
            net = layers_lib.flatten(net, scope='flatten1')
            net = layers_lib.fully_connected(net, 1024, scope='fc1',
                                             weights_regularizer=layers.l2_regularizer(0.0005))
            net = layers_lib.dropout(
                net, keep_prob=0.5, is_training=is_training)
            net = layers_lib.fully_connected(net, params.embedding_size, scope='fc2',
                                             weights_regularizer=layers.l2_regularizer(0.0005))
            return net
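# Usage sketch (not from the original source): because `_embedding_alexnet`
# opens its variable scope with `reuse=tf.AUTO_REUSE`, calling it twice shares
# all weights, which is how a siamese pair of embeddings would be built.
# `params` is assumed to carry an `embedding_size` attribute, as above.
def _example_siamese(images_a, images_b, params):
    # images_a / images_b: e.g. [batch, 227, 227, 3]
    emb_a = _embedding_alexnet(True, images_a, params)
    emb_b = _embedding_alexnet(True, images_b, params)  # reuses conv1..fc2
    return emb_a, emb_b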
def slim_net_original(image, keep_prob):
    with arg_scope([layers.conv2d, layers.fully_connected], biases_initializer=tf.random_normal_initializer(stddev=0.1)):

        # conv2d(inputs, num_outputs, kernel_size, stride=1, padding='SAME',
        # activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None,
        # weights_initializer=initializers.xavier_initializer(), weights_regularizer=None,
        # biases_initializer=init_ops.zeros_initializer, biases_regularizer=None, scope=None):
        net = layers.conv2d(image, 32, [5, 5], scope='conv1', weights_regularizer=regularizers.l1_regularizer(0.5))

        # max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None)
        net = layers.max_pool2d(net, 2, scope='pool1')

        net = layers.conv2d(net, 64, [5, 5], scope='conv2', weights_regularizer=regularizers.l2_regularizer(0.5))
        summaries.summarize_tensor(net, tag='conv2')

        net = layers.max_pool2d(net, 2, scope='pool2')

        net = layers.flatten(net, scope='flatten1')

        # fully_connected(inputs, num_outputs, activation_fn=nn.relu, normalizer_fn=None,
        # normalizer_params=None, weights_initializer=initializers.xavier_initializer(),
        # weights_regularizer=None, biases_initializer=init_ops.zeros_initializer,
        # biases_regularizer=None, scope=None):
        net = layers.fully_connected(net, 1024, scope='fc1')

        # dropout(inputs, keep_prob=0.5, is_training=True, scope=None)
        net = layers.dropout(net, keep_prob=keep_prob, scope='dropout1')

        net = layers.fully_connected(net, 10, scope='fc2')
    return net
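# Usage sketch for `slim_net_original` (not from the original source), on
# MNIST-sized input to match the ten-way fc2 layer above:
import tensorflow as tf

def _example_slim_net():
    image = tf.placeholder(tf.float32, [None, 28, 28, 1])
    keep_prob = tf.placeholder(tf.float32)  # feed e.g. 0.5 for training, 1.0 for eval
    logits = slim_net_original(image, keep_prob)
    return logits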
Example #7
def main(_):

    # NOTE: a placeholder is never None, so the first branch always runs
    # (both branches assign the same value here anyway).
    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    x = tf.placeholder(tf.float32, shape=[None, 14 * 4])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])

    x_image = tf.reshape(x, [-1, 14, 4, 1])

    n_conv1 = 384  # TBD
    L_conv1 = 9  # TBD
    maxpool_len1 = 2
    conv1 = convolution2d(x_image,
                          n_conv1, [L_conv1, 4],
                          padding="VALID",
                          normalizer_fn=None)
    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
    conv1_pool_len = int((14 - L_conv1 + 1) / maxpool_len1)

    n_conv2 = n_conv1
    L_conv2 = 5
    # NOTE: with the 'TBD' values above (L_conv1 = 9 on a length-14 input),
    # conv1_pool_len is 3, so L_conv2 = 5 gives a negative pool size here;
    # the hyperparameters need adjusting before this fragment will run.
    maxpool_len2 = int(
        conv1_pool_len - L_conv2 +
        1)  # global maxpooling (max-pool across temporal domain)
    conv2 = convolution2d(conv1_pool,
                          n_conv2, [L_conv2, 1],
                          padding='VALID',
                          normalizer_fn=None)
    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])
    # conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

    # LINEAR FC LAYER
    y_conv = fully_connected(flatten(conv2_pool), 2, activation_fn=None)
    y_conv_softmax = tf.nn.softmax(y_conv)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # `sess` is assumed to be a TensorFlow session created at module level.
    sess.run(tf.initialize_all_variables())
Example #8
    def __init__(self, sequence_length, num_classes):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        x_image = tf.reshape(self.input_x, shape=[-1, 14, 4, 1])

        n_conv1 = 44
        L_conv1 = 5
        maxpool_len1 = 2
        conv1 = convolution2d(x_image,
                              n_conv1, [L_conv1, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
        # NOTE: assumes a length-101 input, although x_image above is length
        # 14; this value is only used by the commented-out block below.
        conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

        # n_conv2 = n_conv1
        # L_conv2 = 3
        # maxpool_len2 = int(conv1_pool_len - L_conv2 + 1)  # global maxpooling (max-pool across temporal domain)
        # conv2 = convolution2d(conv1_pool, n_conv2, [L_conv2, 1], padding='VALID', normalizer_fn=None)
        # conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv1_pool), 2, activation_fn=None)
        prediction = tf.nn.softmax(y_conv)

        self.cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv,
                                                    labels=self.input_y))
        # train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

        correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                      tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
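# Usage sketch, assuming this __init__ belongs to a model class (called
# `SequenceModel` here purely for illustration):
#
#   model = SequenceModel(sequence_length=14 * 4, num_classes=2)
#   feed = {model.input_x: batch_x, model.input_y: batch_y,
#           model.dropout_keep_prob: 1.0}
#   acc = sess.run(model.accuracy, feed_dict=feed)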
    def build_layer_fn(x, w_initializer, b_initializer):
      var_collection = {
          'weights': ['CONTRIB_LAYERS_FC_WEIGHTS'],
          'biases': ['CONTRIB_LAYERS_FC_BIASES']
      }
      x = contrib_layers.flatten(x)
      net = contrib_layers.fully_connected(
          x,
          3,
          weights_initializer=w_initializer,
          biases_initializer=b_initializer,
          variables_collections=var_collection)
      weight_vars = ops.get_collection('CONTRIB_LAYERS_FC_WEIGHTS')
      self.assertEqual(1, len(weight_vars))
      bias_vars = ops.get_collection('CONTRIB_LAYERS_FC_BIASES')
      self.assertEqual(1, len(bias_vars))
      expected_normalized_vars = {
          'contrib.layers.fully_connected.weights': weight_vars[0]
      }
      expected_not_normalized_vars = {
          'contrib.layers.fully_connected.bias': bias_vars[0]
      }

      return net, expected_normalized_vars, expected_not_normalized_vars
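# The `variables_collections` dict above routes the layer's variables into
# custom graph collections. A standalone sketch of the same mechanism (the
# collection name is hypothetical):
import tensorflow as tf
from tensorflow.contrib import layers as contrib_layers

def _example_variables_collections():
  x = tf.placeholder(tf.float32, [None, 4])
  net = contrib_layers.fully_connected(
      x, 3, variables_collections={'weights': ['MY_FC_WEIGHTS']})
  weights = tf.get_collection('MY_FC_WEIGHTS')  # -> [the fc weights variable]
  return net, weights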
Example #12
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONAL LAYER(S)
        n_conv3 = 64
        L_conv3 = 9
        maxpool_len3 = int(101 - L_conv3 +
                           1)  # global maxpooling ("across temporal domain")
        conv3 = convolution2d(x_image,
                              n_conv3, [L_conv3, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv3_pool = max_pool2d(conv3, [maxpool_len3, 1], [maxpool_len3, 1])

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv3_pool), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = y_conv_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            y_conv_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                #check stop condition:
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                #    stop_condition = 1
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:
                stop_condition = 1
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = y_conv_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1]
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()
Example #13
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # NOTE: a placeholder is never None, so rnn_keep_prob is always 0.5 here;
    # the else branch is dead code.
    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        rnn_keep_prob = 0.5
    else:
        rnn_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        # LSTM Network Parameters ============================
        n_hidden = 32  # hidden layer num of features
        # ====================================================
        n_input = 4  # data input (4 possible dna bases)
        n_steps = 101  # timesteps (101 dna bases)
        n_classes = 2  # total classes (binary classification for binding/nonbinding)

        x = tf.placeholder(tf.float32, shape=[None, n_steps * n_input])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, n_steps, n_input])
        birnn_out = BiRNN(x_image, n_input, n_steps, n_hidden)  # see the BiRNN sketch after this example

        # Linear activation
        # rnn_out = birnn_out[-1] # ...using LAST of the rnn inner loops (as in the MNIST example)
        rnn_out = tf.div(tf.add_n(birnn_out),
                         n_steps)  # ...using AVERAGE of the rnn inner loops
        rnn_out_drop = tf.nn.dropout(
            rnn_out, rnn_keep_prob)  # apply dropout to regularize the LSTM
        pred = fully_connected(flatten(rnn_out_drop), 2, activation_fn=None)
        pred_softmax = tf.nn.softmax(pred)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = pred_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            pred_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                #check stop condition:
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                #    stop_condition = 1
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:
                stop_condition = 1
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = pred_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1]
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    pred_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()
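# `BiRNN` is called above but not defined in this snippet. A sketch consistent
# with the inline bidirectional-LSTM code in a later example on this page
# (same pre-1.0 TF APIs and module-level `rnn_cell` import; `n_input` is kept
# only to match the call signature):
def BiRNN(x, n_input, n_steps, n_hidden):
    # x: [batch, n_steps, n_input] -> list of n_steps [batch, n_input] tensors
    x_unpacked = tf.unpack(x, axis=1)
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden)
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden)
    outputs, _, _ = tf.nn.bidirectional_rnn(
        lstm_fw_cell, lstm_bw_cell, x_unpacked, dtype=tf.float32)
    return outputs  # list of n_steps [batch, 2 * n_hidden] tensors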
Example #14
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write(
        'dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n'
    )
    f.close()
    for dataset_num in range(0, len(_datasets)):
        for motif_occ in range(0, 2):
            success = False
            try:
                load_ENCODE_k562_dataset(dataset_num, motif_occ)
                success = True
            except Exception:
                print('Hmm.. Something happened. Skipping dataset ' +
                      _datasets[dataset_num])
            if success:
                with tf.variable_scope('scopename_' + str(dataset_num) + '_' +
                                       str(motif_occ)):
                    # LSTM Parameters ============================
                    lstm_n_hidden = 32  # hidden layer num features
                    # ============================================

                    x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
                    y_ = tf.placeholder(tf.float32, shape=[None, 2])

                    # Create the model
                    x_image = tf.reshape(x, [-1, 101, 4, 1])

                    # CONVOLUTIONAL LAYER(S)
                    n_conv1 = 384
                    L_conv1 = 9
                    maxpool_len1 = 2
                    conv1 = convolution2d(x_image,
                                          n_conv1, [L_conv1, 4],
                                          padding='VALID',
                                          normalizer_fn=None)
                    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1],
                                            [maxpool_len1, 1])
                    #conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
                    conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

                    n_conv2 = n_conv1
                    L_conv2 = 5
                    maxpool_len2 = int(
                        conv1_pool_len - L_conv2 + 1
                    )  # global maxpooling (max-pool across temporal domain)
                    conv2 = convolution2d(conv1_pool,
                                          n_conv2, [L_conv2, 1],
                                          padding='VALID',
                                          normalizer_fn=None)
                    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1],
                                            [maxpool_len2, 1])
                    #conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

                    # LINEAR FC LAYER
                    y_conv = fully_connected(flatten(conv2_pool),
                                             2,
                                             activation_fn=None)
                    y_conv_softmax = tf.nn.softmax(y_conv)

                    cross_entropy = tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(
                            logits=y_conv, labels=y_))
                    train_step = tf.train.AdamOptimizer().minimize(
                        cross_entropy)
                    correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                                  tf.argmax(y_, 1))
                    accuracy = tf.reduce_mean(
                        tf.cast(correct_prediction, tf.float32))
                    sess.run(tf.initialize_all_variables())

                    i = 0
                    prev_auc = 0.0001  # small value to prevent DIV0
                    stop_condition = None
                    t0 = time.time()
                    while stop_condition is None:
                        #if i%100 == 0:
                        if 1 == 0:  # turned off
                            #t0 = time.time()
                            pred_validation_labels = None
                            true_validation_labels = None
                            prev_validation_epochs_completed = _validation_epochs_completed
                            while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                                if _validation_size > 1024 * 5:
                                    validation_batch = get_next_batch(1, 1024)
                                else:
                                    validation_batch = get_next_batch(1, 64)
                                if pred_validation_labels is None:
                                    pred_validation_labels = y_conv_softmax.eval(
                                        feed_dict={
                                            x: validation_batch[0],
                                            y_: validation_batch[1]
                                        })
                                    true_validation_labels = validation_batch[
                                        1]
                                else:
                                    pred_validation_labels = numpy.vstack([
                                        pred_validation_labels,
                                        y_conv_softmax.eval(
                                            feed_dict={
                                                x: validation_batch[0],
                                                y_: validation_batch[1]
                                            })
                                    ])
                                    true_validation_labels = numpy.vstack([
                                        true_validation_labels,
                                        validation_batch[1]
                                    ])
                            fpr, tpr, _ = roc_curve(
                                true_validation_labels[:, 0],
                                pred_validation_labels[:, 0])
                            roc_auc = auc(fpr, tpr)
                            #check stop condition:
                            perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                            #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                            #    stop_condition = 1
                            prev_auc = roc_auc
                            print(
                                "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                                % (_datasets[dataset_num],
                                   dataset_num, _train_epochs_completed, i,
                                   time.time() - t0, roc_auc, perc_chg_auc))
                            t0 = time.time()
                        batch = get_next_batch(0)
                        train_step.run(feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            dropout_on: 1
                        })
                        if i == 7000:
                            stop_condition = 1
                        i += 1

                    pred_test_labels = None
                    true_test_labels = None
                    while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
                        test_batch = get_next_batch(2, 64)
                        if pred_test_labels is None:
                            pred_test_labels = y_conv_softmax.eval(
                                feed_dict={
                                    x: test_batch[0],
                                    y_: test_batch[1]
                                })
                            true_test_labels = test_batch[1]
                        else:
                            pred_test_labels = numpy.vstack([
                                pred_test_labels,
                                y_conv_softmax.eval(feed_dict={
                                    x: test_batch[0],
                                    y_: test_batch[1]
                                })
                            ])
                            true_test_labels = numpy.vstack(
                                [true_test_labels, test_batch[1]])
                    fpr, tpr, _ = roc_curve(
                        true_test_labels[:, 0], pred_test_labels[:, 0]
                    )  # get receiver operating characteristics
                    precision, recall, _ = precision_recall_curve(
                        true_test_labels[:, 0],
                        pred_test_labels[:, 0])  # get precision recall curve
                    roc_auc = auc(fpr, tpr)
                    prc_auc = auc(recall, precision)
                    print(
                        "%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"
                        % (_datasets[dataset_num], dataset_num, roc_auc,
                           prc_auc, time.time() - t0))
                    f = open(file_name, 'a')
                    f.write(
                        str(dataset_num) + ',' + str(motif_occ) + ',' +
                        _datasets[dataset_num] + ',' + str(roc_auc) + ',' +
                        str(prc_auc) + ',' + str(time.time() - t0) + '\n')
                    f.close()
                    t0 = time.time()
Example #15
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        rnn_keep_prob = 1.0
    else:
        rnn_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f=open(file_name,'w') # clear file
    f.write('dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n')
    f.close()
    for dataset_num in range(106, len(_datasets)):
        for motif_occ in range(0,2):
            success = False
            try:
                load_ENCODE_k562_dataset(dataset_num,motif_occ)
                success = True
            except Exception:
                print('Hmm.. Something happened. Skipping dataset ' + _datasets[dataset_num])
            if success:
                with tf.variable_scope('scopename_' + str(dataset_num) + '_' + str(motif_occ)):
                    # LSTM Parameters ============================
                    lstm_n_hidden = 32 # hidden layer num features
                    # ============================================

                    x = tf.placeholder(tf.float32, shape=[None, 101*4])
                    y_ = tf.placeholder(tf.float32, shape=[None, 2])

                    # Create the model
                    x_image = tf.reshape(x, [-1,101,4,1])

                    # CONVOLUTIONAL LAYER(S)
                    n_conv1 = 128
                    L_conv1 = 9
                    n_steps1 = (101-L_conv1+1)
                    conv1 = convolution2d(x_image, n_conv1, [L_conv1,4], padding='VALID', normalizer_fn=None)
                    conv1_resh = tf.reshape(conv1, [-1,n_steps1,n_conv1])

                    # LSTM LAYER(S)
                    conv1_unpacked = tf.unpack(conv1_resh, axis=1) # this func does it all for us :)
                    lstm_fw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                    lstm_bw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                    birnn_out,_,_ = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, conv1_unpacked, dtype=tf.float32)
                    
                    # Linear activation
                    # rnn_out = birnn_out[-1] # to use LAST of the rnn inner loops (as in the MNIST example)
                    rnn_out = tf.div(tf.add_n(birnn_out), n_steps1) # to use the AVERAGE of the rnn inner loops (birnn_out has n_steps1 entries)
                    rnn_out_drop = tf.nn.dropout(rnn_out, rnn_keep_prob) # apply dropout to regularize the LSTM
                    pred = fully_connected(flatten(rnn_out_drop), 2, activation_fn=None)
                    pred_softmax = tf.nn.softmax(pred)

                    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
                    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
                    sess.run(tf.initialize_all_variables())
                    
                    i = 0
                    prev_auc = 0.0001 # small value to prevent DIV0
                    stop_condition = None
                    t0 = time.time()
                    while stop_condition is None:
                        #if i%100 == 0:
                        if 1 == 0: # turned off
                            #t0 = time.time()
                            pred_validation_labels = None
                            true_validation_labels = None
                            prev_validation_epochs_completed = _validation_epochs_completed
                            while _validation_epochs_completed - prev_validation_epochs_completed == 0: # do in mini batches because single GTX970 has insufficient memory to test all at once
                                if _validation_size > 1024*5:
                                    validation_batch = get_next_batch(1,1024)
                                else:
                                    validation_batch = get_next_batch(1,64)
                                if pred_validation_labels is None:
                                    pred_validation_labels = pred_softmax.eval(feed_dict={x: validation_batch[0], y_: validation_batch[1]})
                                    true_validation_labels = validation_batch[1]
                                else:
                                    pred_validation_labels = numpy.vstack([pred_validation_labels, pred_softmax.eval(feed_dict={x: validation_batch[0], y_: validation_batch[1]})])
                                    true_validation_labels = numpy.vstack([true_validation_labels, validation_batch[1]])
                            fpr, tpr, _ = roc_curve(true_validation_labels[:,0], pred_validation_labels[:,0])
                            roc_auc = auc(fpr, tpr)
                            #check stop condition:
                            perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                            #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                            #    stop_condition = 1
                            prev_auc = roc_auc
                            print("%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"%(_datasets[dataset_num], dataset_num, _train_epochs_completed, i, time.time()-t0, roc_auc, perc_chg_auc))
                            t0 = time.time()
                        batch = get_next_batch(0)
                        train_step.run(feed_dict={x: batch[0], y_: batch[1], dropout_on: 1})
                        if i == 4800:
                            stop_condition = 1
                        i += 1

                    pred_test_labels = None
                    true_test_labels = None
                    while _test_epochs_completed == 0: # do testing in mini batches because single GTX970 has insufficient memory to test all at once
                        test_batch = get_next_batch(2, 64)
                        if pred_test_labels is None:
                            pred_test_labels = pred_softmax.eval(feed_dict={x: test_batch[0], y_: test_batch[1]})
                            true_test_labels = test_batch[1]
                        else:
                            pred_test_labels = numpy.vstack([pred_test_labels, pred_softmax.eval(feed_dict={x: test_batch[0], y_: test_batch[1]})])
                            true_test_labels = numpy.vstack([true_test_labels, test_batch[1]])
                    fpr, tpr, _ = roc_curve(true_test_labels[:,0], pred_test_labels[:,0]) # get receiver operating characteristics
                    precision, recall, _ = precision_recall_curve(true_test_labels[:,0], pred_test_labels[:,0]) # get precision recall curve
                    roc_auc = auc(fpr, tpr)
                    prc_auc = auc(recall, precision)
                    print("%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"%(_datasets[dataset_num], dataset_num, roc_auc, prc_auc, time.time()-t0))
                    f=open(file_name,'a')
                    f.write(str(dataset_num) + ',' + str(motif_occ) + ',' + _datasets[dataset_num] + ',' + str(roc_auc) + ',' + str(prc_auc) + ',' + str(time.time()-t0) + '\n')
                    f.close()
                    t0 = time.time()
Example #16
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    _datasets = utils.remove_non_existing_datafiles(_datasets)
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])
        conv_keep_prob = tf.placeholder(tf.float32)

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONAL LAYER(S)
        n_conv1 = 64
        L_conv1 = 9
        maxpool_len1 = 2
        conv1 = convolution2d(x_image,
                              n_conv1, [L_conv1, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
        conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
        conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

        n_conv2 = n_conv1
        L_conv2 = 5
        maxpool_len2 = int(
            conv1_pool_len - L_conv2 +
            1)  # global maxpooling (max-pool across temporal domain)
        conv2 = convolution2d(conv1_drop,
                              n_conv2, [L_conv2, 1],
                              padding='VALID',
                              normalizer_fn=None)
        conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])
        conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv2_drop), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        prev_train_epochs_compl = 0
        stop_condition = None
        t0 = time.time()
        this_conv_keep_prob = 0.5
        final_keep_prob = 0.75
        while stop_condition is None:
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = y_conv_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1],
                                conv_keep_prob: 1.0
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            y_conv_softmax.eval(
                                feed_dict={
                                    x: validation_batch[0],
                                    y_: validation_batch[1],
                                    conv_keep_prob: 1.0
                                })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g, conv_keep_prob %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc, this_conv_keep_prob))
                #check stop condition:
                if i == 3000 or i == 10000 or i == 20000:  # increase keep_prob at these iteration numbers (not epochs)
                    if this_conv_keep_prob < final_keep_prob:
                        this_conv_keep_prob += 0.2
                        if this_conv_keep_prob > final_keep_prob:
                            this_conv_keep_prob = final_keep_prob
                    else:
                        stop_condition = 1
                prev_train_epochs_compl = _train_epochs_completed
                prev_auc = roc_auc
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                conv_keep_prob: this_conv_keep_prob
            })
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = y_conv_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1],
                    conv_keep_prob: 1.0
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1],
                        conv_keep_prob: 1.0
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()
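# All of the main() scripts above assume module-level plumbing that is not
# shown: a session `sess`, the `_datasets` list, the epoch counters, and a
# `get_next_batch` helper. A hypothetical stub of the helper's contract,
# inferred from the call sites only:
def get_next_batch(which_set, batch_size=128):
    """Return an (inputs, one_hot_labels) tuple for which_set 0=train,
    1=validation, 2=test, incrementing the matching *_epochs_completed
    counter whenever the chosen set wraps around."""
    raise NotImplementedError('dataset plumbing is not part of these examples')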