Example #1
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    ds = EnhancersData('/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data')

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)

    y_conv = CNN(x, dropout_keep_prob=keep_prob)
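    # CNN is assumed to return the raw two-class logits; they feed the softmax cross-entropy below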

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                            logits=y_conv)
    train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = 2
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]
    iters_per_epoch = int(ds.train.num_examples / mini_batch_size)

    with tf.Session() as sess:
        print('run, epoch, step, training accuracy, validation accuracy')
        for run_idx in range(num_runs):
            sess.run(tf.global_variables_initializer())
            for epoch_idx in range(num_epochs):
                for iter_idx in range(iters_per_epoch):
                    batch = ds.train.next_batch(mini_batch_size)
                    if iter_idx % 100 == 0:
                        # out = y_.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                        # print(out)
                        train_accuracy = accuracy.eval(feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            keep_prob: 1.0
                        })
                        valid_accuracy = accuracy.eval(
                            feed_dict={
                                x: ds.validation.seqs,
                                y_: ds.validation.labels,
                                keep_prob: 1.0
                            })
                        print('%d, %d, %d, %g, %g' %
                              (run_idx, epoch_idx, iter_idx, train_accuracy,
                               valid_accuracy))
                    train_step.run(feed_dict={
                        x: batch[0],
                        y_: batch[1],
                        keep_prob: 0.5
                    })
            print('test accuracy %g' % accuracy.eval(feed_dict={
                x: ds.test.seqs,
                y_: ds.test.labels,
                keep_prob: 1.0
            }))
Example #2
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
    ds = EnhancersData(data_dir_path)
    log_files(data_dir_path)

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000], name="x")
    y_ = tf.placeholder(tf.uint8, shape=[None, 2], name="y_")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)

    # attach update ops used for the batch normalization
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv, name="sigmoid_out")
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = general_cfg["num_runs"]
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]

    # record a histogram summary of every trainable variable for TensorBoard
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name, var)
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver()
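    # track the best validation accuracy; a checkpoint is written only when it improves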
    best_val_acc = 0
    with tf.Session() as sess:
        for run_idx in range(num_runs):
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter("/cs/grad/pazbu/paz/dev/projects/dnanet-v2/summaries", graph=sess.graph)
            current_step = 0
            while ds.train.epochs_completed < num_epochs:
                current_step += 1
                batch = ds.train.next_batch(mini_batch_size)
                if current_step % 100 == 0:
                    train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
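                    # evaluate validation accuracy in batches of 1000 examples rather than one large feed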
                    val_corrects = []
                    for x_val_batch, y_val_batch in ds.validation.single_pass_batch_iter(1000):
                        val_corrects.extend(correct_prediction.eval(feed_dict={x: x_val_batch,
                                                                               y_: y_val_batch,
                                                                               keep_prob: 1.0}))
                    valid_accuracy = sum(val_corrects) / ds.validation.num_examples
                    if best_val_acc < valid_accuracy:
                        saver.save(sess, save_path='chk', global_step=current_step)
                        best_val_acc = valid_accuracy
                    print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g' %
                          (run_idx, ds.train.epochs_completed, current_step, train_accuracy, valid_accuracy))
                summary_str, _ = sess.run([merged_summary_op, train_step], feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
                summary_writer.add_summary(summary_str, current_step)
Example #3
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
    ds = EnhancersData(data_dir_path)
    log_files(data_dir_path)

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)

    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                            logits=y_conv)
    train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = 2
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]
    iters_per_epoch = int(ds.train.num_examples / mini_batch_size)

    sv = tf.train.Supervisor(
        logdir="/cs/grad/pazbu/paz/dev/projects/dnanet-v2/chk")
    with sv.managed_session() as sess:
        for step in range(100000):
            if sv.should_stop():
                break
            batch = ds.train.next_batch(mini_batch_size)
            if step % 100 == 0:
                # out = y_.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                # print(out)
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })
                valid_accuracy = accuracy.eval(
                    feed_dict={
                        x: ds.validation.seqs,
                        y_: ds.validation.labels,
                        keep_prob: 1.0
                    })
                print(
                    'run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g'
                    % (0, 0, step, train_accuracy, valid_accuracy))  # run/epoch are not tracked in this variant
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            })
Example #4
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
    ds = EnhancersData(data_dir_path)
    log_files(data_dir_path)

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)

    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
    train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = 2
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]
    iters_per_epoch = int(ds.train.num_examples / mini_batch_size)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.name, var)
    merged_summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        for run_idx in range(num_runs):
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter("/cs/grad/pazbu/paz/dev/projects/dnanet-v2/summaries",
                                                   graph=sess.graph)
            for epoch_idx in range(num_epochs):
                for iter_idx in range(iters_per_epoch):
                    batch = ds.train.next_batch(mini_batch_size)
                    if iter_idx % 100 == 0:
                        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                        valid_accuracy = accuracy.eval(feed_dict={x: ds.validation.seqs,
                                                                  y_: ds.validation.labels,
                                                                  keep_prob: 1.0})
                        print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g' %
                              (run_idx, epoch_idx, iter_idx, train_accuracy, valid_accuracy))
                    _, summary_str = sess.run([train_step, merged_summary_op], feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
                    # summary_str = sess.run(merged_summary_op, feed_dict={x: ds.validation.seqs,
                    #                                                      y_: ds.validation.labels,
                    #                                                      keep_prob: 1.0})
                    summary_writer.add_summary(summary_str, iters_per_epoch*epoch_idx + iter_idx)
            print('test accuracy %g' % accuracy.eval(feed_dict={x: ds.test.seqs, y_: ds.test.labels, keep_prob: 1.0}))
Example #5
def run_experiment(general_cfg, dropout_keep_prob, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    ds = EnhancersData(data_dir)
    log_files(data_dir)

    x = tf.placeholder(tf.float32, shape=[None, 4, general_cfg["seq_length"]], name="x")
    y_ = tf.placeholder(tf.float32, shape=[None, general_cfg["num_outs"]], name="y_")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    cross_entropy = tf.losses.sigmoid_cross_entropy(multi_class_labels=y_, logits=y_conv)

    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv, name="sigmoid_out")
    correct_prediction = tf.equal(y_, tf.round(y_pred_sig))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = general_cfg["num_runs"]
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]

    for var in tf.trainable_variables():
        tf.summary.histogram(var.name, var)
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver()
    best_val_acc = 0
    ex_id = ex.current_run._id
    class_weights = np.sum(ds.test.labels, axis=0) / np.sum(ds.test.labels)
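    # per-class fraction of test examples; used below to weight the per-class ROC-AUCs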
    with tf.Session() as sess:
        for run_idx in range(num_runs):
            chkp_dir = path.join(ex_log_dir, str(ex_id), "run" + str(run_idx))
            ds.reset()
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter("/cs/grad/pazbu/paz/dev/projects/dnanet-roadmap/summaries", graph=sess.graph)
            current_step = 0
            while ds.train.epochs_completed < num_epochs:
                current_step += 1
                batch = ds.train.next_batch(mini_batch_size)
                summary_str,_ , l = sess.run([merged_summary_op, train_step, cross_entropy],
                                             feed_dict={x: batch[0], y_: batch[1], keep_prob: dropout_keep_prob})
                l_sum = np.sum(l)
                # skip to next epoch if loss is very small:
                if l_sum < 0.001:
                    print('loss < 0.001. skipping to next epoch...')
                    curr_epoch = ds.train.epochs_completed
                    while curr_epoch == ds.train.epochs_completed:
                        ds.train.next_batch(mini_batch_size)
                        current_step += 1
                    continue

                # summary_writer.add_summary(summary_str, current_step)
                if current_step % 200 == 0:
                    train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
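                    # score the validation set in batches, then feed the collected sigmoid outputs back in to compute accuracy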
                    y_vals = batched_eval(y_pred_sig, ds.validation.single_pass_batch_iter(1000), x, y_, keep_prob)
                    valid_accuracy = accuracy.eval(feed_dict={y_: ds.validation.labels, y_pred_sig: y_vals})
                    if best_val_acc < valid_accuracy:
                        saver.save(sess, save_path=path.join(chkp_dir, str(valid_accuracy)), global_step=current_step)
                        best_val_acc = valid_accuracy
                        ex.info["best_validation_accuracy"] = best_val_acc
                    print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g, loss: %g' %
                          (run_idx, ds.train.epochs_completed, current_step, train_accuracy, valid_accuracy, l_sum))

            y_pred = batched_eval(y_pred_sig, ds.test.single_pass_batch_iter(1000), x, y_, keep_prob)
            test_accuracy = accuracy.eval(feed_dict={y_: ds.test.labels, y_pred_sig: y_pred})
            print('test accuracy: ', test_accuracy)
            auc_rocs = calc_auc_rocs(ds.test.labels, y_pred)
            for i, auc_roc in enumerate(auc_rocs):
                print('roc-auc ' + str(i) + ":", auc_roc)
            auc_rocs_weighted_sum = np.sum(np.multiply(auc_rocs, class_weights))
            print('roc-auc weighted sum:', auc_rocs_weighted_sum)
            ex.info["final_auc_rocs_run" + str(run_idx)] = auc_rocs
Example #6
import tensorflow as tf
import numpy as np
from enhancersdata import EnhancersData

data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
ds = EnhancersData(data_dir_path)

with tf.Session() as sess:

    #First let's load meta graph and restore weights
    saver = tf.train.import_meta_graph('chk-0.936961433878-120900.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))
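    # the restored graph exposes its inputs by name ('x:0', 'keep_prob:0'), which are fed directly below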

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)

    graph = tf.get_default_graph()
    y_pred_sig = graph.get_tensor_by_name("sigmoid_out:0")
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    all_outcomes = []
    batch_iter = ds.test.single_pass_batch_iter(100)
    num_batches = np.ceil(ds.test.num_examples / 100)
    c = 0
    for x_batch, y_batch in batch_iter:
        out = correct_prediction.eval(feed_dict={
            "x:0": x_batch,
            y_: y_batch,
            "keep_prob:0": 1.0