# Basic training loop: a fixed number of iterations per epoch, with
# full-batch validation every 100 steps.
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    ds = EnhancersData('/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data')

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    # softmax_cross_entropy_with_logits requires float labels, so cast the
    # uint8 one-hot labels; reduce to a scalar before handing it to Adam.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(y_, tf.float32),
                                                logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

    # The sigmoid is monotonic, so argmax over y_pred_sig equals argmax over
    # the raw logits.
    y_pred_sig = tf.sigmoid(y_conv)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = 2
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]
    iters_per_epoch = int(ds.train.num_examples / mini_batch_size)

    with tf.Session() as sess:
        print('run, epoch, step, training accuracy, validation accuracy')
        for run_idx in range(num_runs):
            sess.run(tf.global_variables_initializer())
            for epoch_idx in range(num_epochs):
                for iter_idx in range(iters_per_epoch):
                    batch = ds.train.next_batch(mini_batch_size)
                    if iter_idx % 100 == 0:
                        train_accuracy = accuracy.eval(
                            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                        valid_accuracy = accuracy.eval(
                            feed_dict={x: ds.validation.seqs,
                                       y_: ds.validation.labels,
                                       keep_prob: 1.0})
                        print('%d, %d, %d, %g, %g' %
                              (run_idx, epoch_idx, iter_idx, train_accuracy, valid_accuracy))
                    # Dropout is only active during the training step itself.
                    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
        print('test accuracy %g' % accuracy.eval(feed_dict={x: ds.test.seqs,
                                                            y_: ds.test.labels,
                                                            keep_prob: 1.0}))
# Epoch-driven variant: tracks epochs through the dataset object, evaluates
# the validation set in batches, wires in the batch-norm update ops, and
# saves a checkpoint whenever validation accuracy improves.
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
    ds = EnhancersData(data_dir_path)
    log_files(data_dir_path)

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000], name="x")
    y_ = tf.placeholder(tf.uint8, shape=[None, 2], name="y_")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    # Cast the uint8 one-hot labels to float for the loss and reduce to a scalar.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(y_, tf.float32),
                                                logits=y_conv))

    # Attach the update ops used for batch normalization so the moving
    # statistics are refreshed on every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv, name="sigmoid_out")
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = general_cfg["num_runs"]
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]

    # ':' is not a legal summary-name character, so replace it.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name.replace(':', '_'), var)
    merged_summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    best_val_acc = 0

    with tf.Session() as sess:
        for run_idx in range(num_runs):
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter(
                "/cs/grad/pazbu/paz/dev/projects/dnanet-v2/summaries", graph=sess.graph)
            current_step = 0
            while ds.train.epochs_completed < num_epochs:
                current_step += 1
                batch = ds.train.next_batch(mini_batch_size)
                if current_step % 100 == 0:
                    train_accuracy = accuracy.eval(
                        feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                    # Evaluate the validation set in batches to bound memory use.
                    val_corrects = []
                    for x_val_batch, y_val_batch in ds.validation.single_pass_batch_iter(1000):
                        val_corrects.extend(correct_prediction.eval(
                            feed_dict={x: x_val_batch, y_: y_val_batch, keep_prob: 1.0}))
                    valid_accuracy = sum(val_corrects) / ds.validation.num_examples
                    # Checkpoint whenever validation accuracy improves.
                    if best_val_acc < valid_accuracy:
                        saver.save(sess, save_path='chk', global_step=current_step)
                        best_val_acc = valid_accuracy
                    print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g' %
                          (run_idx, ds.train.epochs_completed, current_step,
                           train_accuracy, valid_accuracy))
                # Run the train step and collect summaries in one session call.
                summary_str, _ = sess.run([merged_summary_op, train_step],
                                          feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
                summary_writer.add_summary(summary_str, current_step)
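# The loop above iterates the validation set with
# EnhancersData.single_pass_batch_iter, which is not defined in this file. A
# minimal sketch of what such an iterator presumably does, assuming the
# dataset exposes `seqs`, `labels` and `num_examples` as used above (an
# assumption, not the original implementation):
def single_pass_batch_iter(self, batch_size):
    """Yield (seqs, labels) batches exactly once, in order, without shuffling."""
    for start in range(0, self.num_examples, batch_size):
        end = min(start + batch_size, self.num_examples)
        yield self.seqs[start:end], self.labels[start:end]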
# Variant using tf.train.Supervisor, which initializes variables and
# checkpoints to logdir automatically.
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
    ds = EnhancersData(data_dir_path)
    log_files(data_dir_path)

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(y_, tf.float32),
                                                logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    mini_batch_size = general_cfg["batch_size"]

    sv = tf.train.Supervisor(logdir="/cs/grad/pazbu/paz/dev/projects/dnanet-v2/chk")
    # managed_session does not install itself as the default session, so ops
    # are run through sess.run rather than Tensor.eval / Operation.run.
    with sv.managed_session() as sess:
        for step in range(100000):
            if sv.should_stop():
                break
            batch = ds.train.next_batch(mini_batch_size)
            if step % 100 == 0:
                train_accuracy = sess.run(accuracy,
                                          feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                valid_accuracy = sess.run(accuracy,
                                          feed_dict={x: ds.validation.seqs,
                                                     y_: ds.validation.labels,
                                                     keep_prob: 1.0})
                # Run/epoch are not tracked in this variant, hence the zeros.
                print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g' %
                      (0, 0, step, train_accuracy, valid_accuracy))
            sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
# Variant that adds TensorBoard histogram summaries for all trainable
# variables to the basic training loop.
def run_experiment(general_cfg, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
    ds = EnhancersData(data_dir_path)
    log_files(data_dir_path)

    x = tf.placeholder(tf.float32, shape=[None, 4, 1000])
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(y_, tf.float32),
                                                logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = 2
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]
    iters_per_epoch = int(ds.train.num_examples / mini_batch_size)

    # Histogram summaries for every trainable variable (':' is not a legal
    # summary-name character, so replace it).
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name.replace(':', '_'), var)
    merged_summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        for run_idx in range(num_runs):
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter(
                "/cs/grad/pazbu/paz/dev/projects/dnanet-v2/summaries", graph=sess.graph)
            for epoch_idx in range(num_epochs):
                for iter_idx in range(iters_per_epoch):
                    batch = ds.train.next_batch(mini_batch_size)
                    if iter_idx % 100 == 0:
                        train_accuracy = accuracy.eval(
                            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                        valid_accuracy = accuracy.eval(
                            feed_dict={x: ds.validation.seqs,
                                       y_: ds.validation.labels,
                                       keep_prob: 1.0})
                        print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g' %
                              (run_idx, epoch_idx, iter_idx, train_accuracy, valid_accuracy))
                    # Run the train step and collect summaries in one session call,
                    # indexed by the global step across epochs.
                    _, summary_str = sess.run([train_step, merged_summary_op],
                                              feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
                    summary_writer.add_summary(summary_str, iters_per_epoch * epoch_idx + iter_idx)
        print('test accuracy %g' % accuracy.eval(feed_dict={x: ds.test.seqs,
                                                            y_: ds.test.labels,
                                                            keep_prob: 1.0}))
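# log_files(data_dir_path) is called in several versions above but defined
# elsewhere. A hedged guess at its role, judging only by the name and call
# site (an assumption, not the original code): record which data files a run
# was trained on, for provenance.
import os

def log_files(dir_path):
    """Print the files in the data directory so a run can be traced to its inputs."""
    for name in sorted(os.listdir(dir_path)):
        print('data file:', os.path.join(dir_path, name))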
# Multi-label variant: sigmoid cross-entropy over general_cfg["num_outs"]
# outputs, Sacred experiment tracking, and per-output ROC-AUC on the test
# set. `ex` (the Sacred Experiment), `ex_log_dir`, `data_dir` and `path`
# (os.path) are module-level names defined elsewhere.
def run_experiment(general_cfg, dropout_keep_prob, seed):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    ds = EnhancersData(data_dir)
    log_files(data_dir)

    x = tf.placeholder(tf.float32, shape=[None, 4, general_cfg["seq_length"]], name="x")
    y_ = tf.placeholder(tf.float32, shape=[None, general_cfg["num_outs"]], name="y_")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    y_conv = CNN(x, dropout_keep_prob=keep_prob)

    # One sigmoid per output instead of a softmax over mutually exclusive classes.
    cross_entropy = tf.losses.sigmoid_cross_entropy(multi_class_labels=y_, logits=y_conv)
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

    y_pred_sig = tf.sigmoid(y_conv, name="sigmoid_out")
    correct_prediction = tf.equal(y_, tf.round(y_pred_sig))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_runs = general_cfg["num_runs"]
    num_epochs = general_cfg["num_epochs"]
    mini_batch_size = general_cfg["batch_size"]

    for var in tf.trainable_variables():
        tf.summary.histogram(var.name.replace(':', '_'), var)
    merged_summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    best_val_acc = 0
    ex_id = ex.current_run._id
    class_weights = np.sum(ds.test.labels, axis=0) / np.sum(ds.test.labels)

    with tf.Session() as sess:
        for run_idx in range(num_runs):
            chkp_dir = path.join(ex_log_dir, str(ex_id), "run" + str(run_idx))
            ds.reset()
            sess.run(tf.global_variables_initializer())
            summary_writer = tf.summary.FileWriter(
                "/cs/grad/pazbu/paz/dev/projects/dnanet-roadmap/summaries", graph=sess.graph)
            current_step = 0
            while ds.train.epochs_completed < num_epochs:
                current_step += 1
                batch = ds.train.next_batch(mini_batch_size)
                summary_str, _, l = sess.run([merged_summary_op, train_step, cross_entropy],
                                             feed_dict={x: batch[0], y_: batch[1],
                                                        keep_prob: dropout_keep_prob})
                l_sum = np.sum(l)
                # Skip to the next epoch if the loss is already very small:
                if l_sum < 0.001:
                    print('loss < 0.001. skipping to next epoch...')
                    curr_epoch = ds.train.epochs_completed
                    while curr_epoch == ds.train.epochs_completed:
                        ds.train.next_batch(mini_batch_size)
                        current_step += 1
                    continue
                if current_step % 200 == 0:
                    train_accuracy = accuracy.eval(
                        feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                    # Compute validation predictions in batches, then feed them
                    # back in place of y_pred_sig to evaluate the accuracy op.
                    y_vals = batched_eval(y_pred_sig, ds.validation.single_pass_batch_iter(1000),
                                          x, y_, keep_prob)
                    valid_accuracy = accuracy.eval(feed_dict={y_: ds.validation.labels,
                                                              y_pred_sig: y_vals})
                    if best_val_acc < valid_accuracy:
                        saver.save(sess, save_path=path.join(chkp_dir, str(valid_accuracy)),
                                   global_step=current_step)
                        best_val_acc = valid_accuracy
                        ex.info["best_validation_accuracy"] = best_val_acc
                    print('run: %d, epoch: %d, iteration: %d, train accuracy: %g, validation accuracy: %g, loss: %g' %
                          (run_idx, ds.train.epochs_completed, current_step,
                           train_accuracy, valid_accuracy, l_sum))

            # Final test evaluation for this run: accuracy plus per-output
            # ROC-AUC, weighted by the class frequencies in the test labels.
            y_pred = batched_eval(y_pred_sig, ds.test.single_pass_batch_iter(1000), x, y_, keep_prob)
            test_accuracy = accuracy.eval(feed_dict={y_: ds.test.labels, y_pred_sig: y_pred})
            print('test accuracy: ', test_accuracy)
            auc_rocs = calc_auc_rocs(ds.test.labels, y_pred)
            for i, auc_roc in enumerate(auc_rocs):
                print('roc-auc ' + str(i) + ":", auc_roc)
            auc_rocs_weighted_sum = np.sum(np.multiply(auc_rocs, class_weights))
            print('roc-auc weighted sum:', auc_rocs_weighted_sum)
            ex.info["final_auc_rocs_run" + str(run_idx)] = auc_rocs
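# batched_eval and calc_auc_rocs are used above but defined elsewhere. Hedged
# sketches of what they plausibly do (assumptions, not the original code):
# batched_eval evaluates a tensor batch by batch and stacks the results;
# calc_auc_rocs computes one ROC-AUC per output column.
def batched_eval(tensor, batch_iter, x, y_, keep_prob):
    """Evaluate `tensor` over an iterator of (x, y) batches and concatenate."""
    outs = [tensor.eval(feed_dict={x: xb, y_: yb, keep_prob: 1.0})
            for xb, yb in batch_iter]
    return np.concatenate(outs, axis=0)

def calc_auc_rocs(labels, preds):
    """Per-output ROC-AUC via sklearn.metrics.roc_auc_score."""
    from sklearn.metrics import roc_auc_score
    return [roc_auc_score(labels[:, i], preds[:, i]) for i in range(labels.shape[1])]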
# Standalone evaluation script: restores a trained checkpoint from its meta
# graph and scores the test set batch by batch.
import tensorflow as tf
import numpy as np
from enhancersdata import EnhancersData

data_dir_path = '/cs/grad/pazbu/paz/dev/projects/dnanet-v2/data'
ds = EnhancersData(data_dir_path)

with tf.Session() as sess:
    # First load the meta graph, then restore the weights.
    saver = tf.train.import_meta_graph('chk-0.936961433878-120900.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))

    # The restored graph already contains the input placeholders ("x:0",
    # "keep_prob:0"), which are fed by name below; only the label placeholder
    # needs to be created here.
    y_ = tf.placeholder(tf.uint8, shape=[None, 2])

    graph = tf.get_default_graph()
    y_pred_sig = graph.get_tensor_by_name("sigmoid_out:0")
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_pred_sig, 1))

    all_outcomes = []
    batch_iter = ds.test.single_pass_batch_iter(100)
    num_batches = np.ceil(ds.test.num_examples / 100)
    c = 0
    for x_batch, y_batch in batch_iter:
        out = correct_prediction.eval(feed_dict={"x:0": x_batch,
                                                 y_: y_batch,
                                                 "keep_prob:0": 1.0})
        all_outcomes.extend(out)
        c += 1
        print('evaluated batch %d of %d' % (c, num_batches))
    # Test accuracy is the fraction of correct predictions over all batches.
    print('test accuracy: %g' % (sum(all_outcomes) / ds.test.num_examples))