def testOneThreadDynamicPad(self):
    with self.test_session() as sess:
        batch_size = 10
        num_batches = 3
        zero64 = tf.constant(0, dtype=tf.int64)
        examples = tf.Variable(zero64)
        counter = examples.count_up_to(num_batches * batch_size)
        string = tf.tile(["string"], tf.to_int32(tf.pack([counter])))
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()
        batched = tf.train.batch(
            [counter, string], batch_size=batch_size, dynamic_pad=True)
        threads = tf.train.start_queue_runners()

        for i in range(num_batches):
            results = sess.run(batched)
            expected_results = np.arange(i * batch_size, (i + 1) * batch_size)
            max_len = expected_results[-1]
            self.assertAllEqual(results[0], expected_results)
            expected_strings = [
                [b"string"] * rep + [b""] * (max_len - rep)
                for rep in expected_results]
            self.assertAllEqual(results[1], expected_strings)

        # Reached the limit.
        with self.assertRaises(tf.errors.OutOfRangeError):
            sess.run(batched)
        for thread in threads:
            thread.join()
def _testRemoveSqueezableDimensions(
        self, predictions_have_static_shape, predictions_have_extra_dim,
        labels_have_static_shape, labels_have_extra_dim):
    assert not (predictions_have_extra_dim and labels_have_extra_dim)
    predictions_value = (0, 1, 1, 0, 0, 1, 0)
    labels_value = (0, 0, 1, 1, 0, 0, 0)

    input_predictions_value = (
        [[p] for p in predictions_value] if predictions_have_extra_dim
        else predictions_value)
    input_labels_value = (
        [[l] for l in labels_value] if labels_have_extra_dim
        else labels_value)

    with tf.Graph().as_default() as g:
        feed_dict = {}
        if predictions_have_static_shape:
            predictions = tf.constant(input_predictions_value, dtype=tf.int32)
        else:
            predictions = tf.placeholder(dtype=tf.int32, name="predictions")
            feed_dict[predictions] = input_predictions_value
        if labels_have_static_shape:
            labels = tf.constant(input_labels_value, dtype=tf.int32)
        else:
            labels = tf.placeholder(dtype=tf.int32, name="labels")
            feed_dict[labels] = input_labels_value

        squeezed_predictions, squeezed_labels = (
            tf.contrib.framework.remove_squeezable_dimensions(
                predictions, labels))
        with self.test_session(g):
            tf.initialize_local_variables().run()
            self.assertAllClose(
                predictions_value,
                squeezed_predictions.eval(feed_dict=feed_dict))
            self.assertAllClose(
                labels_value, squeezed_labels.eval(feed_dict=feed_dict))
def testNoLimit(self):
    with self.test_session():
        seven = tf.constant(7)
        seven_forever = tf.train.limit_epochs(seven)
        tf.initialize_local_variables().run()
        for _ in range(100):
            self.assertEqual(7, seven_forever.eval())
def __init__(self, model_def_file, class_labels_file):
    logging.info('Loading net and associated files...')

    with tf.Graph().as_default(), tf.device('cpu:0'):
        self.sess = tf.Session()

        self.image_buffer = tf.placeholder(tf.string)
        image = tf.image.decode_jpeg(self.image_buffer, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = self.eval_image(image, 299, 299)
        image = tf.sub(image, 0.5)
        image = tf.mul(image, 2.0)
        images = tf.expand_dims(image, 0)

        # Run inference.
        logits, predictions = inception_model.inference(
            images, NUM_CLASSES + 1)

        # Transform output to topK result.
        self.values, self.indices = tf.nn.top_k(
            predictions, NUM_TOP_CLASSES)

        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        tf.initialize_all_variables().run(session=self.sess)
        tf.initialize_local_variables().run(session=self.sess)
        saver = tf.train.Saver(variables_to_restore)
        saver.restore(self.sess, model_def_file)

        # Required to get the filename matching to run.
        self.label_names = ['none']
        with open(class_labels_file) as f:
            for line in f.read().decode("utf-8").splitlines():
                self.label_names.append(line)
def read_data_int64(input_fname):
    import pdb
    with tictoc():
        input_fname_queue = tf.train.string_input_producer(
            [input_fname], num_epochs=1)
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(input_fname_queue)
        features = {'bit_features': tf.VarLenFeature(tf.int64)}
        parsed_example = tf.parse_single_example(serialized_example, features)
        bit_features = parsed_example['bit_features']
        bit_features = tf.sparse_tensor_to_dense(bit_features)
        bit_features = tf.reshape(bit_features, [-1, 62])

        with tf.Session() as sess:
            tf.initialize_all_variables().run()
            tf.initialize_local_variables().run()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                i = 0
                while not coord.should_stop():
                    x = bit_features.eval()
                    if i % 10000 == 0:
                        print("substance {}".format(i))
                    i += 1
            except tf.errors.OutOfRangeError:
                pass
            finally:
                coord.request_stop()
                coord.join(threads)
def cnn_train(config, data_len, embed, pf_r1, pf_r2):
    config.data_len = data_len
    tf.reset_default_graph()
    with tf.Session() as session:
        # build model
        with tf.variable_scope("cnn_ch", reuse=None):
            m_train = ch_model(config)
        with tf.variable_scope("cnn_ch", reuse=True):
            m_valid = ch_model(config)

        doc_datas, pf_r1s, pf_r2s, labels = read_batch(
            config.csv_file, config, True)
        doc_datas_v, pf_r1s_V, pf_r2s_v, labels_v = read_batch(
            config.csv_file, config, False)

        for item in tf.all_variables():
            print "var: ", item
        for item in tf.local_variables():
            print "local:", item

        loss, _ = m_train.inference(doc_datas, pf_r1s, pf_r2s, labels)
        loss_v, acc_v = m_valid.inference(doc_datas_v, pf_r1s_V, pf_r2s_v,
                                          labels_v)

        train_op = m_train.train(loss)
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()
        m_train.assign_word_embed(session, embed)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=session)

        epoch = 0
        step = 0
        min_cost = sys.maxint
        try:
            while not coord.should_stop():
                _, f_l = session.run([train_op, loss])
                step += 1
                if step == config.data_len // config.batch_size:
                    cost = 0.0
                    acc = 0.0
                    for i in range(step):
                        v_l, acc_l = session.run([loss_v, acc_v])
                        cost += v_l
                        acc += acc_l
                    cost /= step
                    acc /= step
                    if cost < min_cost:
                        min_cost = cost
                        print "save model as cost:", cost
                        m_train.saver.save(session, config.model_path)
                    print "epoch: ", epoch, "loss: ", cost, "acc: ", acc, "step:", step
                    step = 0
                    epoch += 1
        except tf.errors.OutOfRangeError:
            print("Done training")
        finally:
            coord.request_stop()
            coord.join(threads)
def main(_):
    if FLAGS.train_data:
        num_labels, num_features, train_data, train_labels = extract_data(
            FLAGS.train_data, feature_limit=FEATURE_LIMIT)
    else:
        num_labels, num_features = 2, FEATURE_LIMIT
        train_data, train_labels = [], []
    print "labels", num_labels, "features", num_features

    if FLAGS.test_data:
        _, _, test_data, test_labels = extract_data(
            FLAGS.test_data, feature_limit=FEATURE_LIMIT)
    else:
        test_data, test_labels = [], []

    train_size = len(train_data)
    model = LinearModel(num_features, num_labels, FLAGS.learning_rate)

    # Create local session to train and test
    with tf.Session(graph=model.graph) as s:
        ckpt = tf.train.get_checkpoint_state(FLAGS.models)
        if ckpt and ckpt.model_checkpoint_path:
            model.saver.restore(s, ckpt.model_checkpoint_path)
            print "Model loaded from", ckpt.model_checkpoint_path
        else:
            model.init.run()
            print "Initialized"

        if test_data:
            print 'testing'
            correct = 0
            total = 0
            # Local variables must actually be initialized by running the op.
            s.run(tf.initialize_local_variables())
            for i in range(len(test_data) // BATCH_SIZE):
                offset = i * BATCH_SIZE
                batch_data = transform(test_data[offset:(offset + BATCH_SIZE)],
                                       num_features)
                batch_labels = test_labels[offset:(offset + BATCH_SIZE)]
                c = s.run(
                    [model.correct_sum],
                    feed_dict={model.x: batch_data, model.y_: batch_labels})
                correct += c[0]
                total += BATCH_SIZE
            print correct, total, "accuracy:", float(correct) / total
            return

        # Iterate and train.
        average_loss = 0
        for step in xrange(FLAGS.train_steps * len(train_data) // BATCH_SIZE):
            offset = (step * BATCH_SIZE) % train_size
            batch_data = transform(train_data[offset:(offset + BATCH_SIZE)],
                                   num_features)
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            _, loss_val = s.run([model.optimizer, model.cross_entropy],
                                feed_dict={model.x: batch_data,
                                           model.y_: batch_labels})
            average_loss += loss_val
            if step > 0 and step % K == 0:
                print "Average loss at step: ", model.global_step.eval(), " loss: ", average_loss / K
                average_loss = 0
                checkpoint_path = os.path.join(FLAGS.models, "pe.ckpt")
                model.saver.save(s, checkpoint_path,
                                 global_step=model.global_step)
def test_empty_labels_and_scores_gives_nan_auc(self):
    with self.test_session():
        labels = tf.constant([], shape=[0], dtype=tf.bool)
        scores = tf.constant([], shape=[0], dtype=tf.float32)
        score_range = [0, 1.]
        auc, update_op = tf.contrib.metrics.auc_using_histogram(
            labels, scores, score_range)
        tf.initialize_local_variables().run()
        update_op.run()
        self.assertTrue(np.isnan(auc.eval()))
def testLimit(self):
    with self.test_session():
        love_me = tf.constant("Love Me")
        love_me_two_times = tf.train.limit_epochs(love_me, num_epochs=2)
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()
        self.assertEqual(b"Love Me", love_me_two_times.eval())
        self.assertEqual(b"Love Me", love_me_two_times.eval())
        with self.assertRaises(tf.errors.OutOfRangeError):
            love_me_two_times.eval()
def _check_auc(self, nbins=100, desired_auc=0.75, score_range=None,
               num_records=50, frac_true=0.5, atol=0.05, num_updates=10):
    """Check auc accuracy against synthetic data.

    Args:
      nbins: nbins arg from contrib.metrics.auc_using_histogram.
      desired_auc: Number in [0, 1]. The desired auc for synthetic data.
      score_range: 2-tuple, (low, high), giving the range of the resultant
        scores. Defaults to [0, 1.].
      num_records: Positive integer. The number of records to return.
      frac_true: Number in (0, 1). Expected fraction of resultant labels that
        will be True. This is just in expectation...more or less may actually
        be True.
      atol: Absolute tolerance for final AUC estimate.
      num_updates: Update internal histograms this many times, each with a new
        batch of synthetic data, before computing final AUC.

    Raises:
      AssertionError: If resultant AUC is not within atol of theoretical AUC
        from synthetic data.
    """
    score_range = score_range or [0, 1.]
    with self.test_session():
        labels = tf.placeholder(tf.bool, shape=[num_records])
        scores = tf.placeholder(tf.float32, shape=[num_records])
        auc, update_op = tf.contrib.metrics.auc_using_histogram(
            labels, scores, score_range, nbins=nbins)
        tf.initialize_local_variables().run()

        # Updates, then extract auc.
        for _ in range(num_updates):
            labels_a, scores_a = synthetic_data(desired_auc, score_range,
                                                num_records, self.rng,
                                                frac_true)
            update_op.run(feed_dict={labels: labels_a, scores: scores_a})
        labels_a, scores_a = synthetic_data(desired_auc, score_range,
                                            num_records, self.rng, frac_true)

        # Fetch current auc, and verify that fetching again doesn't change it.
        auc_eval = auc.eval()
        self.assertAlmostEqual(auc_eval, auc.eval(), places=5)

        msg = ('nbins: %s, desired_auc: %s, score_range: %s, '
               'num_records: %s, frac_true: %s, num_updates: %s') % (
                   nbins, desired_auc, score_range, num_records, frac_true,
                   num_updates)
        np.testing.assert_allclose(desired_auc, auc_eval, atol=atol,
                                   err_msg=msg)
def run_eval(dataset, hps, logdir, mode, num_eval_steps):
    with tf.variable_scope("model"):
        hps.num_sampled = 0
        hps.keep_prob = 1.0
        model = LM(hps, "eval", "/cpu:0")

    if hps.average_params:
        print("Averaging parameters for evaluation.")
        saver = tf.train.Saver(model.avg_dict)
    else:
        saver = tf.train.Saver()

    # Restrict the number of threads used for the evaluation.
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=20,
                            inter_op_parallelism_threads=1)
    sess = tf.Session(config=config)
    sw = tf.train.SummaryWriter(logdir + '/' + mode, sess.graph)
    ckpt_loader = CheckpointLoader(saver, model.global_step, logdir + "/train")

    with sess.as_default():
        while ckpt_loader.load_checkpoint():
            global_step = ckpt_loader.last_global_step
            data_iterator = dataset.iterate_once(
                hps.batch_size * hps.num_gpus, hps.num_steps)
            tf.initialize_local_variables().run()
            loss_nom = 0.0
            loss_den = 0.0
            for i, (x, y, w) in enumerate(data_iterator):
                if i >= num_eval_steps:
                    break
                loss = sess.run(model.loss,
                                {model.x: x, model.y: y, model.w: w})
                loss_nom += loss
                loss_den += w.mean()
                loss = loss_nom / loss_den
                sys.stdout.write("%d: %.3f (%.3f) ... " % (i, loss,
                                                           np.exp(loss)))
                sys.stdout.flush()
            sys.stdout.write("\n")

            log_perplexity = loss_nom / loss_den
            print("Results at %d: log_perplexity = %.3f perplexity = %.3f" % (
                global_step, log_perplexity, np.exp(log_perplexity)))

            summary = tf.Summary()
            summary.value.add(tag='eval/log_perplexity',
                              simple_value=log_perplexity)
            summary.value.add(tag='eval/perplexity',
                              simple_value=np.exp(log_perplexity))
            sw.add_summary(summary, global_step)
            sw.flush()
def test_input_fname_producer(input_fname):
    import pdb
    pdb.set_trace()
    with tf.Session() as sess:
        queue = tf.train.string_input_producer(
            [input_fname], num_epochs=None, shuffle=False)
        dequeue = queue.dequeue()
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners()
        output = dequeue.eval()
        for thread in threads:
            thread.join()
def _testTwoThreadsHelper(self, use_dict):
    with self.test_session() as sess:
        batch_size = 10
        num_batches = 3
        zero64 = tf.constant(0, dtype=tf.int64)
        examples = tf.Variable(zero64)
        counter = examples.count_up_to(num_batches * batch_size)
        sparse_counter = tf.SparseTensor(
            indices=tf.reshape(zero64, [1, 1]),
            values=tf.pack([tf.cast(counter, tf.float32)]),
            shape=[1])
        if use_dict:
            batched = tf.train.shuffle_batch(
                {"c": counter, "s": sparse_counter, "S": "string"},
                batch_size=batch_size, capacity=32,
                min_after_dequeue=16, seed=141421)
            batched_fetch = [batched["c"], batched["s"], batched["S"]]
        else:
            batched = tf.train.shuffle_batch(
                [counter, sparse_counter, "string"],
                batch_size=batch_size, capacity=32,
                min_after_dequeue=16, seed=141421)
            batched_fetch = batched
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners()

        all_counts = []
        for i in range(num_batches):
            results = sess.run(batched_fetch)
            self.assertEqual(len(results[0]), batch_size)
            all_counts.extend(results[0])
            self.assertAllEqual(
                results[1].indices,
                np.vstack((np.arange(batch_size), np.zeros(batch_size))).T)
            self.assertAllEqual(results[0], results[1].values)
            self.assertAllEqual(results[1].shape, [batch_size, 1])
            self.assertAllEqual(results[2], [b"string"] * batch_size)

        # Results scrambled, but include all the expected numbers.
        deltas = [all_counts[i + 1] - all_counts[i]
                  for i in range(len(all_counts) - 1)]
        self.assertFalse(all(d == deltas[0] for d in deltas))
        self.assertItemsEqual(all_counts, range(num_batches * batch_size))

        # Reached the limit.
        with self.assertRaises(tf.errors.OutOfRangeError):
            sess.run(batched_fetch)
        for thread in threads:
            thread.join()
def testFinalOpsIsEvaluated(self):
    _, update_op = slim.metrics.streaming_accuracy(self._predictions,
                                                   self._labels)
    init_op = tf.group(tf.initialize_all_variables(),
                       tf.initialize_local_variables())

    with self.test_session() as sess:
        accuracy_value = slim.evaluation.evaluation(
            sess, init_op=init_op, final_op=update_op)
        self.assertAlmostEqual(accuracy_value, self._expected_accuracy)
def testMultipleUpdatesWithWeightedValues(self):
    with self.test_session() as sess:
        # Create the queue that populates the predictions.
        preds_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1))
        _enqueue_vector(sess, preds_queue, [0])
        _enqueue_vector(sess, preds_queue, [1])
        _enqueue_vector(sess, preds_queue, [2])
        _enqueue_vector(sess, preds_queue, [1])
        predictions = preds_queue.dequeue()

        # Create the queue that populates the labels.
        labels_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1))
        _enqueue_vector(sess, labels_queue, [0])
        _enqueue_vector(sess, labels_queue, [1])
        _enqueue_vector(sess, labels_queue, [1])
        _enqueue_vector(sess, labels_queue, [2])
        labels = labels_queue.dequeue()

        # Create the queue that populates the weights.
        weights_queue = tf.FIFOQueue(4, dtypes=tf.int64, shapes=(1, 1))
        _enqueue_vector(sess, weights_queue, [1])
        _enqueue_vector(sess, weights_queue, [1])
        _enqueue_vector(sess, weights_queue, [0])
        _enqueue_vector(sess, weights_queue, [0])
        weights = weights_queue.dequeue()

        accuracy, update_op = tf.contrib.metrics.streaming_accuracy(
            predictions, labels, weights)

        sess.run(tf.initialize_local_variables())
        for _ in range(4):
            sess.run(update_op)
        self.assertEqual(1.0, accuracy.eval())
def testNullString(self):
    # Runtime check for empty string list.  This is slightly oblique:
    # The queue runner should die with an assertion error on the null
    # input tensor, causing the dequeue to fail with an OutOfRangeError.
    with self.test_session():
        coord = tf.train.Coordinator()
        queue = tf.train.string_input_producer(
            tf.constant([], dtype=tf.string))
        dequeue = queue.dequeue()
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)
        with self.assertRaises(tf.errors.OutOfRangeError):
            dequeue.eval()
        coord.request_stop()
        for thread in threads:
            thread.join()
def test_batch_text_lines(self):
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("A\nB\nC\nD\nE\n")

    batch_size = 3
    queue_capacity = 10
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            [filename], batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, read_batch_size=10, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(session, coord=coord)

        self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
        self.assertAllEqual(session.run(inputs), [b"D", b"E"])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def initialize_session(sess):
    tf.train.start_queue_runners(sess=sess)
    enqueue_thread = threading.Thread(target=fill_fifo_queue, args=(sess,))
    enqueue_thread.start()
    sess.run(tf.initialize_all_variables())
    sess.run(tf.initialize_local_variables())
    return enqueue_thread
def compute_accuracy(x, l, mask):
    """Compute model accuracy."""
    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=l.dtype)

    _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

    if FLAGS.surrogate_attack:
        preds = sur_ch_model.get_probs(x)
        preds = tf.squeeze(preds)
        preds = tf.argmax(preds, -1, output_type=l.dtype)
        acc_update_op = tf.tuple((acc_update_op,
                                  tf.metrics.accuracy(l, preds,
                                                      weights=mask)[1]))

    sess.run(tf.initialize_local_variables())
    for i in range(FLAGS.eval_steps):
        tf.logging.info(
            "\tEvaluating batch [%d / %d]" % (i + 1, FLAGS.eval_steps))
        acc = sess.run(acc_update_op)
    if FLAGS.surrogate_attack:
        tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
        tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc
def testEvaluationLoopTimeout(self):
    _, update_op = slim.metrics.streaming_accuracy(
        self._predictions, self._labels)
    init_op = tf.group(tf.initialize_all_variables(),
                       tf.initialize_local_variables())

    # Create checkpoint and log directories.
    chkpt_dir = os.path.join(self.get_temp_dir(), 'tmp_logs/')
    gfile.MakeDirs(chkpt_dir)
    logdir = os.path.join(self.get_temp_dir(), 'tmp_logs2/')
    gfile.MakeDirs(logdir)

    # Save initialized variables to checkpoint directory.
    saver = tf.train.Saver()
    with self.test_session() as sess:
        init_op.run()
        saver.save(sess, os.path.join(chkpt_dir, 'chkpt'))

    # Run the evaluation loop with a timeout.
    with self.test_session() as sess:
        start = time.time()
        slim.evaluation.evaluation_loop(
            '', chkpt_dir, logdir, eval_op=update_op,
            eval_interval_secs=2.0, timeout=6.0)
        end = time.time()

        # Check we've waited for the timeout.
        self.assertGreater(end - start, 6.0)
        # Then the timeout kicked in and stopped the loop.
        self.assertLess(end - start, 7.5)
def testMultipleMetricsOnMultipleBatchesOfSizeOne(self):
    with self.test_session() as sess:
        # Create the queue that populates the predictions.
        preds_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3))
        _enqueue_vector(sess, preds_queue, [10, 8, 6])
        _enqueue_vector(sess, preds_queue, [-4, 3, -1])
        predictions = preds_queue.dequeue()

        # Create the queue that populates the labels.
        labels_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3))
        _enqueue_vector(sess, labels_queue, [1, 3, 2])
        _enqueue_vector(sess, labels_queue, [2, 4, 6])
        labels = labels_queue.dequeue()

        mae, ma_update_op = tf.contrib.metrics.streaming_mean_absolute_error(
            predictions, labels)
        mse, ms_update_op = tf.contrib.metrics.streaming_mean_squared_error(
            predictions, labels)

        sess.run(tf.initialize_local_variables())
        sess.run([ma_update_op, ms_update_op])
        sess.run([ma_update_op, ms_update_op])

        self.assertAlmostEqual(32 / 6.0, mae.eval(), 5)
        self.assertAlmostEqual(208 / 6.0, mse.eval(), 5)
def test_read_csv(self):
    gfile.Glob = self._orig_glob
    tempdir = tempfile.mkdtemp()
    filename = os.path.join(tempdir, "file.csv")
    gfile.Open(filename, "w").write("ABC\nDEF\nGHK\n")

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            filename, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(session, coord=coord)

        self.assertAllEqual(session.run(inputs), [b"ABC"])
        self.assertAllEqual(session.run(inputs), [b"DEF"])
        self.assertAllEqual(session.run(inputs), [b"GHK"])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
def testSummariesAreFlushedToDiskWithoutGlobalStep(self):
    output_dir = os.path.join(self.get_temp_dir(),
                              'flush_test_no_global_step')
    if tf.gfile.Exists(output_dir):  # For running on jenkins.
        tf.gfile.DeleteRecursively(output_dir)

    names_to_metrics, names_to_updates = self._create_names_to_metrics(
        self._predictions, self._labels)

    for k in names_to_metrics:
        v = names_to_metrics[k]
        tf.scalar_summary(k, v)

    summary_writer = tf.train.SummaryWriter(output_dir)

    initial_op = tf.group(tf.initialize_all_variables(),
                          tf.initialize_local_variables())
    eval_op = tf.group(*names_to_updates.values())

    with self.test_session() as sess:
        slim.evaluation.evaluation(
            sess,
            initial_op=initial_op,
            eval_op=eval_op,
            summary_op=tf.merge_all_summaries(),
            summary_writer=summary_writer)

        names_to_values = {name: names_to_metrics[name].eval()
                           for name in names_to_metrics}
    self._verify_summaries(output_dir, names_to_values)
def main():
    # tf.reset_default_graph()
    sess = tf.Session()
    batch_size = 50
    num_epochs = 5000

    tf_records_folder = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), '../tf_records')
    filenames = [os.path.join(tf_records_folder, 'billboard',
                              'train.tfrecords.proto')]
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs, shuffle=True)
    track_id_batch, length_batch, features_batch, labels_batch = batches_from_queue(
        filename_queue, batch_size)

    model = BasicLSTMModel(length_batch, features_batch, labels_batch)

    sess.run(tf.initialize_all_variables())
    sess.run(tf.initialize_local_variables())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        while True:
            _, accuracy = sess.run([model.train, model.accuracy])
            print 'train accuracy: %.2f%%' % (accuracy * 100)
    except tf.errors.OutOfRangeError, e:
        coord.request_stop(e)
def _assert_metrics(
        test_case, expected_loss, expected_eval_metrics, model_fn_ops):
    test_case.assertAlmostEqual(expected_loss, model_fn_ops.loss.eval(),
                                places=4)
    for k in six.iterkeys(expected_eval_metrics):
        test_case.assertIn(k, six.iterkeys(model_fn_ops.eval_metric_ops))
    tf.initialize_local_variables().run()
    for key, expected_value in six.iteritems(expected_eval_metrics):
        value_tensor, update_tensor = model_fn_ops.eval_metric_ops[key]
        update = update_tensor.eval()
        test_case.assertAlmostEqual(
            expected_value, update, places=4,
            msg="%s: update, expected %s, got %s." % (key, expected_value,
                                                      update))
        value = value_tensor.eval()
        test_case.assertAlmostEqual(
            expected_value, value, places=4,
            msg="%s: value, expected %s, got %s." % (key, expected_value,
                                                     value))
def initialize_session(sess, task_params):
    if task_params['verbose']:
        print("Initializing tensorflow session ...")

    saver = tf.train.Saver()

    if task_params['restore_from_checkpoint']:
        saver.restore(sess=sess, save_path=task_params['save_path'])
        if task_params['verbose']:
            print("Restoring variables from '{}'".format(
                task_params['save_path']))
    else:
        sess.run(tf.initialize_all_variables())
        sess.run(tf.initialize_local_variables())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    logdir = task_params['summaries_dir'] + '/train_' + time.strftime(
        "%Y%m%d_%H-%M-%S")
    train_writer = tf.train.SummaryWriter(logdir=logdir, graph=sess.graph)
    summaries = tf.merge_all_summaries()

    return coord, threads, saver, train_writer, summaries
def test_keyed_read_text_lines(self):
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("ABC\nDEF\nGHK\n")

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
            filename, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(session, coord=coord)

        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":1"], [b"ABC"]])
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":2"], [b"DEF"]])
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":3"], [b"GHK"]])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
def test_multiple_workers_with_shared_queue(self):
    gfile.Glob = self._orig_glob
    filenames = self._create_sorted_temp_files([
        "ABC\n", "DEF\n", "GHI\n", "JKL\n", "MNO\n", "PQR\n", "STU\n",
        "VWX\n", "YZ\n"
    ])

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"
    shared_file_name_queue_name = "%s/file_name_queue" % name
    example_queue_name = "%s/fifo_queue" % name
    worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name

    server = tf.train.Server.create_local_server()

    with tf.Graph().as_default() as g1, tf.Session(
            server.target, graph=g1) as session:
        _, inputs = _read_keyed_batch_examples_shared_queue(
            filenames, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)
        session.run(tf.initialize_local_variables())

        # Run the three queues once manually.
        self._run_queue(shared_file_name_queue_name, session)
        self._run_queue(worker_file_name_queue_name, session)
        self._run_queue(example_queue_name, session)
        self.assertAllEqual(session.run(inputs), [b"ABC"])

        # Run the worker and the example queue.
        self._run_queue(worker_file_name_queue_name, session)
        self._run_queue(example_queue_name, session)
        self.assertAllEqual(session.run(inputs), [b"DEF"])

    with tf.Graph().as_default() as g2, tf.Session(
            server.target, graph=g2) as session:
        _, inputs = _read_keyed_batch_examples_shared_queue(
            filenames, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)

        # Run the worker and the example queue.
        self._run_queue(worker_file_name_queue_name, session)
        self._run_queue(example_queue_name, session)
        self.assertAllEqual(session.run(inputs), [b"GHI"])

    self.assertTrue(g1 is not g2)
def eval_model():
    preds = spec.predictions["predictions"]
    preds = tf.argmax(preds, -1, output_type=labels.dtype)
    _, acc_update_op = tf.metrics.accuracy(labels=labels, predictions=preds)

    sess.run(tf.initialize_local_variables())
    for _ in range(FLAGS.eval_steps):
        acc = sess.run(acc_update_op)
    return acc
def testZeroTruePositivesAndFalseNegativesGivesZeroRecall(self):
    predictions = tf.zeros((1, 4))
    labels = tf.zeros((1, 4))
    recall, update_op = tf.contrib.metrics.streaming_recall(predictions,
                                                            labels)

    with self.test_session() as sess:
        sess.run(tf.initialize_local_variables())
        sess.run(update_op)
        self.assertEqual(0, recall.eval())
def test_multi_gpu():
    g = tf.Graph()
    with g.as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        image_outputs = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i) as scope:
                    net_in = get_inputs()
                    srnet = SRNet_x4(net_in)
                    net_out = srnet.output
                    image_deblurred = tf.image.encode_jpeg(
                        tf.saturate_cast(
                            tf.squeeze(net_out) + reader.mean_pixel,
                            tf.uint8))
                    tf.get_variable_scope().reuse_variables()
                    image_outputs.append(image_deblurred)

        ncreader = tf.train.NewCheckpointReader(
            "training_checkpoints/fast-deblur-model_*-36000")
        var_to_shape_map = ncreader.get_variable_to_shape_map()
        ckpt_varnames = var_to_shape_map.keys()
        new_varnames = [var.name for var in
                        g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]
        print(len(ckpt_varnames))
        print(len(new_varnames))
        print(type(ckpt_varnames[0]))

        count = 0
        var_dict = {}
        for name in ckpt_varnames:
            for var in g.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
                if name in var.name:
                    var_dict[name] = var
                    count += 1
                    continue
        print(count)

        saver = tf.train.Saver(var_dict)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 1
    with tf.Session(graph=g, config=config) as sess:
        saver.restore(sess, 'training_checkpoints/fast-deblur-model_*-36000')
        sess.run(tf.initialize_local_variables())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        start_time = time.time()
        try:
            for step in xrange(200):
                if coord.should_stop():
                    break
                outs_ = sess.run(image_outputs)
                # print(len(outs_))
                print(step, os.path.basename(imagePaths[step]))
                with open('test_bsds/' + os.path.basename(imagePaths[step]),
                          'wb') as f:
                    # write the first tower's encoded JPEG bytes
                    f.write(outs_[0])
        except Exception as e:
            coord.request_stop(e)
            print('Done -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)  # wait for threads to finish

        elapsed_time = time.time() - start_time
        print('Cost inference time for {} images: {} secs.'.format(
            num_images, elapsed_time))
        print('{} secs. per image'.format(elapsed_time / num_images))
def main(args): """ main """ if not os.path.exists(args.test_ds): print("{} does not exists".format(args.test_ds)) return 1 # export model.pb from session dir. Skip if model.pb already exists model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0", train.MODEL_PATH) results_dir = "{}/results".format( os.path.dirname(os.path.abspath(__file__))) files = { label: open(results_dir + "/VOC2012/Main/comp3_det_test_{}.txt".format(label), "w") for label in pascal.CLASSES } graph = model.load(train.MODEL_PATH, args.device) with graph.as_default(): # (?, n, n, NUM_CLASSES) tensor logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0") images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0") # each cell in coords (batch_position, i, j) -> is a probability vector per_region_probabilities = tf.nn.softmax( tf.reshape(logits, [-1, train.NUM_CLASSES])) # [tested positions, train.NUM_CLASSES] # array[0]=values, [1]=indices # get every probabiliy, because we can use localization to do classification top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES) # each with shape [tested_positions, k] k = 2 input_side = model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k test_queue, test_filename_queue = pascal.test( args.test_ds, 29, input_side, args.test_ds + "/ImageSets/Main/test.txt") init_op = tf.group(tf.global_variables_initializer(), tf.initialize_local_variables()) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: sess.run(init_op) coordinator = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) try: processed = 0 while not coordinator.should_stop(): image_batch, filename_batch = sess.run( [test_queue, test_filename_queue]) probability_map, top_values, top_indices = sess.run( [logits, top_k[0], top_k[1]], feed_dict={images_: image_batch}) # let's think to the net as a big net, with the last layer (before the FC # layers for classification) with a receptive field of # LAST_KERNEL_SIDE x LAST_KERNEL_SIDE. Lets approximate the net with this last kernel: # If the image is scaled down to LAST_KERNEL_SIDE x LAST_KERNEL_SIDE # the output is a single point. # if the image is scaled down to something bigger # (that make the output side of contolution integer) the result is a spacial map # of points. Every point has a depth of num classes. 
# for every image in the input batch probability_coords = 0 for batch_elem_id in range(len(image_batch)): # scaling factor between original image and resized image decoded_filename = filename_batch[ batch_elem_id].decode("utf-8") image = sess.run( image_processing.read_image_jpg(args.test_ds + "/JPEGImages/" + decoded_filename + ".jpg")) full_image_scaling_factors = np.array([ image.shape[1] / input_side, image.shape[0] / input_side ]) glance = defaultdict(list) group = defaultdict(lambda: defaultdict(float)) for pmap_y in range(probability_map.shape[1]): # calculate position in the downsampled image ds ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE for pmap_x in range(probability_map.shape[2]): ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE if top_indices[probability_coords][ 0] != pascal.BACKGROUND_CLASS_ID: # create coordinates of rect in the downsampled image # convert to numpy array in order to use broadcast ops coord = [ ds_x, ds_y, ds_x + model.LAST_KERNEL_SIDE, ds_y + model.LAST_KERNEL_SIDE ] # if something is found, append rectagle to the # map of rectalges per class rect = utils.upsample_and_shift( coord, model.DOWNSAMPLING_FACTOR, [0, 0], full_image_scaling_factors) prob = top_values[probability_coords][0] label = pascal.CLASSES[ top_indices[probability_coords][0]] rect_prob = [rect, prob] glance[label].append(rect_prob) group[label]["count"] += 1 group[label]["prob"] += prob # update probability coord value probability_coords += 1 classes = group.keys() print('Found {} classes: {}'.format( len(classes), classes)) # find out the minimum amount of intersection among regions # in the original image, that can be used to trigger a match # or 2, is s square. 0 dim is batch map_side = probability_map.shape[1] map_area = map_side**2 min_intersection = map_side # Save the relative frequency for every class # To trigger a match, at least a fraction of intersection should be present for label in group: group[label]["prob"] /= group[label]["count"] group[label][ "rf"] = group[label]["count"] / map_area # merge overlapping rectangles for each class. # return a map of {"label": [rect, prob, count] localize = utils.group_overlapping_regions( glance, eps=RECT_SIMILARITY) detected_labels = set() for label, rect_prob_list in localize.items(): for rect_prob in rect_prob_list: count = rect_prob[2] freq = group[label]["rf"] if count >= min_intersection and freq > 0.1: detected_labels.add(label) confidence = rect_prob[1] rect = rect_prob[0] left = rect[0] top = rect[1] right = rect[2] bottom = rect[3] files[label].write( "{} {} {} {} {} {}\n".format( decoded_filename, confidence, left, top, right, bottom)) processed += 1 except tf.errors.OutOfRangeError: print("[I] Done. Test completed!") print("Processed {} images".format(processed)) finally: coordinator.request_stop() coordinator.join(threads) for label in files: files[label].close()
def test():
    """Standalone evaluation of the trained model."""
    data_dir = FLAGS.data_dir
    cv_batch_size = FLAGS.cv_batch_size
    cv_maxsize_file = path.join(data_dir, FLAGS.cv_maxsize_file)
    dev_data_config = asr.read_data_config(cv_maxsize_file)
    dev_data = asr.get_dev_data(dev_data_config, cv_batch_size)
    dev_examples_num = dev_data_config.example_number
    dev_num_batches_per_epoch = int(dev_examples_num / cv_batch_size)

    with tf.variable_scope("inference") as scope:
        dev_ctc_in, dev_targets, dev_seq_len = asr.rnn(dev_data,
                                                       dev_data_config,
                                                       cv_batch_size)

    dev_decoded, dev_log_prob = tf.nn.ctc_greedy_decoder(dev_ctc_in,
                                                         dev_seq_len)
    edit_distance = tf.edit_distance(tf.to_int32(dev_decoded[0]), dev_targets,
                                     normalize=False)
    batch_error_count = tf.reduce_sum(edit_distance, name="batch_error_count")
    batch_label_count = tf.shape(dev_targets.values)[0]

    local_init = tf.initialize_local_variables()
    saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        saver.restore(session, ckpt.model_checkpoint_path)
        global_step = int(
            ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        logging.info("Loaded model parameters from %s, global_step = %d",
                     ckpt.model_checkpoint_path, global_step)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)
        try:
            dev_error_count = 0
            dev_label_count = 0
            for batch in range(dev_num_batches_per_epoch):
                cv_error_count_value, cv_label_count = session.run(
                    [batch_error_count, batch_label_count])
                dev_error_count += cv_error_count_value
                dev_label_count += cv_label_count

            dev_acc_ratio = (dev_label_count - dev_error_count) / dev_label_count
            logging.info("eval: eval_acc = %.3f ", dev_acc_ratio)
        except tf.errors.OutOfRangeError:
            logging.info("Training complete.")
        finally:
            coord.request_stop()
            coord.join(threads)
def run(config, target='', cluster_spec=None, is_chief=True, job_name=None, task_index=None, get_model_fn=get_model, get_dataset_fn=get_dataset, environment=None): model_class = get_model_fn(config.model.type) image_vis = config.train.get('image_vis') var_vis = config.train.get('var_vis') if config.train.get('seed') is not None: tf.set_random_seed(config.train.seed) log_prefix = '[{}-{}] - '.format(job_name, task_index) \ if job_name is not None and task_index is not None else '' if config.train.debug or config.train.tf_debug: tf.logging.set_verbosity(tf.logging.DEBUG) else: tf.logging.set_verbosity(tf.logging.INFO) model = model_class(config) #print("model construct end !!!!") #pause.seconds(100000) # Placement of ops on devices using replica device setter # which automatically places the parameters on the `ps` server # and the `ops` on the workers # # See: # https://www.tensorflow.org/api_docs/python/tf/train/replica_device_setter with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)): try: config['dataset']['type'] except KeyError: raise KeyError('dataset.type should be set on the custom config.') try: dataset_class = get_dataset_fn(config.dataset.type) dataset = dataset_class(config) train_dataset = dataset() except InvalidDataDirectory as exc: tf.logging.error( "Error while reading dataset, {}".format(exc) ) sys.exit(1) train_image = train_dataset['image'] train_filename = train_dataset['filename'] train_bboxes = train_dataset['bboxes'] prediction_dict = model(train_image, train_bboxes, is_training=True) total_loss = model.loss(prediction_dict) if hasattr(model, "partial_reduce_pred_list"): print("perform partial reduce !!!!!") prediction_dict = model.partial_reduce_pred_list(prediction_dict) global_step = tf.train.get_or_create_global_step() optimizer = get_optimizer(config.train, global_step) # TODO: Is this necesarry? Couldn't we just get them from the # trainable vars collection? We should probably improve our # usage of collections. trainable_vars = model.get_trainable_vars() # Compute, clip and apply gradients with tf.name_scope('gradients'): grads_and_vars = optimizer.compute_gradients( total_loss, trainable_vars ) if config.train.clip_by_norm: grads_and_vars = clip_gradients_by_norm(grads_and_vars) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.apply_gradients( grads_and_vars, global_step=global_step ) # Create custom init for slots in optimizer, as we don't save them to # our checkpoints. An example of slots in an optimizer are the Momentum # variables in MomentumOptimizer. We do this because slot variables can # effectively duplicate the size of your checkpoint! 
slot_variables = [ optimizer.get_slot(var, name) for name in optimizer.get_slot_names() for var in trainable_vars ] slot_variables = list(filter(lambda var: var, slot_variables)) slot_init = tf.variables_initializer( slot_variables, name='optimizer_slots_initializer' ) # Create saver for saving/restoring model model_saver = tf.train.Saver( set(tf.global_variables()) - set(slot_variables), name='model_saver', max_to_keep=config.train.get('checkpoints_max_keep', 1), ) # Create saver for loading pretrained checkpoint into base network base_checkpoint_vars = model.get_base_network_checkpoint_vars() checkpoint_file = model.get_checkpoint_file() if base_checkpoint_vars and checkpoint_file: base_net_checkpoint_saver = tf.train.Saver( base_checkpoint_vars, name='base_net_checkpoint_saver' ) # We'll send this fn to Scaffold init_fn def load_base_net_checkpoint(_, session): base_net_checkpoint_saver.restore( session, checkpoint_file ) else: load_base_net_checkpoint = None tf.logging.info('{}Starting training for {}'.format(log_prefix, model)) run_options = None if config.train.full_trace: run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE ) # Create custom Scaffold to make sure we run our own init_op when model # is not restored from checkpoint. summary_op = [model.summary] summaries = tf.summary.merge_all() if summaries is not None: summary_op.append(summaries) summary_op = tf.summary.merge(summary_op) # `ready_for_local_init_op` is hardcoded to 'ready' as local init doesn't # depend on global init and `local_init_op` only runs when it is set as # 'ready' (an empty string tensor sets it as ready). scaffold = tf.train.Scaffold( saver=model_saver, init_op=tf.global_variables_initializer() if is_chief else tf.no_op(), local_init_op=tf.group(tf.initialize_local_variables(), slot_init), ready_for_local_init_op=tf.constant([], dtype=tf.string), summary_op=summary_op, init_fn=load_base_net_checkpoint, ) # Custom hooks for our session hooks = [] chief_only_hooks = [] if config.train.tf_debug: debug_hook = tf_debug.LocalCLIDebugHook() debug_hook.add_tensor_filter( 'has_inf_or_nan', tf_debug.has_inf_or_nan ) hooks.extend([debug_hook]) if not config.train.job_dir: tf.logging.warning( '`job_dir` is not defined. Checkpoints and logs will not be saved.' ) checkpoint_dir = None elif config.train.run_name: # Use run_name when available checkpoint_dir = os.path.join( config.train.job_dir, config.train.run_name ) else: checkpoint_dir = config.train.job_dir should_add_hooks = ( config.train.display_every_steps or config.train.display_every_secs and checkpoint_dir is not None ) if should_add_hooks: if not config.train.debug and image_vis == 'debug': tf.logging.warning('ImageVisHook will not run without debug mode.') elif image_vis is not None: # ImageVis only runs on the chief. 
#if "prediction_1_dict" in prediction_dict: if type(prediction_dict) == type([]): if hasattr(model, "partial_reduce_pred_list"): prediction_dict = prediction_dict[0] else: prediction_dict = prediction_dict[1] chief_only_hooks.append( ImageVisHook( prediction_dict, image=prediction_dict["image"], gt_bboxes=prediction_dict["gt_boxes"], config=config.model, output_dir=checkpoint_dir, every_n_steps=config.train.display_every_steps, every_n_secs=config.train.display_every_secs, image_visualization_mode=image_vis ) ) else: chief_only_hooks.append( ImageVisHook( prediction_dict, image=train_dataset['image'], gt_bboxes=train_dataset['bboxes'], config=config.model, output_dir=checkpoint_dir, every_n_steps=config.train.display_every_steps, every_n_secs=config.train.display_every_secs, image_visualization_mode=image_vis ) ) if var_vis is not None: # VarVis only runs on the chief. chief_only_hooks.append( VarVisHook( every_n_steps=config.train.display_every_steps, every_n_secs=config.train.display_every_secs, mode=var_vis, output_dir=checkpoint_dir, vars_summary=model.vars_summary, ) ) step = -1 with tf.train.MonitoredTrainingSession( master=target, is_chief=is_chief, checkpoint_dir=checkpoint_dir, scaffold=scaffold, hooks=hooks, chief_only_hooks=chief_only_hooks, save_checkpoint_secs=config.train.save_checkpoint_secs, save_summaries_steps=config.train.save_summaries_steps, save_summaries_secs=config.train.save_summaries_secs, ) as sess: coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: while not coord.should_stop(): before = time.time() _, train_loss, step, filename = sess.run([ train_op, total_loss, global_step, train_filename ], options=run_options) # TODO: Add image summary every once in a while. tf.logging.info( '{}step: {}, file: {}, train_loss: {}, in {:.2f}s'.format( log_prefix, step, filename, train_loss, time.time() - before )) if is_chief and step == 1: # We save the run after first batch to make sure everything # works properly. save_run(config, environment=environment) except tf.errors.OutOfRangeError: tf.logging.info( '{}finished training after {} epoch limit'.format( log_prefix, config.train.num_epochs ) ) # TODO: Print summary finally: coord.request_stop() # Wait for all threads to stop. coord.join(threads) return step
def model_rnn(x_t, y_t, x_e, y_e):
    with tf.variable_scope("Inputs"):
        x = tf.placeholder(tf.float32, [None, 10, 3], "Input")
        y = tf.placeholder(tf.float32, [None, 3], "Output")

    with tf.variable_scope("Net"):
        # norm = tf.nn.l2_normalize(x, 2, name="norm")
        l_cells = [tf.nn.rnn_cell.BasicLSTMCell(3) for _ in range(10)]
        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(cells=l_cells)
        output, state = tf.nn.dynamic_rnn(rnn_cells, x, dtype=tf.float32,
                                          scope="LTSM_l_inp")
        for i in range(LAYERS):
            l_cells = [tf.nn.rnn_cell.BasicLSTMCell(3) for _ in range(10)]
            rnn_cells = tf.nn.rnn_cell.MultiRNNCell(cells=l_cells)
            output, state = tf.nn.dynamic_rnn(rnn_cells, output,
                                              dtype=tf.float32,
                                              scope="LTSM_l_" + "{}".format(i))

    with tf.variable_scope("predictions"):
        output = tf.reshape(output, [tf.shape(output)[0], 30])
        prediction = tf.layers.dense(inputs=output, units=3, activation=None,
                                     name="prediction")
        classes = tf.nn.softmax(prediction)

    with tf.variable_scope("train"):
        global_step = tf.Variable(initial_value=0, trainable=False,
                                  name="global_step")
        loss = tf.losses.softmax_cross_entropy(onehot_labels=y,
                                               logits=prediction)
        train_step = tf.train.AdamOptimizer(
            learning_rate=LEARNING_RATE).minimize(
                loss=loss, global_step=tf.train.get_global_step())
        tf.summary.scalar(name="Cross Entropy", tensor=loss)

    with tf.variable_scope("Metrics"):
        pred = tf.round(classes)
        lab = tf.cast(y, tf.int32)
        pred = tf.cast(pred, tf.int32)
        accurasy = tf.contrib.metrics.accuracy(labels=lab, predictions=pred)
        tf.summary.scalar(name="Accuracy", tensor=accurasy)

    idx = list(range(x_t.shape[0]))
    n_batches = int(np.ceil(len(idx) / BATCH_SIZE))
    merged = tf.summary.merge_all()
    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(logdir="./logs/train/",
                                             graph=sess.graph)
        test_writer = tf.summary.FileWriter(logdir="./logs/test/",
                                            graph=sess.graph)
        sess.run(fetches=init_global)
        sess.run(tf.initialize_local_variables())

        for e in range(1, EPOCHS + 1):
            for s in range(n_batches):
                feed = {
                    x: x_t[s * BATCH_SIZE:s * BATCH_SIZE + BATCH_SIZE],
                    y: y_t[s * BATCH_SIZE:s * BATCH_SIZE + BATCH_SIZE]
                }
                acc = sess.run([train_step], feed_dict=feed)

            summary_train, loss_train, acc_train = sess.run(
                [merged, loss, accurasy], feed_dict={x: x_t, y: y_t})
            train_writer.add_summary(summary_train, e)

            summary_test, loss_test, acc_test = sess.run(
                [merged, loss, accurasy], feed_dict={x: x_e, y: y_e})
            test_writer.add_summary(summary_test, e)

            print(
                "Epoch: {0} Loss: {1} {3} Loss on test data: {2} {4}"
                .format(e, loss_train, loss_test, acc_train, acc_test))
            if (loss_train < 0.01):
                break

        saver.save(sess=sess, save_path="./ModelRNNClass/RNNClass")

        rez = sess.run(classes, feed_dict={x: x_e})
        for i in range(len(rez)):
            print(rez[i])
    return
def run(self):
    ops.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3
    # costs = []

    X, Y = self.create_placeholders(self.n_x, self.n_y)
    parameters = self.architecture.initialize()
    last_Z = self.architecture.forward_prop(X, parameters)
    prediction = tf.nn.sigmoid(last_Z)

    # _, aucTrain = tf.metrics.auc(Y, prediction, summation_method='careful_interpolation')
    _, aucTest = tf.metrics.auc(Y, prediction,
                                summation_method='careful_interpolation')
    # _, aucTrainPR = tf.metrics.auc(Y, prediction, curve="PR", summation_method='careful_interpolation')
    # _, aucTestPR = tf.metrics.auc(Y, prediction, curve="PR", summation_method='careful_interpolation')

    cost = self.compute_cost_weighted(last_Z, Y)
    optimizer = tf.contrib.opt.AdamWOptimizer(
        learning_rate=self.learning_rate,
        weight_decay=self.weight_decay).minimize(cost)

    init0 = tf.initialize_local_variables()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # logfp = open(self.logfile, "w")
    # logfp.write(self.timestr + "\n")
    # logfp.close()

    if not os.path.isdir(self.savedir):
        subprocess.run(['mkdir', '-p', self.savedir])

    with tf.Session() as sess:
        sess.run(init)
        sess.run(init0)

        for epoch in range(self.num_epochs):
            epoch_cost = 0.
            num_minibatches = int(self.m / self.minibatch_size)
            seed = seed + 1
            minibatches = self.random_mini_batches(seed)

            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run(
                    [optimizer, cost],
                    feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches

            # if self.print_cost and epoch % 100 == 0:
            #     trainAUC = sess.run(aucTrain, feed_dict={X: self.X_train, Y: self.Y_train})
            #     testAUC = sess.run(aucTest, feed_dict={X: self.X_test, Y: self.Y_test})
            #     trainAUCPR = sess.run(aucTrainPR, feed_dict={X: self.X_train, Y: self.Y_train})
            #     testAUCPR = sess.run(aucTestPR, feed_dict={X: self.X_test, Y: self.Y_test})
            #     self.print_accuracies(last_Z, X, Y, epoch_cost, trainAUC, testAUC, trainAUCPR, testAUCPR, epoch)
            # if self.print_cost and epoch % 5 == 0:
            #     costs.append(epoch_cost)
            saver.save(sess, self.savedir + self.savestr, global_step=epoch)

        # parameters = sess.run(parameters)
        # self.plot_cost(costs)

        # trainAUC = sess.run(aucTrain, feed_dict={X: self.X_train, Y: self.Y_train})
        testAUC = sess.run(aucTest, feed_dict={X: self.X_test, Y: self.Y_test})
        # trainAUCPR = sess.run(aucTrainPR, feed_dict={X: self.X_train, Y: self.Y_train})
        # testAUCPR = sess.run(aucTestPR, feed_dict={X: self.X_test, Y: self.Y_test})
        # self.print_accuracies(last_Z, X, Y, epoch_cost, trainAUC, testAUC, trainAUCPR, testAUCPR, self.num_epoch)

        predFunc = self.get_predictions(last_Z)
        preds = predFunc.eval({X: self.X_test})

    return preds, testAUC
svfOptimizationStep = optimizer.minimize(stateValueLoss)

# other ops
policyParams = utils.get_vars(policyParamsScope)
getPolicyParams = utils.flat_concat(policyParams)
setPolicyParams = utils.assign_params_from_flat(policyParamsFlatten,
                                                policyParams)

d, HxOp = utils.hesian_vector_product(KLcontraint, policyParams)
surrogateFlatLoss = utils.flat_grad(Lloss, policyParams)

if args.damping_coef > 0:
    HxOp += args.damping_coef * d

# tf session initialization
init = tf.initialize_local_variables()
init2 = tf.initialize_all_variables()
sess.run([init, init2])

nextObs = env.reset()
nextDone = 0
epLen = 0
epTotalRew = 0
totalEpisodes = 0
epTotalTrainRews = deque(maxlen=args.test_episodes_with_noise)

statistics = []
statistics.append(
    Statistics(args.epoch_len, inputLength, "observation", True))

# algorithm
def main(argv=None):
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    style_paths = STYLE_IMAGES.split(',')
    style_features_t = get_style_features(style_paths, STYLE_LAYERS)

    images = tf.expand_dims(reader.get_image(content_path, 256), 0)
    generated = image_transfer_net.net(images - reader.mean_pixel,
                                       training=True)

    # Feed the generated image and the training image through VGG together
    # for efficiency.
    net, _ = vgg.net(tf.concat([generated, images], 0) - reader.mean_pixel)

    content_loss = 0
    for layer in CONTENT_LAYERS:
        generated_images, content_images = tf.split(net[layer], 2, axis=0)
        size = tf.size(generated_images)
        shape = tf.shape(generated_images)
        width = shape[1]
        height = shape[2]
        num_filters = shape[3]
        content_loss += tf.nn.l2_loss(
            generated_images - content_images) / tf.to_float(size)
    content_loss = content_loss

    style_loss = 0
    for style_grams, layer in zip(style_features_t, STYLE_LAYERS):
        generated_images, _ = tf.split(net[layer], 2, axis=0)
        size = tf.size(generated_images)
        for style_gram in style_grams:
            style_loss += tf.nn.l2_loss(
                gram(generated_images) - style_gram) / tf.to_float(size)
    style_loss = style_loss / len(style_paths)

    tv_loss = total_variation_loss(generated)
    loss = (STYLE_WEIGHT * style_loss + CONTENT_WEIGHT * content_loss +
            TV_WEIGHT * tv_loss)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(
        loss, global_step=global_step)

    output_image = tf.image.encode_jpeg(
        tf.saturate_cast(tf.squeeze(generated) + reader.mean_pixel, tf.uint8))

    with tf.Session() as sess:
        saver = tf.train.Saver(tf.all_variables())
        file = tf.train.latest_checkpoint(model_path)
        sess.run(
            [tf.initialize_all_variables(), tf.initialize_local_variables()])
        if file:
            print('Restoring model from {}'.format(file))
            saver.restore(sess, file)

        # Multi-threaded input pipeline.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            start_time = time.time()
            this_time = start_time
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                if step % 100 == 0:
                    elapsed = time.time() - this_time
                    total_time = time.time() - start_time
                    this_time = time.time()
                    print("step:", step, " total_loss:", loss_t,
                          " this time:", elapsed, " total time:", total_time)
                if step % 10000 == 0:
                    saver.save(sess, model_path + '/fast-style-tresfer',
                               global_step=step)
                    # Evaluate the encode_jpeg op to get the image bytes.
                    with open('out.jpg', 'wb') as f:
                        f.write(sess.run(output_image))
        except tf.errors.OutOfRangeError:
            saver.save(sess, model_path + '/fast-style-tresfer-done')
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
def main(): st_time = time.time() global W1, W2 cid = 000000 # representing ps data # filename = 'ps_train.svm' # sc = SparkContext("local", "Simple App") # filename = 'hdfs://jetblue-nn1.blue.ygrid.yahoo.com:8020/projects/predseg/models/2017-09-29/ps.51/training_set' filename = '../../ps_data/ps_oct/training_set' # sc = SparkContext(conf=SparkConf().setAppName("ps_spark_grid") # conf = (SparkConf().set('spark.yarn.executor.memoryOverhead', '4096').set('spark.kryoserializer.buffer.max.mb', '2047').set('spark.driver.maxResultSize','2g')) conf = (SparkConf().setMaster('local[*]').set('spark.executor.memory', '4G').set('spark.driver.memory', '45G').set('spark.driver.maxResultSize', '10G')) sc = SparkContext(conf=conf) data = sc.textFile(filename) # labels_sca = data.map(lambda x: int(x[0])) # int type labels_sca = data.map(lambda line: line.split(',')).map(lambda y:float(y[len(y)-1])) nbr_samples = data.count() # nbr_samples = 10000 l_sca = np.array(labels_sca.take(nbr_samples)) #l, _ = fOnehot_encode(labels_sca.take(nbr_samples)) l = np.column_stack([np.array(l_sca), 1-np.array(l_sca)]) # features = data.map(lambda x: x.split(' ')).map(lambda y: [int(y[i][-1]) for i in range(902)]) features = data.map(lambda line: line.split(',')).map(lambda y: [float(y[i]) for i in range( len(y)-1) ]) X = np.array(features.take(nbr_samples)) # l = np.array(l) nbr_feature = len(X[0]) print ('nbr of features: ' + str(nbr_feature)) train_percentage = 0.67 # data_train, _ = fSplitTrainAndTest(X, l, l_sca, train_percentage) data_train, data_test = fSplitTrainAndTest(X, l, l_sca, train_percentage) # data_train = Data(X, l, l_sca) n = len(data_train.X) # total number of training samples d = len(data_train.X[0]) # number of features ll = len(data_train.labels[0]) #output dimension # print (n) # print (d) # print (ll) # Create the model x = tf.placeholder(tf.float32, [None, d], name = 'x') keep_prob = tf.placeholder(tf.float32, name = 'keep_prob') # if False: # y = deepnn(x, d, ll) # else: # y = deepnn_withBN(x, d, ll, 3, keep_prob) nbr_of_layers = 3 nbr_layer1 = 750 nbr_layer2 = 350 epsilon = 1e-3 x_drop = tf.nn.dropout(x, keep_prob) # adding dropout in the input layer # x_drop = x # no dropout on input layer W1 = weight_variable([d, nbr_layer1]) b1 = bias_variable([nbr_layer1]) z1 = tf.matmul(x_drop, W1) + b1 batch_mean1, batch_var1 = tf.nn.moments(z1, [0]) z1_hat = (z1 - batch_mean1)/tf.sqrt(batch_var1 + epsilon) scale1 = tf.Variable(tf.ones([nbr_layer1])) beta1 = tf.Variable(tf.zeros([nbr_layer1])) #b1 = bias_variable([nbr_layer1]) h1 = tf.nn.relu(scale1*z1_hat + beta1) h1_drop = tf.nn.dropout(h1, keep_prob) if nbr_of_layers == 2: W2 = weight_variable([nbr_layer1, ll]) b2 = bias_variable([ll]) y = tf.matmul(h1_drop,W2) + b2 #h1 = tf.nn.sigmoid(scale1*z1_hat + beta1) else: W2 = weight_variable([nbr_layer1, nbr_layer2]) b2 = bias_variable([nbr_layer2]) z2 = tf.matmul(h1_drop,W2) + b2 batch_mean2, batch_var2 = tf.nn.moments(z2, [0]) z2_hat = (z2 - batch_mean2)/tf.sqrt(batch_var2 + epsilon) scale2 = tf.Variable(tf.ones([nbr_layer2])) beta2 = tf.Variable(tf.zeros([nbr_layer2])) h2 = tf.nn.relu(scale2*z2_hat + beta2) h2_drop = tf.nn.dropout(h2, keep_prob) #h2 = tf.nn.sigmoid(scale2*z2_hat + beta2) W3 = weight_variable([nbr_layer2, ll]) b3 = bias_variable([ll]) y = tf.matmul(h2_drop, W3) + b3 # Define loss and optimizer y_ = tf.placeholder(tf.float32, [None, ll], name = 'y_') saver = tf.train.Saver() tf.summary.histogram('W1',W1) tf.summary.histogram('W2',W2) cross_entropy = tf.reduce_mean( 
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y), name = 'cross_entropy') starter_learning_rate = 0.05 global_step = tf.Variable(0, trainable=False) # train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step , decay_steps = 5000, decay_rate = 0.95, staircase=True, name=None) # train_step = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cross_entropy, global_step = global_step) train_step = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cross_entropy, global_step = global_step) # with tf.Session() as sess: sess = tf.InteractiveSession() # saver.save(sess, './myM') tf.global_variables_initializer().run() correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1), name = 'correct_prediction') accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name = 'acc') auc_ftrain = tf.metrics.auc(tf.cast(tf.argmax(y, 1), tf.float32), tf.cast(tf.argmax(y_, 1), tf.float32), name = 'auc_ftrain') auc_ftest = tf.metrics.auc(tf.cast(tf.argmax(y, 1), tf.float32), tf.cast(tf.argmax(y_, 1), tf.float32), name = 'auc_ftest') softmaxed_logits = tf.nn.softmax(y, name = 'softmaxed_logits') tf.local_variables_initializer().run() # initializes the local variables created by tf.metrics.auc tf.summary.scalar('cross_entropy', cross_entropy) tf.summary.scalar('accuracy', accuracy) tf.summary.scalar('auc_ftrain', auc_ftrain[0]) tf.summary.scalar('auc_ftest', auc_ftest[0]) train_writer = tf.summary.FileWriter("/tmp/histogram_example/train", sess.graph) test_writer = tf.summary.FileWriter("/tmp/histogram_example/test") # writer = tf.summary.FileWriter("/tmp/histogram_example") summaries = tf.summary.merge_all() # save st = np.array([]) ac_train = np.array([]) ca_train = np.array([]) auc_train = np.array([]) ac_test = np.array([]) ca_test = np.array([]) auc_test = np.array([]) batch_size = 40 for i in range(20): # train the whole epoch (first shuffle the data) idx = np.arange(0, n) np.random.shuffle(idx) X_shuffle = [data_train.X[k] for k in idx] labels_shuffle = [data_train.labels[k] for k in idx] for j in range(int(n/batch_size)): batch_xs = X_shuffle[j*batch_size: (j+1)*batch_size] batch_ys = labels_shuffle[j*batch_size: (j+1)*batch_size] sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys, keep_prob: 0.5}) # finish training, try on testing data if i % 10 == 0: print (i) soft_logits_train, summary_train, ca_train_i, ac_train_i, auc_train_i = sess.run([softmaxed_logits, summaries, cross_entropy, accuracy, auc_ftrain], feed_dict={x: data_train.X, y_: data_train.labels, keep_prob: 1.0}) soft_logits_test, summary_test, ca_test_i, ac_test_i,auc_test_i = sess.run([softmaxed_logits, summaries, cross_entropy, accuracy, auc_ftest], feed_dict={x: data_test.X, y_: data_test.labels, keep_prob: 1.0}) # [ca_test_i, ac_test_i,auc_test_i] = [0, 0, [0, 0]] #train_writer.add_summary(summary_train, i) #test_writer.add_summary(summary_test, i) # print (soft_logits_train) # print (data_train.labels) sk_auc_train = metrics.roc_auc_score(y_true = np.array(data_train.labels), y_score = np.array(soft_logits_train)) sk_auc_test = metrics.roc_auc_score(y_true = np.array(data_test.labels), y_score = np.array(soft_logits_test)) print ('learning rate: ' + str(sess.run(learning_rate))) print ('train cross entropy: ' + str(ca_train_i)) print ('test cross entropy: ' + str(ca_test_i)) print ('train accuracy: ' + str(ac_train_i)) print ('test accuracy: ' + str(ac_test_i)) print ('train auc: ' + 
str(auc_train_i[0])) print ('test auc: '+ str(auc_test_i[0])) print('train sk auc: ' + str(sk_auc_train)) print('test sk auc: ' + str(sk_auc_test)) saver.save(sess, './myM') sess.close() sc.stop() end_time = time.time() print('run time: '+ str(round(end_time-st_time)) + ' seconds') print('tensorboard --logdir=/tmp/histogram_example') return 1
def predict(image): with tf.Session() as sess: with gfile.FastGFile(graph_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) sess.graph.as_default() tf.import_graph_def(graph_def, name='') resnet = importlib.import_module('models.nn4', 'inference') weight_decay = 0. keep_probability = 1. # images = convert(files) phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') prelogits, _ = resnet.inference(image, keep_probability, phase_train=True, weight_decay=weight_decay) logits = slim.fully_connected( prelogits, 7, activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(0.), scope='Logits', reuse=False) # embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') labels = ops.convert_to_tensor([i for i in range(0, 7)], dtype=tf.int32) sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) feed_dict = {'input:0': image.eval(), phase_train_placeholder: False} # emb, lab = sess.run([embeddings, labels]) # emb_array = np.zeros((7, int(embeddings.get_shape()[1]))) # nums = ops.convert_to_tensor(emb_array, dtype=tf.float32) # emb_array[lab] = emb emb, lab = sess.run([embeddings, labels]) ''' emb_array = np.zeros((7, int(embeddings.get_shape()[1]))) emb_array[lab] = emb ''' embits = slim.fully_connected( emb, 7, activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(0.), scope='Embits', reuse=False) # print dir(embits) sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) print image.name[:-2] target_arr = [0] * 7 target = int(image.name[:1]) target_arr[target] = 1 print image.name print 'target ', target, target_arr outDict = {'target': target} def extract_values(tens, name): # print tens.eval()[0] # print logits.eval()[0] probLog = tf.nn.softmax(tens) # probEmb = tf.nn.softmax(embits) classLog = probLog.eval(feed_dict=feed_dict) # classEmb = probEmb.eval(feed_dict=feed_dict) from operator import add arrLog = [0] * 7 # arrEmb = [0]*7 # for i, n in enumerate(classEmb): # arrEmb = map(add, arrEmb, n) # arrEmb = [(x*100)/sum(arrEmb) for x in arrEmb] for i, n in enumerate(classLog): arrLog = map(add, arrLog, n) arrLog = [int((x * 100) / sum(arrLog)) for x in arrLog] # result_arr = map(add, arrLog, arrEmb) # maxRes = pd.Series(result_arr).idxmax() # em = pd.Series(arrEmb).idxmax() lo = res = pd.Series(arrLog).idxmax() # print 'log+emb:', res, result_arr print name, lo, arrLog return [name, lo] # print 'embeds :', em, arrEmb # out = {'target':target, 'Average':res, 'Embeddings':em, 'Logits':lo} # out = {'target':target, name:lo} lgts = extract_values(logits, 'logits') outDict[lgts[0]] = lgts[1] mbts = extract_values(embits, 'embits') outDict[mbts[0]] = mbts[1] with open('test_results.txt', 'a') as myfile: myfile.write(str(outDict) + ', ') return outDict
col1, col2, col3, col4, col5 = tf.decode_csv(value, record_defaults=record_defaults) features = tf.stack([col1, col2, col3, col4]) # push the features and the label into the example queue enq_op = example_queue.enqueue([features, [col5]]) # use a QueueRunner with two enqueue threads to load data into the example queue qr = tf.train.QueueRunner(example_queue, [enq_op] * 2) # register the runner so that tf.train.start_queue_runners can start all threads at once later tf.train.add_queue_runner(qr) xs = example_queue.dequeue() with tf.Session() as sess: sess.run(tf.initialize_local_variables()) # this line is required, otherwise an error is raised! coord = tf.train.Coordinator() # start all threads threads = tf.train.start_queue_runners(coord=coord) try: while not coord.should_stop(): x = sess.run(xs) print(x) except tf.errors.OutOfRangeError: print('Done training -- epoch limit reached') finally: coord.request_stop() coord.join(threads)
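As the translated comment notes, the local-variable initialization is what keeps this snippet from failing: in TF 1.x, tf.train.string_input_producer(..., num_epochs=N) keeps its epoch counter in a local variable, which the global initializer does not cover. A minimal, self-contained sketch of that requirement (the file name and epoch count are placeholders):

import tensorflow as tf

# num_epochs makes string_input_producer keep its epoch counter in a local variable
filename_queue = tf.train.string_input_producer(['data.csv'], num_epochs=2)
with tf.Session() as sess:
    # without the local initializer the queue runners fail with an
    # uninitialized-variable error for the epochs counter
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # ... read from filename_queue here ...
    coord.request_stop()
    coord.join(threads)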
print('running gen_epoch...') for i in range(0, len(inputs), batch_size): batch_inputs = inputs[i:i + batch_size] batch_labels = labels[i:i + batch_size] yield batch_inputs, batch_labels # Hyperparameters BATCH_SIZE = 64 NUM_EPOCHS = 10 LRATE = 0.001 # Initialization sess = tf.Session() model = Model(sess, LRATE) sess.run([tf.initialize_all_variables(), tf.initialize_local_variables()]) saver = tf.train.Saver() # train print('training...') for epoch in range(NUM_EPOCHS): batch_gen = gen_epoch(inputs_train, labels_train, BATCH_SIZE) for (inputs, labels) in batch_gen: step = model.train(inputs, labels) # save the model if step % 1000 == 0: save_path = saver.save(sess, "models/ ") # TODO: enter some save path once dataset is downloaded print("Model saved in file: %s" % save_path)
# parse the serialized example that was read features = tf.parse_single_example( # read a single example; one file may contain multiple examples serialized=serialized_example, features={ 'i': tf.FixedLenFeature([], tf.int64), 'j': tf.FixedLenFeature([], tf.int64) } ) # tf.train.shuffle_batch(num_threads=) with tf.Session() as sess: # print(tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES)) # tf.train.match_filenames_once() is stored as a local variable # tf.global_variables_initializer only initializes global variables tf.initialize_local_variables().run() print(sess.run(files)) # create a Coordinator; the queue-filling threads only start after start_queue_runners is called, otherwise the downstream compute ops would block forever coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) print(threads) # run the fetch op several times for i in range(6): print(sess.run([features['i'], features['j']])) coord.request_stop() coord.join(threads)
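The translated comments above point at the reason this example needs the local initializer: tf.train.match_filenames_once() stores its result in a local variable, which tf.global_variables_initializer() does not touch. A minimal sketch of checking that (the glob pattern is a placeholder):

import tensorflow as tf

files = tf.train.match_filenames_once('data.tfrecords-*')
# the matched-filenames variable shows up here, not in GLOBAL_VARIABLES
print(tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES))
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    print(sess.run(files))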
def train(lr=0.0001, nb_iterations=100000, batch_size=64): with tf.Graph().as_default(): images, labels = model.distorted_inputs('training', batch_size) imagesVal, labelsVal = model.distorted_inputs('test', batch_size) logits = model.inference_vgg(images, False, training=True) objectiveGT = model.loss_op(logits, labels, batch_size) accuracy = model.evaluate(logits, labels, 0.05555555) logitsVal = model.inference_vgg(imagesVal, True, training=False) objectiveGTVal = model.loss_op(logitsVal, labelsVal, batch_size) accuracyVal = model.evaluate(logitsVal, labelsVal, 0.0277778) ########################### #FLOP ########################### imagesFlop = model.flop_inputs('training', batch_size) imagesFlopVal = model.flop_inputs('test', batch_size) imagesFlop2 = tf.reverse(imagesFlop, [False, False, True, False]) imagesFlopVal2 = tf.reverse(imagesFlopVal, [False, False, True, False]) logitsFlop = model.inference_vgg(imagesFlop, True, training=True) logitsFlop2 = model.inference_vgg(imagesFlop2, True, training=True) objectiveFlop = model.loss_flop(logitsFlop, logitsFlop2, batch_size) logitsFlopVal = model.inference_vgg(imagesFlopVal, True, training=False) logitsFlopVal2 = model.inference_vgg(imagesFlopVal2, True, training=False) objectiveFlopVal = model.loss_flop(logitsFlopVal, logitsFlopVal2, batch_size) ########################### optimizer = tf.train.AdamOptimizer(lr) global_step = tf.Variable(0, name="global_step", trainable=False) train_step_GT = optimizer.minimize(objectiveGT, global_step=global_step) train_step_Flop = optimizer.minimize(objectiveFlop, global_step=global_step) summaries = tf.get_collection(tf.GraphKeys.SUMMARIES) #summaries.append(tf.scalar_summary('Loss Training GT', objectiveGT)) #summaries.append(tf.scalar_summary('Loss Training Flop', objectiveFlop)) summaries.append( tf.scalar_summary('Loss Validation GT', objectiveGTVal)) summaries.append( tf.scalar_summary('Loss Validation Flop', objectiveFlopVal)) summary_op = tf.merge_summary(summaries) # Start running operations on the Graph. with tf.Session() as sess: train_writer = tf.train.SummaryWriter('../summaries' + '/flop', sess.graph) saver = tf.train.Saver() sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) #saver.restore(sess,"modelFlop.cpkt") #print("Model restored") coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) model.init() for iteration in range(nb_iterations): result = sess.run([ logits, train_step_GT, objectiveGT, accuracy, objectiveFlop, train_step_Flop ]) if iteration % 100 == 0: trn_lossGT = result[2] print( "iter:%5d, trn_lossGT: %s, acc : %s, trn_lossFlop : %s" % (iteration, trn_lossGT, result[3], result[4])) if iteration % 1000 == 0: result = sess.run([ labelsVal, logitsVal, objectiveGTVal, objectiveFlopVal, summary_op ]) trn_lossGT = result[2] trn_lossFlop = result[3] # print("VALIDATION BATCH, iter:%5d, trn_lossGT: %s, trn_lossFlop : %s" % (iteration, trn_lossGT, trn_lossFlop)) # print(result[0]) # print(result[1]) #Save to summaries summary = tf.Summary() summary.ParseFromString(result[4]) train_writer.add_summary(summary, iteration) print("Saving model...") save_path = saver.save(sess, "modelFlop100000.cpkt") print("Model saved in file : %s" % save_path)
op_train = optimizer.apply_gradients(grads) idx_e = tf.placeholder(tf.int32, [None]) idx_r = tf.placeholder(tf.int32, [None]) normedE = tf.nn.l2_normalize(tf.nn.embedding_lookup(ent_embedding, idx_e), axis=1) normedR = tf.nn.l2_normalize(tf.nn.embedding_lookup(rel_embedding, idx_r), axis=1) updateE = tf.scatter_update(ent_embedding, idx_e, normedE) updateR = tf.scatter_update(rel_embedding, idx_r, normedR) saver = tf.train.Saver() # launch the graph init = tf.global_variables_initializer() init_local_op = tf.initialize_local_variables() loss_sum = 0 with tf.Session() as sess: sess.run(init) sess.run(init_local_op) ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("success! %s." % ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) else: print('fail to restore') n_epoch = 0 n_iter = 0 total = math.ceil(n_triple / n_batch) * num_epoch * 2
def train_autoencoder(): print("Slim-autoencoder running") #construct the autoencoder graph g = tf.Graph() with g.as_default(): # 4D Tensor placeholder for input images inputs = tf.placeholder(tf.float32, shape=[None] + [256, 256, 3], name="images") with tf.variable_scope("TF-Slim", [inputs]): # add model to graph lsr = build_encoder(inputs) result = build_decoder(lsr) #define optimiser and loss function loss = tf.reduce_mean(tf.square(result - inputs)) optimiser = tf.train.GradientDescentOptimizer(learning_rate).minimize( loss) # Initialize the variables init = tf.group(tf.initialize_local_variables(), tf.global_variables_initializer()) print("autoencoder starting tf.session") with tf.Session() as sess: sess.run(init) # save object saver = tf.train.Saver(tf.all_variables()) filename = './apmldataset.tfrecords' filename_queue = tf.train.string_input_producer([filename]) image, label = ld.read_and_decode(filename_queue, n_nodes_inpl) images, labels = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=800, num_threads=1, min_after_dequeue=50) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: for epoch in range(num_epochs): epoch_loss = 0 for batch in range(int(total_images / batch_size)): input_x, _ = sess.run([images, labels]) _, l1 = sess.run([optimiser, loss], feed_dict={inputs: input_x}) epoch_loss += l1 print("batch loss " + str(l1)) print('Epoch loss ' + str(epoch_loss)) #save the model saved_path = saver.save(sess, save_path) print("Saved in path: %s" % saved_path) except Exception as e: #we hit a mine, so stop doing shit print(e) coord.request_stop(e) finally: #Shut it down! Code red! Burn the evidence and run! coord.request_stop() coord.join(threads) print("autoencoder training complete")
def runTest(hps): #def runTest(data, cost, cost_fl, accuracy, sess, print_step=10, saveData=False): # mean, std = runTest(data=[images_batch_test, fl_batch_test, presence_batch_test, points_batch_test],cost=cost_test,cost_fl=cost_test_fl, accuracy=accuracy, sess=sess, print_step=1, saveData=True) print_step = 10 saveData = True # print(FLAGS.eval_data_path) _, filenames = getFileList(FLAGS.eval_data_path) n_files = len(filenames) # print(n_files) with tf.device('/cpu:0'): images_batch, fl_batch, presence_batch, points_batch = build_input( FLAGS.eval_data_path, hps.batch_size, FLAGS.mode) # print(images.get_shape()) # print(fl.get_shape()) # print(presence.get_shape()) # print(points.get_shape()) x = tf.placeholder(tf.float32, [hps.batch_size, 224, 224, 3]) y = tf.placeholder(tf.float32, [hps.batch_size, 3006]) fl = tf.placeholder(tf.float32, shape=[hps.batch_size]) presence = tf.placeholder(tf.float32, shape=[hps.batch_size]) model = resnet_model.ResNet(hps, x, y, fl, presence, FLAGS.mode) #, labels, fl, presence, mode): model.build_graph() saver = tf.train.Saver() config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allocator_type = 'BFC' with tf.Session(config=config) as sess: sess.run( tf.group(tf.initialize_all_variables(), tf.initialize_local_variables())) try: ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root) except tf.errors.OutOfRangeError as e: tf.logging.error('Cannot restore checkpoint: %s', e) if not (ckpt_state and ckpt_state.model_checkpoint_path): tf.logging.info('No model to eval yet at %s', FLAGS.log_root) tf.logging.info('Loading checkpoint %s', ckpt_state.model_checkpoint_path) saver.restore(sess, ckpt_state.model_checkpoint_path) rmse = tf.reduce_mean( tf.sqrt(tf.reduce_mean(tf.square(tf.sub(model.pred, model.labels))))) cost_fl = tf.reduce_mean(tf.divide(tf.abs(fl - model.pred_fl), fl)) # presence_one_hot = tf.one_hot(tf.cast(presence, tf.int32), 2) # correct = tf.cast(tf.equal(tf.cast(presence, tf.int64), tf.argmax(model.predictions, 1)), tf.float32) accuracy = model.accuracy #tf.reduce_mean(correct) # images = data[0] # efl = data[1] # pres = data[2] # points = data[3] losses = [] losses_fl = [] acc = [] fname = str(time.time()) coord2 = tf.train.Coordinator() threads2 = tf.train.start_queue_runners(coord=coord2, sess=sess) n_saved = 100 data = {} data['pred'] = np.empty((n_saved, 3006)) data['pred_fl'] = np.empty((n_saved)) data['gt'] = np.empty((n_saved, 3006)) data['gt_fl'] = np.empty((n_saved)) data['presence'] = np.empty((n_saved)) data['detected'] = np.empty((n_saved)) try: print("Testing..") step = 0 # while step < n_files: while not coord2.should_stop(): start_time = time.time() #images_batch, fl_batch, presence_batch, points_batch image_test, fl_test, presence_test, points_test = sess.run( [images_batch, fl_batch, presence_batch, points_batch]) # test_loss = cost.eval(feed_dict={x: image_test, y:points_test,keep_prob: 1.0}) test_loss = model.pred.eval( feed_dict={ x: image_test, fl: fl_test, presence: presence_test, y: points_test }) test_fl_loss = model.fl_re.eval( feed_dict={ x: image_test, fl: fl_test, presence: presence_test, y: points_test }) test_loss, test_fl_loss, test_acc = sess.run( [rmse, cost_fl, accuracy], feed_dict={ x: image_test, fl: fl_test, presence: presence_test, y: points_test }) if presence_test == 1: losses.append(test_loss) losses_fl.append(test_fl_loss) acc.append(test_acc) duration = time.time() - start_time if print_step != -1 and step % print_step == 0: print( 'Step %d: loss = %.2f loss_fl = %.2f acc = 
%.2f (%.3f sec)' % (step, test_loss * presence_test, test_fl_loss, test_acc, duration)) if saveData: if step < n_saved: data['gt'][step, :] = points_test data['gt_fl'][step] = fl_test data['presence'][step] = presence_test # data['pred'][step,:] = model.pred.eval(feed_dict={x: image_test, y:points_test,keep_prob: 1.0}) # data['pred'][step,:] = model.pred.eval(feed_dict={x: image_test, fl:fl_test, y:points_test}) # data['pred_fl'][step] = model.pred_fl.eval(feed_dict={x: image_test, fl:fl_test, y:points_test}) # data['detected'] = model.detected.eval(feed_dict={x: image_test, fl:fl_test, y:points_test}) if presence_test == 1: data['pred'][step, :], data['pred_fl'][ step], det = sess.run( [ model.pred, model.pred_fl, model.predictions ], feed_dict={ x: image_test, fl: fl_test, presence: presence_test, y: points_test }) data['detected'][step] = np.argmax(det) else: data['pred'][step, :] = 0 data['pred_fl'][step], data['detected'][ step] = sess.run( [model.pred_fl, model.predictions], feed_dict={ x: image_test, fl: fl_test, presence: presence_test, y: points_test }) # = np.argmax(det) # else: # break step += 1 except tf.errors.OutOfRangeError: pass finally: coord2.request_stop() coord2.join(threads2) mean_loss = np.mean(losses) mean_loss_fl = np.mean(losses_fl) mean_acc = np.mean(acc) print("Mean testing loss: {} std={} min={} max={}".format( mean_loss, np.std(losses), min(losses), max(losses))) print("Mean testing FL loss: {} std={} min={} max={}".format( mean_loss_fl, np.std(losses_fl), min(losses_fl), max(losses_fl))) print("Mean accuracy: {}".format(mean_acc)) if saveData: np.savez('results/test' + fname + '.npz', **data) print("Results saved to {}.".format('results/test' + fname + '.npz')) return mean_loss, np.std(losses)
def train(): header, train, val, test, data_dict = get_data(N_CLASSES, MERGE_TAGS, SPLIT_RANDOMLY) print header weights, biases = get_vars() coord = tf.train.Coordinator() data_man_train, pred, cost, auc_op, update_auc_op = get_end_ops( train, data_dict, coord, weights, biases) data_man_val, pred_val, cost_val, auc_op_val, update_auc_op_val = get_end_ops( val, data_dict, coord, weights, biases) optimizer = tf.train.AdamOptimizer( learning_rate=LEARNING_RATE).minimize(cost) saver = tf.train.Saver() sess = tf.Session() sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) threads = tf.train.start_queue_runners(sess=sess, coord=coord) data_man_train.start_threads(sess) data_man_val.start_threads(sess) if not os.path.exists(SAVE_DIR): os.mkdir(SAVE_DIR) try: print 'Starting training' step = 0 start = time.time() while True: sess.run(optimizer, feed_dict={keep_prob: DROPOUT}) if step % PRINT_EVERY == 0: loss, _ = sess.run([cost, update_auc_op], feed_dict={keep_prob: 1}) print 'Step', step, 'Epochs', float(step) * BATCH_SIZE / len(train), \ 'Minibatch loss', loss, 'Time', time.time() - start if step % EVAL_EVERY == 0 and step != 0: total_loss = 0 for _ in range(len(val) / BATCH_SIZE): loss, _ = sess.run([cost_val, update_auc_op_val], feed_dict={keep_prob: 1}) total_loss += loss auc, auc_val = sess.run([auc_op, auc_op_val]) print 'Train set AUC', auc print 'Validation set loss', total_loss / ( len(val) / BATCH_SIZE), 'Validation set AUC', auc_val print '' saver.save(sess, SAVE_DIR + 'model', global_step=step) sess.run(tf.initialize_local_variables()) step += 1 except: pass finally: coord.request_stop() coord.join(threads) sess.close()
def run_eval(): # Run evaluation on the input data set with tf.Graph().as_default() as g: # Get images and labels for the MRI data eval_data = FLAGS.eval_data == 'eval' # choose whether to evaluate the training set or the evaluation set evalfile = os.path.join(FLAGS.data_dir, VALIDATION_FILE if eval_data else TRAIN_FILE) # read the proper data set images, labels = nn.inputs(batch_size=FLAGS.batch_size, num_epochs=1, filename=evalfile) # Build a Graph that computes the logits predictions from the # inference model. We'll use a prior graph built by the training logits = nn.inference(images) # Calculate predictions. top_k_op = nn.evaluation(logits, labels) # setup the initialization of variables local_init = tf.initialize_local_variables() # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g) # create the saver and session saver = tf.train.Saver() sess = tf.Session() # init the local variables sess.run(local_init) while True: # read in the most recent checkpointed graph and weights ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] else: print('No checkpoint file found in %s' % FLAGS.checkpoint_dir) return # start up the threads coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: # true_count accumulates the correct predictions true_count = 0 step = 0 while not coord.should_stop(): # run a single iteration of evaluation predictions = sess.run([top_k_op]) # aggregate correct predictions true_count += np.sum(predictions) step += 1 # uncomment below line for debugging # print("step truecount", step, true_count) except tf.errors.OutOfRangeError: # print and output the relevant prediction accuracy precision = true_count / ( step * 256.0 * 256 ) print('OUTPUT: %s: precision = %.3f' % (datetime.now(), precision)) print('OUTPUT: %d images evaluated from file %s' % (step, evalfile)) # create summary to show in TensorBoard summary = tf.Summary() summary.ParseFromString(sess.run(summary_op)) summary.value.add(tag='1cnn_accuracy', simple_value=precision) summary_writer.add_summary(summary, global_step) finally: coord.request_stop() # shutdown gracefully coord.join(threads) if FLAGS.run_once: break time.sleep(FLAGS.eval_interval_secs) sess.close()
def main(): dataset = tf.placeholder_with_default(0, []) document_batch, document_weights, query_batch, query_weights, answer_batch = read_records( dataset) y_hat, reg = inference(document_batch, document_weights, query_batch, query_weights) loss, train_op, global_step, accuracy = train(y_hat, reg, document_batch, document_weights, answer_batch) summary_op = tf.merge_all_summaries() with tf.Session() as sess: summary_writer = tf.train.SummaryWriter(model_path, sess.graph) saver_variables = tf.all_variables() if not FLAGS.training: saver_variables = filter( lambda var: var.name != 'input_producer/limit_epochs/epochs:0', saver_variables) saver_variables = filter(lambda var: var.name != 'smooth_acc:0', saver_variables) saver_variables = filter(lambda var: var.name != 'avg_acc:0', saver_variables) saver = tf.train.Saver(saver_variables) sess.run( [tf.initialize_all_variables(), tf.initialize_local_variables()]) model = tf.train.latest_checkpoint(model_path) if model: print('Restoring ' + model) saver.restore(sess, model) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) start_time = time.time() accumulated_accuracy = 0 try: if FLAGS.training: while not coord.should_stop(): loss_t, _, step, acc = sess.run( [loss, train_op, global_step, accuracy], feed_dict={dataset: 0}) elapsed_time, start_time = time.time( ) - start_time, time.time() print(step, loss_t, acc, elapsed_time) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) if step % 1000 == 0: saver.save(sess, model_path + '/aoa', global_step=step) else: step = 0 while not coord.should_stop(): acc = sess.run(accuracy, feed_dict={dataset: 2}) step += 1 accumulated_accuracy += (acc - accumulated_accuracy) / step elapsed_time, start_time = time.time( ) - start_time, time.time() print(accumulated_accuracy, acc, elapsed_time) except tf.errors.OutOfRangeError: print('Done!') finally: coord.request_stop() coord.join(threads) '''
def main(_): # # 1. read training data # # image - 784 (=28 x 28) elements of grey-scaled integer value [0, 1] # label - digit (0, 1, ..., 9) train_queue = tf.train.string_input_producer( [FLAGS.train_file], num_epochs=10) # when all data is read, it raises OutOfRange train_reader = tf.TFRecordReader() _, train_serialized_exam = train_reader.read(train_queue) train_exam = tf.parse_single_example(train_serialized_exam, features={ 'image_raw': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.int64) }) train_image = tf.decode_raw(train_exam['image_raw'], tf.uint8) train_image.set_shape([784]) train_image = tf.cast(train_image, tf.float32) * (1. / 255) train_label = tf.cast(train_exam['label'], tf.int32) train_batch_image, train_batch_label = tf.train.batch( [train_image, train_label], batch_size=batch_size) # # 2. read test data # test_queue = tf.train.string_input_producer( [FLAGS.test_file], num_epochs=1) # when all data is read, it raises OutOfRange test_reader = tf.TFRecordReader() _, test_serialized_exam = test_reader.read(test_queue) test_exam = tf.parse_single_example(test_serialized_exam, features={ 'image_raw': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.int64) }) test_image = tf.decode_raw(test_exam['image_raw'], tf.uint8) test_image.set_shape([784]) test_image = tf.cast(test_image, tf.float32) * (1. / 255) test_label = tf.cast(test_exam['label'], tf.int32) test_batch_image, test_batch_label = tf.train.batch( [test_image, test_label], batch_size=batch_size) # simply enqueue/dequeue_many with tf.FIFOQueue # # for debugging... (check input) # with tf.Session() as sess: # sess.run(tf.global_variables_initializer()) # tf.train.start_queue_runners(sess=sess) # for i in range(2): # debug_image, debug_label = sess.run([train_batch_image, train_batch_label]) # tf.summary.image('images', debug_image) # print(debug_label) # # 2. define graph # # define input plchd_image = tf.placeholder( dtype=tf.float32, shape=(batch_size, 784) ) # here we use fixed dimension with batch_size. (Please use undefined dimension with None in production.) plchd_label = tf.placeholder( dtype=tf.int32, shape=(batch_size) ) # here we use fixed dimension with batch_size. (Please use undefined dimension with None in production.) # define network and inference # (simple 2 fully connected hidden layer : 784->128->64->10) with tf.name_scope('hidden1'): weights = tf.Variable(tf.truncated_normal([784, 128], stddev=1.0 / math.sqrt(float(784))), name='weights') biases = tf.Variable(tf.zeros([128]), name='biases') hidden1 = tf.nn.relu(tf.matmul(plchd_image, weights) + biases) with tf.name_scope('hidden2'): weights = tf.Variable(tf.truncated_normal([128, 64], stddev=1.0 / math.sqrt(float(128))), name='weights') biases = tf.Variable(tf.zeros([64]), name='biases') hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases) with tf.name_scope('softmax_linear'): weights = tf.Variable(tf.truncated_normal([64, 10], stddev=1.0 / math.sqrt(float(64))), name='weights') biases = tf.Variable(tf.zeros([10]), name='biases') logits = tf.matmul(hidden2, weights) + biases # define optimization (training) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.07) loss = tf.losses.sparse_softmax_cross_entropy(labels=plchd_label, logits=logits) train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step()) # define testing array_correct = tf.nn.in_top_k(logits, plchd_label, 1) test_op = tf.reduce_sum(tf.cast(array_correct, tf.int32)) # # 3. 
run session # with tf.Session( ) as sess: # use tf.train.MonitoredTrainingSession for more advanced features ... sess.run( tf.group(tf.initialize_all_variables(), tf.initialize_local_variables())) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners( sess=sess, coord=coord) # for data batching # train !!! try: step = 0 while not coord.should_stop(): array_image, array_label = sess.run( [train_batch_image, train_batch_label]) feed_dict = { plchd_image: array_image, plchd_label: array_label } _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) if step % 100 == 0: print("Worker: Step %d (Loss: %.2f)" % (step, loss_value)) step += 1 except tf.errors.OutOfRangeError: print('Training done !') # test (evaluate) !!! num_true = 0 try: num_test = 0 while not coord.should_stop(): array_image, array_label = sess.run( [test_batch_image, test_batch_label]) feed_dict = { plchd_image: array_image, plchd_label: array_label } num_true += sess.run(test_op, feed_dict=feed_dict) num_test += batch_size except tf.errors.OutOfRangeError: print('Scoring done !') precision = float(num_true) / num_test print('Accuracy: %0.04f (Num of samples: %d)' % (precision, num_test)) coord.request_stop() coord.join(threads)
def main(args): """ main """ if not os.path.exists(args.test_ds): print("{} does not exists".format(args.test_ds)) return 1 # export model.pb from session dir. Skip if model.pb already exists model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-0", train.MODEL_PATH) graph = model.load(train.MODEL_PATH, args.device) with graph.as_default(): logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0") logits = tf.squeeze(logits, [1, 2]) # sparse labels, pgnet output -> 20 possible values labels_ = tf.placeholder(tf.int64, [None]) predicted_labels = tf.argmax(logits, 1) top_1_op = tf.nn.in_top_k(logits, labels_, 1) top_5_op = tf.nn.in_top_k(logits, labels_, 5) image_queue, label_queue = pascifar.test(args.test_ds, BATCH_SIZE, model.INPUT_SIDE, args.test_ds + "/ts.csv") # initialize all variables init_op = tf.group(tf.global_variables_initializer(), tf.initialize_local_variables()) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: sess.run(init_op) # Start input enqueue threads. print("Starting input enqueue threads. Please wait...") coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: count_top_1 = 0.0 count_top_5 = 0.0 processed = 0 while not coord.should_stop(): image_batch, label_batch = sess.run( [image_queue, label_queue]) print(label_batch) top_1, top_5, pred_lab = sess.run( [top_1_op, top_5_op, predicted_labels], feed_dict={ "images_:0": image_batch, labels_: label_batch, }) count_top_1 += np.sum(top_1) count_top_5 += np.sum(top_5) processed += 1 print(pred_lab) print(label_batch) print(top_1, top_5) except tf.errors.OutOfRangeError: total_sample_count = processed * BATCH_SIZE precision_at_1 = count_top_1 / total_sample_count recall_at_5 = count_top_5 / total_sample_count print( 'precision @ 1 = {} recall @ 5 = {} [{} examples]'.format( precision_at_1, recall_at_5, total_sample_count)) finally: # When done, ask the threads to stop. coord.request_stop() # Wait for threads to finish. coord.join(threads)
lstm_tail_gate = lstm_tail y_conv = tf.matmul(lstm_tail_gate, W_atten) y_conv_softmax = tf.nn.softmax(y_conv) #print(y_conv) ### Densely Connected Layer ### a fully-connected layer with 1024 neurons to allow processing on the entire seq. cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(y_conv, y_)) train_step = tf.train.AdamOptimizer(training_speed).minimize(cross_entropy) correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) sess = tf.InteractiveSession() sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) training_accuracy = open('training_accuracy.txt', 'w') all_file_matrix = open('all_file_matrix.txt', 'r') file_vector = all_file_matrix.readline().split( ) #[x.strip() for x in all_file_matrix.readline().split('\t')] index_file = file_vector[0] label_file = file_vector[1] seq_file = file_vector[2] dnase_file = file_vector[3] conserve_file = file_vector[4] print('read data') data_seq_pos, data_dnase_pos, data_seq_neg, data_dnase_neg = read_data_sep( index_file, label_file, dnase_file, seq_file, conserve_file, 5000000)
def run_experiment(env, seed, visualize, algorithm): print("Running experiment " + str(seed + 1)) np.random.seed(seed) env.set_seed(seed) spec = env.get_spec() # CMA-ES has a bit of a special-treatment if algorithm == "cmaes": opts = cma.CMAOptions() opts["bounds"] = [ spec.init_policy.lower_bounds, spec.init_policy.upper_bounds ] opts["maxfevals"] = (spec.n_iter + spec.buffer_size) * spec.n_samples opts["verbose"] = 1 opts["tolstagnation"] = int(1e6) # We need the +1 here since CMA-ES rejects zero seeds opts["seed"] = seed + 1 # We are only allowed to specify one variance for all variables, so we need to take the maximum to not # shrink the search space alg = cma.CMAEvolutionStrategy( spec.init_policy._mu, np.sqrt(np.max(np.diag(spec.init_policy._sigma))), opts) idx = [] rewards = [] successes = [] theta_history = [] count = 0 while "maxfevals" not in alg.stop(): thetas = alg.ask() contexts = np.array([ spec.target_dist.get_moments()[0] for i in range(len(thetas)) ]) r, s = env.evaluate(contexts, np.array(thetas)) idx.append( np.maximum(0., (float(count) - float(spec.n_samples * (spec.buffer_size - 1))) / float(spec.n_samples))) count += len(thetas) rewards.append(np.mean(r)) successes.append(np.mean(s)) theta_history.append(np.array(thetas)) print("Seed: %d, Count: %d, Reward: %4.2f, Success Rate: %1.2f" % (seed, count, np.mean(r), np.mean(s))) alg.tell(thetas, list(-r)) alg.disp() log_data = { "idx": idx, "rewards": rewards, "successes": successes, "thetas": theta_history } # The remaining algorithms can be treated quite uniformly except for their setup else: t_start = time.time() policies = [] average_rewards = [] average_successes = [] if algorithm == "goalgan": if isinstance(spec.init_dist, KLJoint): lb = np.copy(spec.init_dist.distribution.lower_bounds) ub = np.copy(spec.init_dist.distribution.upper_bounds) else: lb = np.copy(spec.init_dist.lower_bounds) ub = np.copy(spec.init_dist.upper_bounds) n_old_samples = int(spec.goal_gan_spec.p_old_samples * spec.n_samples) n_samples = spec.n_samples - n_old_samples n_context_samples = int(spec.goal_gan_spec.p_context_samples * n_samples) # We allow GoalGAN for one more sample in case the samples are not evenly dividable n_resamples = int(np.ceil((n_samples - 2 * n_context_samples) / 2)) tf_session = tf.Session() gan = StateGAN( state_size=len(lb), evaluater_size=1, state_range=0.5 * (ub - lb), state_center=lb + 0.5 * (ub - lb), state_noise_level=(spec.goal_gan_spec.state_noise_level * (ub - lb))[None, :], generator_layers=[256, 256], discriminator_layers=[128, 128], noise_size=lb.shape[0], tf_session=tf_session, configs={"supress_all_logging": True}) tf_session.run(tf.initialize_local_variables()) gan.pretrain_uniform( outer_iters=spec.goal_gan_spec.gan_pre_train_iters) alg = CREPS(spec.value_features, None, spec.regularizer) policy = copy.deepcopy(spec.init_policy) distribution = None buffer = ExperienceBuffer(spec.goal_gan_spec.buffer_size, 4) gan_buffer = ExperienceBuffer(spec.goal_gan_spec.buffer_size, 2) success_buffer = StateCollection( 1, spec.goal_gan_spec.state_distance_threshold * np.linalg.norm(ub - lb)) # Fill the initial buffer for j in range(0, spec.buffer_size - 1): contexts, thetas, rewards, successes, success_rates, labels, old_contexts, old_thetas, old_rewards, \ old_successes = gan_policy_rollout(gan, policy, env, spec, success_buffer, lb, ub, n_old_samples, n_context_samples, n_resamples) buffer.insert(np.concatenate((contexts, old_contexts), axis=0), np.concatenate((thetas, old_thetas), axis=0), np.concatenate((rewards, 
old_rewards)), np.concatenate((successes, old_successes))) success_buffer.append( contexts[0:n_context_samples][success_rates == 1., :]) gan_buffer.insert(contexts[0:n_context_samples], labels[:, None]) it_fn = partial(goal_gan_iteration_function, env, spec, policies, average_rewards, average_successes, alg, gan, policy, buffer, gan_buffer, success_buffer, lb, ub, n_old_samples, n_context_samples, n_resamples, seed) elif algorithm == "saggriac": if isinstance(spec.init_dist, KLJoint): lb = np.copy(spec.init_dist.distribution.lower_bounds) ub = np.copy(spec.init_dist.distribution.upper_bounds) else: lb = np.copy(spec.init_dist.lower_bounds) ub = np.copy(spec.init_dist.upper_bounds) sr = SaggRIAC(len(lb), state_bounds=np.stack((lb, ub)), state_center=lb + ((ub - lb) / 2.), max_goals=spec.sagg_riac_spec.max_goals, max_history=spec.sagg_riac_spec.max_history) policy = copy.deepcopy(spec.init_policy) distribution = None alg = CREPS(spec.value_features, None, spec.regularizer) buffer = ExperienceBuffer(spec.buffer_size, 4) # Create the initial experience for j in range(0, spec.buffer_size - 1): contexts = np.array(sr.sample_states(spec.n_samples)) thetas = np.array([ policy.sample_action(contexts[k, :]) for k in range(0, spec.n_samples) ]) rewards, successes = env.evaluate(contexts, thetas) sr.add_states(contexts, rewards) buffer.insert(contexts, thetas, rewards, successes) it_fn = partial(sagg_riac_iteration_function, env, spec, policies, average_rewards, average_successes, alg, sr, policy, buffer, seed) else: if algorithm == "creps": feature_mean = np.mean(spec.value_features( spec.target_dist.sample(n_samples=10 * spec.n_samples)), axis=0) alg = CREPS(spec.value_features, feature_mean, spec.regularizer) else: alg = SPRL(spec.value_features, spec.target_dist.get_log_pdf, spec.regularizer) # We copy the initial distribution and the policy since we may run multiple iterations if algorithm == "creps": distribution = None policy = copy.deepcopy(spec.init_policy) else: itl_distribution = copy.deepcopy(spec.init_dist) distribution = itl_distribution.distribution policy = itl_distribution.policy # We initialize the buffer with data buffer = ExperienceBuffer(spec.buffer_size, 4) for j in range(0, spec.buffer_size - 1): buffer.insert( *env.sample_rewards(spec.n_samples, policy, distribution)) if algorithm == "creps": it_fn = partial(creps_iteration_function, env, spec, policies, average_rewards, average_successes, alg, policy, buffer, seed) else: it_fn = partial(sprl_iteration_function, env, spec, policies, average_rewards, average_successes, alg, itl_distribution, buffer, seed) # This is the actual main loop if visualize: vis = Visualization(spec.target_dist, policy, distribution) vis.visualize(spec.n_iter, it_fn) else: for j in range(0, spec.n_iter): it_fn(j) policies.append(copy.deepcopy(policy)) t_end = time.time() __, __, rewards, successes = env.sample_rewards(spec.n_samples, policy) print( "Seed: %d, Final Reward: %4.2f, Final Success Rate: %1.2f, Training Time: %.2E" % (seed, np.mean(rewards), np.mean(successes), t_end - t_start)) log_data = (policies, average_rewards, average_successes) # If we used GoalGAN, we need to close the session and reset the graph for the next run if algorithm == "goalgan": tf_session.close() tf.reset_default_graph() return log_data
def run_conv(xd, yd, zd, bd, fc_nodes, dropout): """Running a CNN""" y_conv = nn(xd, yd, zd, bd, fc_nodes) # Set variables. y_conv_softmax = tf.nn.softmax(y_conv) # For auROC/auPRC calculations. init = tf.initialize_all_variables() init_local = tf.initialize_local_variables() saver = tf.train.Saver() with tf.Session() as sess: print "In session" sess.run(init) sess.run(init_local) saver.restore(sess, "trained_model.ckpt") # Start enque threads: coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) steps = 0 try: while not coord.should_stop(): steps = steps + 1 temp = y_conv_softmax.eval(feed_dict={keep_prob: 1}) if steps == 1: pred = temp else: pred = np.vstack((pred, temp)) except tf.errors.OutOfRangeError: print "Reached End" finally: coord.request_stop() # Wait for the threads to finish coord.join(threads) sess.close() with tf.Session() as sess2: print "In session" sess2.run(init) sess2.run(init_local) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess2, coord=coord) outf = sys.argv[2] steps = 0 try: while not coord.should_stop(): steps += 1 print "Evaluating batch %s" % steps temp = labels_batch.eval() if steps == 1: test_labels = temp #print test_labels[:10] else: test_labels = np.vstack((test_labels, temp)) except tf.errors.OutOfRangeError: print "Reached End" finally: coord.request_stop() coord.join(threads) sess.close() with open(outf, "a") as fout: #fout.write( "Fixing convolution size at 25 and # filters at 32\n") #fout.write( "Model Parameters: dropout: %s\n" % dropout) #print sklearn.metrics.roc_auc_score(test_labels,pred) print len(test_labels) print len(pred) np.savetxt("labels.txt", test_labels) np.savetxt("predictions.txt", pred)
def testInitializedVariableValue(self): with self.test_session() as sess: a = tf.contrib.framework.local_variable([0, 0, 0, 0, 0], name='a') sess.run(tf.initialize_local_variables()) self.assertAllEqual(a.eval(), [0] * 5)
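For reference, tf.initialize_local_variables() used throughout these examples was deprecated in TF 1.x in favor of tf.local_variables_initializer(); a minimal sketch of the same check written against the newer name:

import tensorflow as tf

with tf.Session() as sess:
    a = tf.contrib.framework.local_variable([0, 0, 0, 0, 0], name='a')
    sess.run(tf.local_variables_initializer())
    print(sess.run(a))  # [0 0 0 0 0]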
def main(args): network = importlib.import_module(args.model_def, 'inference') subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) train_set = facenet.get_dataset(args.data_dir) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) # Read data and apply label preserving distortions image_batch, label_batch = facenet.read_and_augument_data(image_list, label_list, args.image_size, args.batch_size, args.max_nrof_epochs, args.random_crop, args.random_flip, args.nrof_preprocess_threads) print('Total number of classes: %d' % len(train_set)) print('Total number of examples: %d' % len(image_list)) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') # Placeholder for phase_train phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, weight_decay=args.weight_decay) with tf.variable_scope('Logits'): n = int(prelogits.get_shape()[1]) m = len(train_set) w = tf.get_variable('w', shape=[n,m], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.1), regularizer=slim.l2_regularizer(args.weight_decay), trainable=True) b = tf.get_variable('b', [m], initializer=None, trainable=True) logits = tf.matmul(prelogits, w) + b # Add DeCov regularization loss if args.decov_loss_factor>0.0: logits_decov_loss = facenet.decov_loss(logits) * args.decov_loss_factor tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, logits_decov_loss) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.scalar_summary('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits, label_batch, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) # Calculate the total losses regularization_losses = 
tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.all_variables()) # Create a saver save_variables = list(set(tf.trainable_variables())-set([w])-set([b])) saver = tf.train.Saver(save_variables, max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Start running operations on the Graph. gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) summary_writer = tf.train.SummaryWriter(log_dir, sess.graph) tf.train.start_queue_runners(sess=sess) with sess.as_default(): if pretrained_model: saver.restore(sess, pretrained_model) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, epoch, phase_train_placeholder, learning_rate_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) # Evaluate on LFW if args.lfw_dir: start_time = time.time() _, _, accuracy, val, val_std, far = lfw.validate(sess, lfw_paths, actual_issame, args.seed, args.batch_size, image_batch, phase_train_placeholder, embeddings, nrof_folds=args.lfw_nrof_folds) print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) lfw_time = time.time() - start_time # Add validation loss and accuracy to summary summary = tf.Summary() #pylint: disable=maybe-no-member summary.value.add(tag='lfw/accuracy', simple_value=np.mean(accuracy)) summary.value.add(tag='lfw/val_rate', simple_value=val) summary.value.add(tag='time/lfw', simple_value=lfw_time) summary_writer.add_summary(summary, step) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, step) return model_dir
##logit layer -> This layer outputs un-normalized class scores w_logit = tf.Variable(tf.random_normal([third_hidden_unit_size,logit_shape],0,1,dtype=tf.float32),dtype=tf.float32, name='w_logit') b_logit = tf.Variable(tf.zeros(shape=logit_shape,dtype=tf.float32),dtype=tf.float32, name='b_logit') final_layer_output = tf.matmul(layer3_output,w_logit,name='layer_logit_matmul') + b_logit loss = tf.nn.sparse_softmax_cross_entropy_with_logits(final_layer_output,y) opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) train = opt.minimize(loss) global_step = tf.Variable(0,name = 'global_step',trainable=False) saver = tf.train.Saver() init = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables()) sm = tf.train.SessionManager() with sm.prepare_session("", init_op=init, saver=saver, checkpoint_dir=save_path) as ss: for j in range(epoc): step = ss.run(tf.assign(global_step, global_step+1)) for i in range(steps): ls, tr = ss.run([loss, train],feed_dict={x:X_train[batch_size*i:batch_size*(i+1)],y:y_train[batch_size*i:batch_size*(i+1)]}) if i%10000 ==0: l1,l2,l3 = ss.run([layer1_output,layer2_output,layer3_output],feed_dict={x:X_train[batch_size*i:batch_size*(i+1)],y:y_train[batch_size*i:batch_size*(i+1)]}) l1 = 1 - np.float(len(np.flatnonzero(l1)))/np.float(len(l1.ravel())) l2 = 1 - np.float(len(np.flatnonzero(l2)))/np.float(len(l2.ravel())) l3 = 1 - np.float(len(np.flatnonzero(l3)))/np.float(len(l3.ravel())) log.info("Epoch: %d - Fraction of zeros in activation layers (gradient-kill ratio): %.5f, %.5f, %.5f" %(j,l1,l2,l3)) correct_prediction = tf.equal(tf.cast(tf.argmax(final_layer_output,1),tf.int32),y) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) log.info("Epoch: %d - Loss at step %d: %.10f, Accuracy: %.6f" %(j,i,ls.mean(),accuracy.eval(feed_dict={x:X_test, y:y_test})))
def train(): """Train eccentricity mdoel for a number of steps.""" json.dumps(FLAGS.__dict__, os.path.join(FLAGS.train_dir, 'pm{}_lr{:.0e}_c{}'.format(FLAGS.pm, FLAGS.learning_rate, FLAGS.chevron), 'settings.json'), ensure_ascii=True) with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Get images and labels for CIFAR-10. images, labels = ecc.inputs('train', FLAGS.batch_size, FLAGS.num_epochs) # Build a Graph that computes the logits predictions from the # inference model. logits = ecc.inference(images) # Calculate loss. loss = ecc.loss(logits, labels) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. train_op = ecc.train(loss, global_step) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.merge_all_summaries() # Build an initialization operation to run below. init = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables()) # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) sess.run(init) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) summary_writer = tf.train.SummaryWriter(train_dir(), sess.graph) print('Settings used are:') f = FLAGS.__dict__['__flags'] for key in sorted(f.keys()): print('{} : {}, type {}'.format(key, f[key], type(f[key]))) # debug_print = tf.Print(images, [images, tf.shape(images), tf.reduce_max(images), tf.reduce_min(images)], message="Images at runtime are") try: step = 0 while not coord.should_stop(): start_time = time.time() _, loss_value = sess.run([train_op, loss]) # sess.run(debug_print) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % 10 == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 1000 == 0: checkpoint_path = os.path.join(train_dir(), 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) step += 1 except tf.errors.OutOfRangeError: checkpoint_path = os.path.join(train_dir(), 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step)) finally: # When done, ask the threads to stop. coord.request_stop() # Wait for threads to finish. coord.join(threads) sess.close()
def __init__(self, filename_list, Model, batch_size=32, image_size=256, image_channels=3): #We create a tf variable to hold the global step, this has the effect #that when a checkpoint is created this value is saved. #Making the plots in tensorboard being continued when the model is restored. global_step = tf.Variable(0) increment_step = global_step.assign_add(1) #Create a queue that will be automatically fill by another thread #as we read batches out of it batch = self.batch_queue(filename_list, batch_size, image_size, image_channels) # Create the graph, etc. m = Model(batch) init_op = tf.initialize_all_variables() #This is required to intialize num_epochs for the filename_queue init_local = tf.initialize_local_variables() # Create a saver. saver = tf.train.Saver(keep_checkpoint_every_n_hours=1) # Create a session for running operations in the Graph. sess = tf.Session(config=tf.ConfigProto( gpu_options = tf.GPUOptions(allow_growth=True), log_device_placement=False, allow_soft_placement=True)) # Create a summary writer, add the 'graph' to the event file. log_datetime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') writer = tf.train.SummaryWriter('./logs/'+log_datetime, sess.graph, flush_secs=30, max_queue=2) # Initialize the variables (like the epoch counter). sess.run([init_op,init_local]) # Start input enqueue threads. coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: progress = tqdm() while not coord.should_stop(): # Run training steps or whatever global_step = sess.run(increment_step) progress.update() m.step(sess) if global_step % 10 == 0: m.summarize(sess, writer, global_step) if global_step % 2000 == 0: # Append the step number to the checkpoint name: saver.save(sess, './logs/'+log_datetime, global_step=global_step) except tf.errors.OutOfRangeError: print('Done training -- epoch limit reached') finally: # When done, ask the threads to stop. coord.request_stop() # Wait for threads to finish. coord.join(threads) sess.close()