def main(argv=None):
    """Train the model, resuming from the newest checkpoint when one exists.

    Builds the training graph from ``tf_model``, restores the latest
    checkpoint found in ``FLAGS.train_dir`` (or initializes all variables
    when there is none), then runs training steps up to ``FLAGS.max_steps``.
    Every 5 steps progress is printed and a summary is written; every 50
    steps, and on the last step, a checkpoint is saved.

    Args:
        argv: Unused.
    """
    if not gfile.Exists(FLAGS.train_dir):
        gfile.MakeDirs(FLAGS.train_dir)

    graph = tf.Graph()
    # NOTE(review): the context manager returned by graph.device() is never
    # entered, so this call does not pin any ops to the CPU. It is kept as-is
    # to avoid changing device placement; wrap the graph construction in
    # `with graph.device("/cpu:0"):` if CPU pinning is actually intended.
    graph.device("/cpu:0")
    with graph.as_default():
        global_step = tf.Variable(0, trainable=False)
        images, labels = tf_model.inputs(training=True)
        logits = tf_model.inference(images)
        loss, accuracy = tf_model.loss(logits, labels)
        train_op = tf_model.train(loss, global_step)
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()

        format_str = (
            "%s: step %d, loss = %.2f, accuracy = %.2f "
            "(%.1f examples/sec; %.3f sec/batch)"
        )
        checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")

        # The session context manager installs the session as the default one
        # and closes it on exit, including when a training step raises.
        session_config = tf.ConfigProto(log_device_placement=False)
        with tf.Session(config=session_config) as sess:
            # latest_checkpoint() returns None when there is nothing to
            # restore. Testing its result avoids building the path to the
            # "checkpoint" index file by string concatenation, which missed
            # existing checkpoints whenever train_dir had no trailing slash.
            ckpt = tf.train.latest_checkpoint(FLAGS.train_dir)
            if ckpt:
                saver.restore(sess, ckpt)
            else:
                tf.initialize_all_variables().run()

            tf.train.start_queue_runners()
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)
            try:
                # Resume counting right after the restored global step.
                for step in xrange(global_step.eval() + 1, FLAGS.max_steps):
                    start_time = time.time()
                    _, loss_value, accuracy_value = sess.run(
                        [train_op, loss, accuracy])
                    duration = time.time() - start_time

                    if step % 5 == 0:
                        examples_per_sec = FLAGS.batch_size / duration
                        sec_per_batch = float(duration)
                        print(format_str % (datetime.now(), step, loss_value,
                                            accuracy_value, examples_per_sec,
                                            sec_per_batch))
                        summary_str = summary_op.eval()
                        summary_writer.add_summary(summary_str, step)

                    if step % 50 == 0 or (step + 1) == FLAGS.max_steps:
                        saver.save(sess, checkpoint_path, global_step=step)
            finally:
                # Flush pending events even when training is interrupted.
                summary_writer.close()
def main(argv=None):
    """Evaluate the latest checkpoint and print averaged metrics.

    Restores the newest checkpoint recorded in ``FLAGS.checkpoint_dir``, runs
    ``score`` on ``FLAGS.num_iterate`` batches and prints, in order: the mean
    accuracy, the mean per-class precision, the mean per-class recall and the
    summed confusion matrix. When no checkpoint is available a message is
    printed and the function returns without evaluating.

    Args:
        argv: Unused.
    """
    if not gfile.Exists(FLAGS.eval_dir):
        gfile.MakeDirs(FLAGS.eval_dir)

    with tf.Graph().as_default():
        images, labels = tf_model.inputs(training=False)
        logits = tf_model.inference(images)
        # Index of the highest-scoring class for each example.
        predictions = tf.squeeze(tf.argmax(logits, 1))
        saver = tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                # Without a restore the variables are uninitialized, so
                # evaluating would only fail later with a less helpful error.
                print("No checkpoint file found in %s" % FLAGS.checkpoint_dir)
                return
            saver.restore(sess, ckpt.model_checkpoint_path)

            coord = tf.train.Coordinator()
            threads = []
            try:
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess, coord=coord, daemon=True,
                                          start=True))

                accuracy = 0
                precision = np.zeros(FLAGS.num_classes)
                recall = np.zeros(FLAGS.num_classes)
                confusion = np.zeros((FLAGS.num_classes, FLAGS.num_classes))
                for _ in xrange(FLAGS.num_iterate):
                    (batch_accuracy, batch_precision, batch_recall,
                     batch_confusion) = score(predictions, labels,
                                              num_classes=FLAGS.num_classes,
                                              sess=sess)
                    accuracy += batch_accuracy
                    precision += batch_precision
                    recall += batch_recall
                    confusion += batch_confusion

                print(accuracy / FLAGS.num_iterate)
                print(np.divide(precision, FLAGS.num_iterate))
                print(np.divide(recall, FLAGS.num_iterate))
                print(confusion)
            finally:
                # Always stop the input threads, even when scoring fails.
                coord.request_stop()
                coord.join(threads, stop_grace_period_secs=10)
def main(argv=None):
    """Train the model, resuming from the newest checkpoint when one exists.

    Builds the training graph from ``tf_model``, restores the latest
    checkpoint found in ``FLAGS.train_dir`` (or initializes all variables
    when there is none), then runs training steps up to ``FLAGS.max_steps``.
    Every 5 steps progress is printed and a summary is written; every 50
    steps, and on the last step, a checkpoint is saved.

    Args:
        argv: Unused.
    """
    if not gfile.Exists(FLAGS.train_dir):
        gfile.MakeDirs(FLAGS.train_dir)

    graph = tf.Graph()
    # NOTE(review): the context manager returned by graph.device() is never
    # entered, so this call does not pin any ops to the CPU. It is kept as-is
    # to avoid changing device placement; wrap the graph construction in
    # `with graph.device("/cpu:0"):` if CPU pinning is actually intended.
    graph.device("/cpu:0")
    with graph.as_default():
        global_step = tf.Variable(0, trainable=False)
        images, labels = tf_model.inputs(training=True)
        logits = tf_model.inference(images)
        loss, accuracy = tf_model.loss(logits, labels)
        train_op = tf_model.train(loss, global_step)
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()

        format_str = (
            "%s: step %d, loss = %.2f, accuracy = %.2f "
            "(%.1f examples/sec; %.3f sec/batch)"
        )
        checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")

        # The session context manager installs the session as the default one
        # and closes it on exit, including when a training step raises.
        session_config = tf.ConfigProto(log_device_placement=False)
        with tf.Session(config=session_config) as sess:
            # latest_checkpoint() returns None when there is nothing to
            # restore. Testing its result avoids building the path to the
            # "checkpoint" index file by string concatenation, which missed
            # existing checkpoints whenever train_dir had no trailing slash.
            ckpt = tf.train.latest_checkpoint(FLAGS.train_dir)
            if ckpt:
                saver.restore(sess, ckpt)
            else:
                tf.initialize_all_variables().run()

            tf.train.start_queue_runners()
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)
            try:
                # Resume counting right after the restored global step.
                for step in xrange(global_step.eval() + 1, FLAGS.max_steps):
                    start_time = time.time()
                    _, loss_value, accuracy_value = sess.run(
                        [train_op, loss, accuracy])
                    duration = time.time() - start_time

                    if step % 5 == 0:
                        examples_per_sec = FLAGS.batch_size / duration
                        sec_per_batch = float(duration)
                        print(format_str % (datetime.now(), step, loss_value,
                                            accuracy_value, examples_per_sec,
                                            sec_per_batch))
                        summary_str = summary_op.eval()
                        summary_writer.add_summary(summary_str, step)

                    if step % 50 == 0 or (step + 1) == FLAGS.max_steps:
                        saver.save(sess, checkpoint_path, global_step=step)
            finally:
                # Flush pending events even when training is interrupted.
                summary_writer.close()
def main(argv=None):
    """Evaluate the latest checkpoint and print averaged metrics.

    Restores the newest checkpoint recorded in ``FLAGS.checkpoint_dir``, runs
    ``score`` on ``FLAGS.num_iterate`` batches and prints, in order: the mean
    accuracy, the mean per-class precision, the mean per-class recall and the
    summed confusion matrix. When no checkpoint is available a message is
    printed and the function returns without evaluating.

    Args:
        argv: Unused.
    """
    if not gfile.Exists(FLAGS.eval_dir):
        gfile.MakeDirs(FLAGS.eval_dir)

    with tf.Graph().as_default():
        images, labels = tf_model.inputs(training=False)
        logits = tf_model.inference(images)
        # Index of the highest-scoring class for each example.
        predictions = tf.squeeze(tf.argmax(logits, 1))
        saver = tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                # Without a restore the variables are uninitialized, so
                # evaluating would only fail later with a less helpful error.
                print("No checkpoint file found in %s" % FLAGS.checkpoint_dir)
                return
            saver.restore(sess, ckpt.model_checkpoint_path)

            coord = tf.train.Coordinator()
            threads = []
            try:
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess, coord=coord, daemon=True,
                                          start=True))

                accuracy = 0
                precision = np.zeros(FLAGS.num_classes)
                recall = np.zeros(FLAGS.num_classes)
                confusion = np.zeros((FLAGS.num_classes, FLAGS.num_classes))
                for _ in xrange(FLAGS.num_iterate):
                    (batch_accuracy, batch_precision, batch_recall,
                     batch_confusion) = score(predictions, labels,
                                              num_classes=FLAGS.num_classes,
                                              sess=sess)
                    accuracy += batch_accuracy
                    precision += batch_precision
                    recall += batch_recall
                    confusion += batch_confusion

                print(accuracy / FLAGS.num_iterate)
                print(np.divide(precision, FLAGS.num_iterate))
                print(np.divide(recall, FLAGS.num_iterate))
                print(confusion)
            finally:
                # Always stop the input threads, even when scoring fails.
                coord.request_stop()
                coord.join(threads, stop_grace_period_secs=10)
def produce_likelihoods(
        feats_ark='./t.ark',
        checkpoint_path='../../../data/tf_fbank_deltas_nocmvn/cnnmodel-31530',
        out_ark='./t_like_u1.ark'):
    """Write prior-normalized network outputs for one batch to a Kaldi archive.

    Reads the first utterance of ``feats_ark``, restores the model from
    ``checkpoint_path``, runs the network on one batch from ``eval_inputs()``
    and writes ``logits - counts`` (counts read from ``FLAGS.occupances``) to
    ``out_ark`` under the first utterance's key. The loss is then printed
    twice: once for the evaluation batch and once for the first utterance's
    features paired with the evaluation labels.

    Args:
        feats_ark: Path of the Kaldi archive the first utterance is read from.
        checkpoint_path: Checkpoint to restore the model variables from.
        out_ark: Path of the Kaldi archive the result is written to.
    """
    with kaldi_helpers.kaldi_data(feats_ark) as kd:
        utterances = kd.read_utterance(-1)
        # presumably a (key, feature matrix) pair - verify in kaldi_helpers
        first_utt = next(utterances)
        print(np.shape(first_utt[1]))

    with kaldi_helpers.kaldi_data(FLAGS.occupances) as kd:
        # NOTE(review): subtracted from the logits below, so this is expected
        # to hold log-priors - confirm what read_counts() returns.
        log_priors = kd.read_counts()

    with tf.Graph().as_default():
        val_images, val_labels = eval_inputs()
        images = tf.placeholder(tf.float32, shape=(None, 1320))
        labels = tf.placeholder(tf.int32, shape=(None))
        logits = tf_model.inference(images, 2048, 2048, 2048)
        loss = tf_model.loss(logits, labels)
        saver = tf.train.Saver()

        # The context manager closes the session even if a step below raises.
        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                saver.restore(sess, checkpoint_path)
                vi, vl = sess.run([val_images, val_labels])

                likelihoods = sess.run(logits, feed_dict={images: vi}) - log_priors
                with kaldi_helpers.kaldi_data(out_ark, 'w') as kd:
                    kd.write_utterance([[first_utt[0], likelihoods]])

                print(sess.run([loss], feed_dict={images: vi, labels: vl}))
                print(sess.run([loss],
                               feed_dict={images: first_utt[1], labels: vl}))
            finally:
                # Ask the input threads to stop and wait for them to finish.
                coord.request_stop()
                coord.join(threads)
def run_training():
    """Train the model with periodic validation and learning-rate updates.

    Builds the training graph in a fresh ``tf.Graph`` and runs training steps
    until the input pipeline raises ``OutOfRangeError`` or the coordinator
    requests a stop. Every 100 steps a summary is written and the loss is
    printed. Every ``NUM_TRAIN_SAMPLES / FLAGS.batch_size`` steps (including
    step 0) a checkpoint is saved, the validation set is evaluated and
    ``update_lrs`` is called with the learning-rate and validation histories.
    """
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Input images and labels.
        images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
                                num_epochs=FLAGS.num_epochs)
        # Eval inputs.
        val_images, val_labels = eval_inputs()

        # Build a Graph that computes predictions from the inference model.
        logits = tf_model.inference(images, FLAGS.hidden1, FLAGS.hidden2,
                                    FLAGS.hidden3)
        # NOTE(review): `frame_accuracy` and `evaluation` are built by the
        # same call; both are kept so the graph is unchanged - one is
        # probably enough.
        frame_accuracy = tf_model.evaluation(logits, labels)
        # Add to the Graph the loss calculation.
        loss = tf_model.loss(logits, labels)
        evaluation = tf_model.evaluation(logits, labels)
        tf.scalar_summary("cross entropy", loss)

        lr = tf.Variable(float(FLAGS.learning_rate), name='lr')
        # Add to the Graph operations that train the model.
        train_op = tf_model.training(loss, lr)
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        # The op for initializing the variables.
        init_op = tf.initialize_all_variables()
        summary_op = tf.merge_all_summaries()

        # The context manager closes the session on every exit path.
        with tf.Session() as sess:
            summary_writer = tf.train.SummaryWriter("./data/", sess.graph_def)
            sess.run(init_op)

            # Start input enqueue threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                # Histories handed to update_lrs(); the first entries are
                # placeholders for "no validation done yet".
                epochs_loss = [1000]
                epochs_fac = [0]
                lrs = [float(FLAGS.learning_rate)]
                step = 0
                epoch = 0
                train_batches = int(NUM_TRAIN_SAMPLES / FLAGS.batch_size)
                val_batches = int(NUM_VAL_SAMPLES / VAL_BATCH_SIZE)

                while not coord.should_stop():
                    start_time = time.time()
                    # Run one training step with the most recent learning
                    # rate fed in place of the `lr` variable.
                    _, loss_value, fac_value = sess.run(
                        [train_op, loss, evaluation],
                        feed_dict={lr: lrs[-1]})
                    duration = time.time() - start_time

                    # Print an overview fairly often.
                    if step % 100 == 0:
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, step)
                        # float() keeps the ratio fractional if the op yields
                        # an integer count and this runs under Python 2.
                        print('Step %d: loss = %.2f (%.3f sec), fac = %.2f' %
                              (step, loss_value, duration,
                               float(fac_value) / FLAGS.batch_size))

                    if step % train_batches == 0:
                        saver.save(sess, FLAGS.train_dir + '/cnnmodel',
                                   global_step=step)
                        it_loss = it_fac = 0
                        print('Validating...')
                        for i in range(val_batches):
                            if i % 100 == 0:
                                print('batch: %d' % (i))
                            vi, vl = sess.run([val_images, val_labels])
                            # Feed the validation batch in place of the
                            # training inputs.
                            val_loss_value, val_fac_value = sess.run(
                                [loss, frame_accuracy],
                                feed_dict={images: vi, labels: vl})
                            it_loss += val_loss_value
                            it_fac += float(val_fac_value) / VAL_BATCH_SIZE
                        epoch_loss = it_loss / val_batches
                        epochs_loss.append(epoch_loss)
                        epoch_fac = it_fac / val_batches
                        epochs_fac.append(epoch_fac)
                        print('Iter %d: cv_loss = %.2f, cv_fac = %.2f' %
                              (epoch, epoch_loss, epoch_fac))
                        update_lrs(lrs, epochs_loss, epochs_fac)
                        epoch += 1
                        print(epochs_loss)
                        print(lrs)

                    step += 1
            except tf.errors.OutOfRangeError:
                print('Done training for %d epochs, %d steps.' %
                      (FLAGS.num_epochs, step))
            finally:
                # When done, ask the threads to stop.
                coord.request_stop()
                # Flush pending summary events before shutting down.
                summary_writer.close()
                # Wait for threads to finish.
                coord.join(threads)