def run_training():
    # construct the graph
    with tf.Graph().as_default():
        # specify the training data file location
        #trainfile = os.path.join(FLAGS.data_dir, TRAIN_FILE)

        # read the images and labels
        train_dataset, train_labels, img_size, img_size_labels = nn.inputs_ms(
            batch_size=FLAGS.batch_size, filename=TRAIN_FILE)
        #images, labels = nn.inputs(batch_size=FLAGS.batch_size,
        #                           num_epochs=FLAGS.num_epochs,
        #                           filename=trainfile)

        # run inference on the images
        results = nn.inference_ms(train_dataset, img_size)

        # calculate the loss from the results of inference and the labels
        loss = nn.loss_ms(results, train_labels)

        # setup the training operations
        train_op = nn.training(loss, FLAGS.learning_rate,
                               FLAGS.decay_steps, FLAGS.decay_rate)

        # setup the summary ops to use TensorBoard
        summary_op = tf.merge_all_summaries()

        # init to setup the initial values of the weights
        init_op = tf.group(tf.initialize_all_variables(),
                           tf.initialize_local_variables())

        # setup a saver for saving checkpoints
        saver = tf.train.Saver()

        # create the session
        sess = tf.Session()

        # specify where to write the log files for import to TensorBoard
        summary_writer = tf.train.SummaryWriter(FLAGS.checkpoint_dir, sess.graph)

        # initialize the graph
        sess.run(init_op)

        # setup the coordinator and threads. Used for multiple threads to read data.
        # Not strictly required since we don't have a lot of data, but typically
        # using multiple threads to read data improves performance.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # loop will continue until we run out of input training cases
        try:
            step = 0
            while not coord.should_stop():
                # start time and run one training iteration
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time

                # print some output periodically
                if step % 100 == 0:
                    print('OUTPUT: Step %d: loss = %.3f (%.3f sec)'
                          % (step, loss_value, duration))
                    # output some data to the log files for TensorBoard
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                # less frequently output checkpoint files. Used for evaluating the model
                if step % 1000 == 0:
                    checkpoint_path = os.path.join(FLAGS.checkpoint_dir, MODEL_FILE)
                    saver.save(sess, checkpoint_path, global_step=step)
                step += 1

        # quit after we run out of input files to read
        except tf.errors.OutOfRangeError:
            print('OUTPUT: Done training for %d epochs, %d steps.'
                  % (FLAGS.num_epochs, step))
            checkpoint_path = os.path.join(FLAGS.checkpoint_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
        finally:
            coord.request_stop()

        # shut down the threads gracefully
        coord.join(threads)
        sess.close()
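# All three versions of run_training() delegate the optimizer setup to
# nn.training(loss, learning_rate, decay_steps, decay_rate).  The nn module is
# not reproduced here; the following is only a minimal sketch of what such a
# helper commonly looks like in TF 1.x (exponential learning-rate decay plus a
# single optimizer step tied to a global step counter).  The function name,
# body, and the choice of AdamOptimizer are assumptions, not the author's
# actual implementation.
import tensorflow as tf

def training_sketch(loss, learning_rate, decay_steps, decay_rate):
    # global step counter, incremented once per optimizer update
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # decay the learning rate exponentially as training progresses
    lr = tf.train.exponential_decay(learning_rate, global_step,
                                    decay_steps, decay_rate, staircase=True)
    # record the learning rate and loss so they show up in TensorBoard
    tf.summary.scalar('learning_rate', lr)
    tf.summary.scalar('loss', loss)
    # one gradient update per call to sess.run(train_op)
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op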
def run_training():
    # construct the graph
    with tf.Graph().as_default():
        # specify the training data file locations
        trainfiles = []
        for fi in TRAIN_FILES:
            trainfiles.append(os.path.join(FLAGS.data_dir, fi))
        # trainfile = os.path.join(FLAGS.data_dir, TRAIN_FILE)

        # read the images and labels
        x, y_ = nn.inputs(batch_size=FLAGS.batch_size,
                          num_epochs=FLAGS.num_epochs,
                          filenames=trainfiles,
                          ifeval=False)

        keep_prob = tf.placeholder(tf.float32)
        # not fed in this training loop
        z_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, z_dimensions])

        # run inference on the images
        y_conv = nn.inference(x, np.array([65, 65, 65]), keep_prob, FLAGS.batch_size)

        # calculate the loss from the results of inference and the labels
        loss = nn.loss(y_conv, y_)

        # calculate the accuracy
        accuracy = nn.evaluation(y_conv, y_)

        # setup the training operations
        train_op = nn.training(loss, FLAGS.learning_rate,
                               FLAGS.decay_steps, FLAGS.decay_rate)

        # setup the summary ops to use TensorBoard
        summary_op = tf.summary.merge_all()

        # init to setup the initial values of the weights
        #init_op = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables())
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # create the session
        with tf.Session() as sess:
            sess.run(init_op)

            # setup a saver for saving checkpoints
            saver = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_dir, sess.graph)

            # setup the coordinator and threads. Used for multiple threads to read data.
            # Not strictly required since we don't have a lot of data, but typically
            # using multiple threads to read data improves performance.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            start_training_time = time.time()

            # loop will continue until we run out of input training cases
            try:
                step = 0
                while not coord.should_stop():
                    # start time and run one training iteration
                    start_time = time.time()
                    _, l, acc = sess.run([train_op, loss, accuracy],
                                         feed_dict={keep_prob: 0.5})
                    duration = time.time() - start_time

                    # print some output periodically
                    if step % 20 == 0:
                        print('OUTPUT: Step %d: loss = %.3f (%.3f sec), accuracy = %.3f'
                              % (step, l, duration, acc))
                        # output some data to the log files for TensorBoard
                        summary_str = sess.run(summary_op, feed_dict={keep_prob: 0.5})
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                    # less frequently output checkpoint files. Used for evaluating the model
                    if step % 500 == 0:
                        # check_save (checkpoint output directory) is defined elsewhere
                        checkpoint_path = os.path.join(check_save, 'model.ckpt')
                        saver.save(sess, save_path=checkpoint_path, global_step=step)
                    step += 1

            # quit after we run out of input files to read
            except tf.errors.OutOfRangeError:
                print('OUTPUT: Done training for %d epochs, %d steps.'
                      % (FLAGS.num_epochs, step))
                checkpoint_path = os.path.join(check_save, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
            finally:
                coord.request_stop()

            # shut down the threads gracefully
            coord.join(threads)
            sess.close()
            end_training_time = time.time()
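# The second version also calls nn.loss(y_conv, y_) and nn.evaluation(y_conv, y_).
# Again, the nn module is not shown; the sketch below illustrates a common
# TF 1.x pattern for a classification loss and an accuracy op.  It assumes
# logits plus integer class labels, which is an assumption about the record
# format, not the author's actual implementation.
import tensorflow as tf

def loss_sketch(logits, labels):
    # mean softmax cross-entropy over the batch; labels are integer class ids
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(labels, tf.int64), logits=logits)
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    # make the loss visible in TensorBoard via tf.summary.merge_all()
    tf.summary.scalar('xentropy_mean', loss)
    return loss

def evaluation_sketch(logits, labels):
    # fraction of examples whose top-1 prediction matches the label
    correct = tf.nn.in_top_k(logits, tf.cast(labels, tf.int32), 1)
    return tf.reduce_mean(tf.cast(correct, tf.float32))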
def run_training():
    # construct the graph
    with tf.Graph().as_default():
        # specify the training data file location
        trainfile = os.path.join(FLAGS.data_dir, TRAIN_FILE)

        # read the images and labels
        images, labels = nn.inputs(batch_size=FLAGS.batch_size,
                                   num_epochs=FLAGS.num_epochs,
                                   filename=trainfile)

        # run inference on the images
        results = nn.inference(images)

        # calculate the loss from the results of inference and the labels
        loss = nn.loss(results, labels)

        # setup the training operations
        train_op = nn.training(loss, FLAGS.learning_rate,
                               FLAGS.decay_steps, FLAGS.decay_rate)

        # setup the summary ops to use TensorBoard
        summary_op = tf.summary.merge_all()

        # init to setup the initial values of the weights
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # setup a saver for saving checkpoints
        saver = tf.train.Saver()

        # create the session
        sess = tf.Session()

        # specify where to write the log files for import to TensorBoard
        summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_dir, sess.graph)

        # initialize the graph
        sess.run(init_op)

        # setup the coordinator and threads. Used for multiple threads to read data.
        # Not strictly required since we don't have a lot of data, but typically
        # using multiple threads to read data improves performance.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # loop will continue until we run out of input training cases
        try:
            step = 0
            while not coord.should_stop():
                # start time and run one training iteration
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time

                # print some output periodically
                if step % 100 == 0:
                    print('OUTPUT: Step %d: loss = %.3f (%.3f sec)'
                          % (step, loss_value, duration))
                    # output some data to the log files for TensorBoard
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                # less frequently output checkpoint files. Used for evaluating the model
                if step % 1000 == 0:
                    checkpoint_path = os.path.join(FLAGS.checkpoint_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                step += 1

        # quit after we run out of input files to read
        except tf.errors.OutOfRangeError:
            print('OUTPUT: Done training for %d epochs, %d steps.'
                  % (FLAGS.num_epochs, step))
            checkpoint_path = os.path.join(FLAGS.checkpoint_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
        finally:
            coord.request_stop()

        # shut down the threads gracefully
        coord.join(threads)
        sess.close()
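# Each training loop above relies on the queue-runner input pipeline built by
# nn.inputs(): tf.train.string_input_producer() is given num_epochs, and once
# the readers exhaust the input files the dequeue ops raise
# tf.errors.OutOfRangeError, which is what terminates the while loop.  The
# epoch counter is a local variable, which is also why the init op groups
# tf.local_variables_initializer() with the global initializer.  The sketch
# below illustrates that mechanism for a single TFRecords file; the feature
# keys ('image_raw', 'label') and the assumed volume shape are illustrative
# guesses, not the author's actual record layout.
import tensorflow as tf

def inputs_sketch(batch_size, num_epochs, filename):
    # queue of input files; raises OutOfRangeError after num_epochs passes
    filename_queue = tf.train.string_input_producer([filename],
                                                    num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # parse one serialized example into its raw image bytes and integer label
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.float32)
    image = tf.reshape(image, [65, 65, 65, 1])   # assumed volume shape
    label = tf.cast(features['label'], tf.int32)
    # shuffle_batch registers the queue runners launched later by
    # tf.train.start_queue_runners() in run_training()
    images, labels = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size, num_threads=2,
        capacity=1000 + 3 * batch_size, min_after_dequeue=1000)
    return images, labels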