def main(_):
    """Train the VGG-like network on the configured GPUs, testing every epoch.

    Builds train/test input producers through ``reader.get_producer``, builds
    the graph through ``utils.build_graph`` and runs 200 epochs. After every
    epoch the test producer is evaluated, ``cur_model.ckpt`` is saved, and
    ``best_model.ckpt`` is saved whenever the averaged test accuracy is at
    least as high as the best value seen so far.

    Args:
        _: ignored (presumably the argv list handed over by ``tf.app.run`` --
            confirm against the caller).
    """
    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # the timestamp suffix is assumed to apply to the default path only.
    summaries_dir = FLAGS.summaries_dir
    if summaries_dir == '':
        summaries_dir = './logs/vgglike_{}_scale{}'.format(
            FLAGS.dataset, FLAGS.scale)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if checkpoints_dir == '':
        checkpoints_dir = './checkpoints/vgglike_{}_scale{}'.format(
            FLAGS.dataset, FLAGS.scale)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # DATASET QUEUES
        inputs, shape, n_train_examples, nclass = reader.get_producer(
            FLAGS.dataset, FLAGS.batch_size, training=True,
            distorted=True, data_dir=FLAGS.data_dir)
        images_train, labels_train = inputs
        inputs, shape, n_test_examples, nclass = reader.get_producer(
            FLAGS.dataset, FLAGS.batch_size, training=False,
            data_dir=FLAGS.data_dir)
        images_test, labels_test = inputs

        # BUILDING GRAPH
        devices = ['/gpu:%d' % i for i in range(FLAGS.num_gpus)]
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        tf.summary.scalar('weight_decay', wd)
        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0),
            trainable=False)
        inference = lambda images, is_training, reuse: net_vgglike(
            images, nclass, FLAGS.scale, is_training, reuse)
        loss = lambda preds, labels, reuse: metrics.log_loss(
            preds, labels, reuse, n_train_examples, wd)
        train_op, test_acc_op, test_loss_op = utils.build_graph(
            images_train, labels_train, images_test, labels_test,
            global_step, loss, metrics.accuracy, inference, lr, devices)
        # Everything registered so far belongs to the training summaries; the
        # test summaries below are merged explicitly so they stay separate.
        train_summaries = tf.summary.merge_all()
        test_acc = tf.placeholder(tf.float32, shape=[],
                                  name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy', test_acc)
        test_loss = tf.placeholder(tf.float32, shape=[],
                                   name='test_loss_placeholder')
        test_loss_summary = tf.summary.scalar('test_loss', test_loss)
        test_summaries = tf.summary.merge(
            [test_acc_summary, test_loss_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 200
        # Floor division keeps the step counts integral on Python 3 as well;
        # plain `/` would produce a float there and break range().
        examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        steps_per_epoch = n_train_examples // examples_per_step + 1
        steps_per_test = n_test_examples // examples_per_step + 1
        lr_policy = lambda epoch_num: policies.linear_decay(
            epoch_num, decay_start=100, total_epochs=n_epochs,
            start_value=1e-3)
        wd_policy = lambda epoch_num: policies.linear_growth(
            epoch_num, growth_start=0, total_epochs=n_epochs,
            start_value=1e-3, end_value=1.0)
        saver = tf.train.Saver()
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        try:
            with tf.Session(config=config) as sess:
                # initialize all variables
                sess.run(tf.global_variables_initializer())

                # restore checkpoints if it's provided
                if FLAGS.checkpoint != '':
                    restorer = tf.train.Saver(tf.get_collection('variables'))
                    restorer.restore(sess, FLAGS.checkpoint)

                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    best_test_acc = 0.0
                    for epoch_num in range(n_epochs):
                        for step_num in range(steps_per_epoch):
                            _, summary = sess.run(
                                [train_op, train_summaries],
                                feed_dict={
                                    lr: lr_policy(epoch_num),
                                    wd: wd_policy(epoch_num)
                                })
                            train_writer.add_summary(summary,
                                                     global_step.eval())

                        test_loss_total, test_acc_total = 0.0, 0.0
                        for step_num in range(steps_per_test):
                            batch_test_acc, batch_test_loss = sess.run(
                                [test_acc_op, test_loss_op])
                            test_acc_total += batch_test_acc / steps_per_test
                            test_loss_total += batch_test_loss / steps_per_test

                        if test_acc_total >= best_test_acc:
                            saver.save(sess,
                                       checkpoints_dir + '/best_model.ckpt')
                            best_test_acc = test_acc_total
                        saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                        summary = sess.run([test_summaries], feed_dict={
                            test_acc: test_acc_total,
                            test_loss: test_loss_total
                        })
                        for s in summary:
                            test_writer.add_summary(s, global_step.eval())
                        print("Epoch %d test accuracy: %.3f" %
                              (epoch_num, test_acc_total))
                finally:
                    # Stop the queue-runner threads even when training raises,
                    # while the session they use is still open.
                    coord.request_stop()
                    coord.join(threads)
        finally:
            # Flush buffered events to disk.
            train_writer.close()
            test_writer.close()
def main(_):
    """Train the VGG-like dropout network and report det/stoch/ensemble accuracy.

    Loads the dataset through ``reader.load``, builds the graph through
    ``utils.build_graph`` and trains for 550 epochs on batches produced by
    ``reader.batch_iterator_train_crop_flip``. After every epoch the test set
    is evaluated three ways: with the ``probs_test_det`` output, with a single
    run of the ``probs_test_stoh`` output, and with the average of
    ``ensemble_size`` runs of ``probs_test_stoh``. ``cur_model.ckpt`` is saved
    and one progress line is printed per epoch.

    Args:
        _: ignored (presumably the argv list handed over by ``tf.app.run`` --
            confirm against the caller).
    """
    batch_size = FLAGS.batch_size
    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # the timestamp suffix is assumed to apply to the default path only.
    summaries_dir = FLAGS.summaries_dir
    if summaries_dir == '':
        summaries_dir = './logs/vgg_do_{}_{}'.format(FLAGS.dataset,
                                                     FLAGS.suffix)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if checkpoints_dir == '':
        checkpoints_dir = './checkpoints/vgg_do_{}_{}'.format(FLAGS.dataset,
                                                              FLAGS.suffix)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/gpu:0'):
        # LOADING DATA
        data, len_train, len_test, input_shape, nclass = reader.load(
            FLAGS.dataset)
        X_train, y_train, X_test, y_test = data

        # BUILDING GRAPH
        # The batch dimension is fixed, so only full batches can be fed.
        images = tf.placeholder(
            tf.float32,
            shape=[batch_size, input_shape[1], input_shape[2],
                   input_shape[3]],
            name='images')
        labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0),
            trainable=False)
        inference = lambda x, reuse, is_training, stohastic: net_vgglike(
            x, nclass, wd, is_training, stohastic, reuse)
        loss = lambda logits, y: metrics.log_loss(logits, y, len_train)
        (train_op, probs_train, probs_test_det, probs_test_stoh,
         train_loss) = utils.build_graph(images, labels, loss, inference, lr,
                                         global_step)
        train_summaries = tf.summary.merge_all()
        train_acc_plc = tf.placeholder(tf.float32, shape=[],
                                       name='train_acc_placeholder')
        train_acc_summary = tf.summary.scalar('train_accuracy_stoch',
                                              train_acc_plc)
        test_acc_plc = tf.placeholder(tf.float32, shape=[],
                                      name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy_det',
                                             test_acc_plc)
        test_summaries = tf.summary.merge(
            [train_acc_summary, test_acc_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 550
        ensemble_size = 5
        lr_policy = lambda epoch_num: policies.linear_decay(
            epoch_num, decay_start=0, total_epochs=n_epochs,
            start_value=1e-3)
        wd_policy = lambda epoch_num: FLAGS.l2
        # Floor division keeps the step counts integral on Python 3 as well.
        steps_per_train = len_train // batch_size
        steps_per_test = len_test // batch_size
        saver = tf.train.Saver()
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        try:
            with tf.Session(config=config) as sess:
                # initialize all variables
                sess.run(tf.global_variables_initializer())

                # restore checkpoints if it's provided
                if FLAGS.checkpoint != '':
                    restorer = tf.train.Saver(tf.get_collection('variables'))
                    restorer.restore(sess, FLAGS.checkpoint)

                start_time = time.time()
                for epoch_num in range(n_epochs):
                    # More stochastic samples are averaged near the end of
                    # training.
                    if epoch_num > 500:
                        ensemble_size = 10
                    if epoch_num > 540:
                        ensemble_size = 100

                    # `wd` is wired into the network, so it is fed on every
                    # run; feeding it is harmless for fetches that do not
                    # read it.
                    wd_value = wd_policy(epoch_num)

                    train_acc = 0.0
                    train_loss_ = 0.0
                    train_batches = reader.batch_iterator_train_crop_flip(
                        X_train, y_train, batch_size)
                    for batch_images, batch_labels in train_batches:
                        _, train_probs, summary, train_lossb = sess.run(
                            [train_op, probs_train, train_summaries,
                             train_loss],
                            feed_dict={lr: lr_policy(epoch_num),
                                       wd: wd_value,
                                       images: batch_images,
                                       labels: batch_labels})
                        train_writer.add_summary(summary, global_step.eval())
                        train_loss_ += train_lossb / steps_per_train
                        train_acc += metrics.accurracy_np(
                            train_probs, batch_labels) / steps_per_train

                    test_acc_det, test_acc_stoch, test_acc_ens = 0.0, 0.0, 0.0
                    for i in range(steps_per_test):
                        batch_images = X_test[i*batch_size:(i+1)*batch_size]
                        batch_labels = y_test[i*batch_size:(i+1)*batch_size]
                        eval_feed = {images: batch_images,
                                     labels: batch_labels,
                                     wd: wd_value}
                        test_probs_stoch = np.zeros([batch_size, nclass])
                        test_probs_det = np.zeros([batch_size, nclass])
                        test_probs_ens = np.zeros([batch_size, nclass])
                        for sample_num in range(ensemble_size):
                            probs_batch_stoch = sess.run(
                                [probs_test_stoh], feed_dict=eval_feed)[0]
                            test_probs_ens += probs_batch_stoch/ensemble_size
                            if sample_num == 0:
                                # The deterministic output and the single
                                # stochastic sample are taken once per batch.
                                test_probs_det = sess.run(
                                    [probs_test_det],
                                    feed_dict=eval_feed)[0]
                                test_probs_stoch = probs_batch_stoch
                        test_acc_det += metrics.accurracy_np(
                            test_probs_det, batch_labels)/steps_per_test
                        test_acc_stoch += metrics.accurracy_np(
                            test_probs_stoch, batch_labels)/steps_per_test
                        test_acc_ens += metrics.accurracy_np(
                            test_probs_ens, batch_labels)/steps_per_test

                    # Write the per-epoch accuracies; these summary ops and
                    # this writer were created but never used before.
                    epoch_summary = sess.run(
                        test_summaries,
                        feed_dict={train_acc_plc: train_acc,
                                   test_acc_plc: test_acc_det})
                    test_writer.add_summary(epoch_summary, global_step.eval())

                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')
                    epoch_time, start_time = (int(time.time() - start_time),
                                              time.time())
                    # One call with a single string prints the same line on
                    # Python 2 and Python 3.
                    print('epoch_num %3d train_loss %.3f train_acc %.3f '
                          'test_acc_det %.3f test_acc_stoch %.3f '
                          'test_acc_ens %.3f epoch_time %.3f' % (
                              epoch_num, train_loss_, train_acc,
                              test_acc_det, test_acc_stoch, test_acc_ens,
                              epoch_time))
        finally:
            # Flush buffered events to disk.
            train_writer.close()
            test_writer.close()
def main(_):
    """Train LeNet-5 with Adam and evaluate on the test set after every epoch.

    Loads the dataset through ``reader.load``, builds ``lenet5`` with the
    ``metrics.log_loss`` objective and trains for 200 epochs over consecutive
    slices of the training arrays. After every epoch the test set is
    evaluated, ``cur_model.ckpt`` is saved, and ``best_model.ckpt`` is saved
    whenever the test accuracy is at least as high as the best value so far.

    Args:
        _: ignored (presumably the argv list handed over by ``tf.app.run`` --
            confirm against the caller).
    """
    batch_size = FLAGS.batch_size
    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # the timestamp suffix is assumed to apply to the default path only.
    summaries_dir = FLAGS.summaries_dir
    if summaries_dir == '':
        summaries_dir = './logs/lenet5_{}_l2{}'.format(FLAGS.dataset,
                                                       FLAGS.l2)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if checkpoints_dir == '':
        checkpoints_dir = './checkpoints/lenet5_{}_l2{}'.format(FLAGS.dataset,
                                                                FLAGS.l2)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # LOADING DATA
        data, len_train, len_test, input_shape, nclass = reader.load(
            FLAGS.dataset)
        X_train, y_train, X_test, y_test = data

        # BUILDING GRAPH
        images = tf.placeholder(tf.float32, shape=input_shape, name='images')
        labels = tf.placeholder(tf.int32, shape=[None], name='labels')
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        tf.summary.scalar('learning rate', lr)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.95)
        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0),
            trainable=False)
        logits_op = lenet5(images, nclass)
        loss_op = metrics.log_loss(logits_op, labels, reuse=False,
                                   num_examples=len_train,
                                   l2_weight=FLAGS.l2)
        accuracy_op = metrics.accuracy(logits_op, labels)
        tf.summary.scalar('train accuracy', accuracy_op)
        train_op = optimizer.minimize(loss_op, global_step=global_step)
        # Everything registered so far belongs to the training summaries; the
        # test summaries below are merged explicitly so they stay separate.
        train_summaries = tf.summary.merge_all()
        test_acc = tf.placeholder(tf.float32, shape=[],
                                  name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test accuracy', test_acc)
        test_loss = tf.placeholder(tf.float32, shape=[],
                                   name='test_loss_placeholder')
        test_loss_summary = tf.summary.scalar('test loss', test_loss)
        test_summaries = tf.summary.merge(
            [test_acc_summary, test_loss_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 200
        lr_policy = lambda epoch_num: policies.linear_decay_policy(
            epoch_num, decay_start=100, total_epochs=n_epochs,
            start_value=1e-3)
        # Ceiling division: covers a trailing partial batch without ever
        # producing an empty batch when the size is a multiple of batch_size.
        steps_per_train = (len_train + batch_size - 1) // batch_size
        steps_per_test = (len_test + batch_size - 1) // batch_size
        saver = tf.train.Saver()
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        try:
            with tf.Session(config=config) as sess:
                # initialize all variables
                sess.run(tf.global_variables_initializer())

                # restore checkpoints if it's provided
                if FLAGS.checkpoint != '':
                    restorer = tf.train.Saver(tf.get_collection('variables'))
                    restorer.restore(sess, FLAGS.checkpoint)

                best_test_acc = 0.0
                for epoch_num in range(n_epochs):
                    for i in range(steps_per_train):
                        batch_images = X_train[i*batch_size:(i+1)*batch_size]
                        batch_labels = y_train[i*batch_size:(i+1)*batch_size]
                        _, summary = sess.run(
                            [train_op, train_summaries],
                            feed_dict={lr: lr_policy(epoch_num),
                                       images: batch_images,
                                       labels: batch_labels})
                        train_writer.add_summary(summary, global_step.eval())

                    # Weight each batch by its size so a short final batch
                    # does not count as much as a full one.
                    acc_sum, loss_sum, n_seen = 0.0, 0.0, 0
                    for i in range(steps_per_test):
                        batch_images = X_test[i*batch_size:(i+1)*batch_size]
                        batch_labels = y_test[i*batch_size:(i+1)*batch_size]
                        batch_test_acc, batch_test_loss = sess.run(
                            [accuracy_op, loss_op],
                            feed_dict={lr: lr_policy(epoch_num),
                                       images: batch_images,
                                       labels: batch_labels})
                        n_batch = len(batch_labels)
                        acc_sum += batch_test_acc * n_batch
                        loss_sum += batch_test_loss * n_batch
                        n_seen += n_batch
                    test_acc_total = acc_sum / n_seen if n_seen else 0.0
                    test_loss_total = loss_sum / n_seen if n_seen else 0.0

                    if test_acc_total >= best_test_acc:
                        saver.save(sess, checkpoints_dir + '/best_model.ckpt')
                        best_test_acc = test_acc_total
                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                    summary = sess.run(
                        [test_summaries],
                        feed_dict={test_acc: test_acc_total,
                                   test_loss: test_loss_total})
                    for s in summary:
                        test_writer.add_summary(s, global_step.eval())
        finally:
            # Flush buffered events to disk.
            train_writer.close()
            test_writer.close()