def main(_):
    """Fine-tune a checkpointed LeNet-5 with the SGVLB loss and log progress.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``l2``, ``summaries_dir``, ``checkpoints_dir`` and
    ``checkpoint``).  Variables whose names contain neither ``sbp`` nor
    ``adam`` are restored from ``FLAGS.checkpoint`` before training starts.
    After every epoch the test split is evaluated, ``cur_model.ckpt`` is
    written, and ``best_model.ckpt`` is refreshed whenever the test accuracy
    is at least as good as the best one seen so far.

    Args:
        _: Unused positional argument.
    """
    batch_size = FLAGS.batch_size

    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/lenet5_sbp_{}_l2{}'.format(
            FLAGS.dataset, FLAGS.l2)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/lenet5_sbp_{}_l2{}'.format(
            FLAGS.dataset, FLAGS.l2)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        with tf.variable_scope(tf.get_variable_scope()):
            # LOADING DATA
            data, len_train, len_test, input_shape, nclass = reader.load(
                FLAGS.dataset)
            X_train, y_train, X_test, y_test = data

            # BUILDING GRAPH
            images = tf.placeholder(tf.float32, shape=input_shape,
                                    name='images')
            labels = tf.placeholder(tf.int32, shape=[None], name='labels')
            lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
            tf.summary.scalar('learning rate', lr)
            optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.95)
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)

            # The second lenet5 call shares the variables created by the
            # first one, hence the reuse_variables() in between.
            logits_op_train = lenet5(images, nclass, True, False)
            tf.get_variable_scope().reuse_variables()
            logits_op_test = lenet5(images, nclass, False, True)

            loss_op_train = metrics.sgvlb(logits_op_train, labels,
                                          reuse=False,
                                          num_examples=len_train,
                                          l2_weight=FLAGS.l2)
            tf.summary.scalar('train_loss', loss_op_train)
            loss_op_test = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits_op_test, labels=labels))
            accuracy_op_train = metrics.accuracy(logits_op_train, labels)
            accuracy_op_test = metrics.accuracy(logits_op_test, labels)
            tf.summary.scalar('train_accuracy', accuracy_op_train)

            train_op = optimizer.minimize(loss_op_train,
                                          global_step=global_step)

            # merge_all() is called before the test summaries are created so
            # that train_summaries does not require the test placeholders.
            train_summaries = tf.summary.merge_all()

            test_acc = tf.placeholder(tf.float32, shape=[],
                                      name='test_acc_placeholder')
            test_acc_summary = tf.summary.scalar('test accuracy', test_acc)
            test_loss = tf.placeholder(tf.float32, shape=[],
                                       name='test_loss_placeholder')
            test_loss_summary = tf.summary.scalar('test loss', test_loss)
            test_summaries = tf.summary.merge(
                [test_acc_summary, test_loss_summary])

            # SUMMARIES WRITERS
            train_writer = tf.summary.FileWriter(summaries_dir + '/train',
                                                 graph)
            test_writer = tf.summary.FileWriter(summaries_dir + '/test',
                                                graph)

            # TRAINING
            n_epochs = 200

            def lr_policy(epoch_num):
                return policies.linear_decay(epoch_num,
                                             decay_start=100,
                                             total_epochs=n_epochs,
                                             start_value=1e-3)

            # Ceiling division: the last batch may be smaller than
            # batch_size, but an empty slice is never fed.  The previous
            # ``len / batch_size + 1`` produced an empty trailing batch
            # whenever the split size was a multiple of batch_size.
            steps_per_train = (len_train + batch_size - 1) // batch_size
            steps_per_test = (len_test + batch_size - 1) // batch_size

            saver = tf.train.Saver()

            config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
            try:
                with tf.Session(config=config) as sess:
                    # initialize all variables
                    sess.run(tf.global_variables_initializer())

                    # restore checkpoint: everything except variables whose
                    # names mention 'sbp' or 'adam'
                    net_variables = [
                        v for v in tf.get_collection('variables')
                        if 'sbp' not in v.name.lower()
                        and 'adam' not in v.name.lower()
                    ]
                    restorer = tf.train.Saver(net_variables)
                    restorer.restore(sess, FLAGS.checkpoint)

                    best_test_acc = 0.0
                    for epoch_num in range(n_epochs):
                        epoch_lr = lr_policy(epoch_num)
                        for i in range(steps_per_train):
                            lo, hi = i * batch_size, (i + 1) * batch_size
                            _, summary = sess.run(
                                [train_op, train_summaries],
                                feed_dict={
                                    lr: epoch_lr,
                                    images: X_train[lo:hi],
                                    labels: y_train[lo:hi]
                                })
                            train_writer.add_summary(summary,
                                                     global_step.eval())

                        # Per-batch metrics are averaged with equal weight
                        # per batch.
                        test_loss_total, test_acc_total = 0.0, 0.0
                        for i in range(steps_per_test):
                            lo, hi = i * batch_size, (i + 1) * batch_size
                            batch_test_acc, batch_test_loss = sess.run(
                                [accuracy_op_test, loss_op_test],
                                feed_dict={
                                    images: X_test[lo:hi],
                                    labels: y_test[lo:hi]
                                })
                            test_acc_total += batch_test_acc / steps_per_test
                            test_loss_total += (batch_test_loss /
                                                steps_per_test)

                        if test_acc_total >= best_test_acc:
                            saver.save(sess,
                                       checkpoints_dir + '/best_model.ckpt')
                            best_test_acc = test_acc_total
                        saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                        summary = sess.run(test_summaries,
                                           feed_dict={
                                               test_acc: test_acc_total,
                                               test_loss: test_loss_total
                                           })
                        test_writer.add_summary(summary, global_step.eval())
            finally:
                # Close the event files so buffered summaries reach disk.
                train_writer.close()
                test_writer.close()
def main(_):
    """Train the VGG-like network with the SGVLB loss and report accuracies.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``suffix``, ``summaries_dir``, ``checkpoints_dir`` and
    ``checkpoint``).  Each epoch trains on batches from
    ``reader.batch_iterator_train_crop_flip``, then evaluates the test split
    three ways: the ``probs_test_det`` output, a single ``probs_test_stoh``
    sample, and the mean of ``ensemble_size`` ``probs_test_stoh`` samples.
    A checkpoint is written and one status line is printed per epoch.

    Args:
        _: Unused positional argument.
    """
    batch_size = FLAGS.batch_size

    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/vgg_pt_{}_{}'.format(FLAGS.dataset,
                                                     FLAGS.suffix)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/vgg_pt_{}_{}'.format(FLAGS.dataset,
                                                              FLAGS.suffix)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/gpu:0'):
        # LOADING DATA
        data, len_train, len_test, input_shape, nclass = reader.load(
            FLAGS.dataset)
        X_train, y_train, X_test, y_test = data

        # BUILDING GRAPH
        # The placeholders have a fixed batch dimension, so every fed batch
        # must contain exactly batch_size examples.
        images = tf.placeholder(
            tf.float32,
            shape=[batch_size, input_shape[1], input_shape[2],
                   input_shape[3]],
            name='images')
        labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        # NOTE(review): wd is handed to net_vgglike but never fed below;
        # presumably the graph does not actually consume it -- confirm.
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def inference(x, reuse, is_training, stohastic):
            return net_vgglike(x, nclass, wd, is_training, stohastic, reuse)

        def loss(logits, y):
            return metrics.sgvlb(logits, y, len_train)

        (train_op, probs_train, probs_test_det, probs_test_stoh,
         train_loss) = utils.build_graph(images, labels, loss, inference, lr,
                                         global_step)
        # merge_all() runs before the accuracy summaries are created so that
        # train_summaries does not depend on the accuracy placeholders.
        train_summaries = tf.summary.merge_all()

        train_acc_plc = tf.placeholder(tf.float32, shape=[],
                                       name='train_acc_placeholder')
        train_acc_summary = tf.summary.scalar('train_accuracy_stoch',
                                              train_acc_plc)
        test_acc_plc = tf.placeholder(tf.float32, shape=[],
                                      name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy_det',
                                             test_acc_plc)
        test_summaries = tf.summary.merge(
            [train_acc_summary, test_acc_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 550
        ensemble_size = 5

        def lr_policy(epoch_num):
            return policies.linear_decay(epoch_num, decay_start=0,
                                         total_epochs=n_epochs,
                                         start_value=1e-3)

        # Floor division: a trailing partial batch is skipped because the
        # placeholders require full batches.
        steps_per_train = len_train // batch_size
        steps_per_test = len_test // batch_size

        saver = tf.train.Saver()

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        try:
            with tf.Session(config=config) as sess:
                # initialize all variables
                sess.run(tf.global_variables_initializer())

                # restore checkpoints if it's provided
                if FLAGS.checkpoint != '':
                    restorer = tf.train.Saver(tf.get_collection('variables'))
                    restorer.restore(sess, FLAGS.checkpoint)

                start_time = time.time()
                la = tf.get_collection('log_alpha', scope=None)
                print(la)

                for epoch_num in range(n_epochs):
                    # Larger ensembles for the final epochs.
                    if epoch_num > 500:
                        ensemble_size = 10
                    if epoch_num > 540:
                        ensemble_size = 100

                    train_acc = 0.0
                    train_loss_ = 0
                    epoch_lr = lr_policy(epoch_num)
                    batches = reader.batch_iterator_train_crop_flip(
                        X_train, y_train, batch_size)
                    for batch_images, batch_labels in batches:
                        _, train_probs, summary, train_lossb = sess.run(
                            [train_op, probs_train, train_summaries,
                             train_loss],
                            feed_dict={lr: epoch_lr,
                                       images: batch_images,
                                       labels: batch_labels})
                        train_writer.add_summary(summary, global_step.eval())
                        train_loss_ += train_lossb / steps_per_train
                        train_acc += metrics.accurracy_np(
                            train_probs, batch_labels) / steps_per_train

                    test_acc_det, test_acc_stoch, test_acc_ens = 0.0, 0.0, 0.0
                    # Defined up front so the status line below cannot hit an
                    # unbound name when the test loop does not execute.
                    la_values = []
                    for i in range(steps_per_test):
                        lo, hi = i * batch_size, (i + 1) * batch_size
                        test_feed = {images: X_test[lo:hi],
                                     labels: y_test[lo:hi]}
                        batch_labels = y_test[lo:hi]

                        test_probs_stoch = np.zeros([batch_size, nclass])
                        test_probs_det = np.zeros([batch_size, nclass])
                        test_probs_ens = np.zeros([batch_size, nclass])
                        for sample_num in range(ensemble_size):
                            probs_batch_stoch = sess.run(probs_test_stoh,
                                                         feed_dict=test_feed)
                            test_probs_ens += (probs_batch_stoch /
                                               ensemble_size)
                            if sample_num == 0:
                                # The first sample doubles as the
                                # single-sample stochastic prediction.
                                test_probs_det, la_values = sess.run(
                                    [probs_test_det, la],
                                    feed_dict=test_feed)
                                test_probs_stoch = probs_batch_stoch
                        test_acc_det += metrics.accurracy_np(
                            test_probs_det, batch_labels) / steps_per_test
                        test_acc_stoch += metrics.accurracy_np(
                            test_probs_stoch, batch_labels) / steps_per_test
                        test_acc_ens += metrics.accurracy_np(
                            test_probs_ens, batch_labels) / steps_per_test

                    # Previously ``checkpoints_dir + 'cifar100/...'``: the
                    # path separator was missing and the dataset name was
                    # hard-coded.
                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                    # Write the per-epoch accuracy summaries, which were
                    # built but never emitted before.
                    summary = sess.run(test_summaries,
                                       feed_dict={test_acc_plc: test_acc_det,
                                                  train_acc_plc: train_acc})
                    test_writer.add_summary(summary, global_step.eval())

                    epoch_time, start_time = (int(time.time() - start_time),
                                              time.time())

                    # One line per epoch; a single-argument print behaves the
                    # same as a statement and as a function call.
                    print('epoch_num %3d train_loss %.3f train_acc %.3f '
                          'test_acc_det %.3f test_acc_stoch %.3f '
                          'test_acc_ens %.3f epoch_time %.3f la_values %s'
                          % (epoch_num, train_loss_, train_acc, test_acc_det,
                             test_acc_stoch, test_acc_ens, epoch_time,
                             la_values))
        finally:
            # Close the event files so buffered summaries reach disk.
            train_writer.close()
            test_writer.close()
def main(_):
    """Train the VGG-like network with the log-loss and report accuracies.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``suffix``, ``l2``, ``summaries_dir``, ``checkpoints_dir``
    and ``checkpoint``).  Each epoch trains on batches from
    ``reader.batch_iterator_train_crop_flip``, then evaluates the test split
    three ways: the ``probs_test_det`` output, a single ``probs_test_stoh``
    sample, and the mean of ``ensemble_size`` ``probs_test_stoh`` samples.
    A checkpoint is written and one status line is printed per epoch.

    Args:
        _: Unused positional argument.
    """
    batch_size = FLAGS.batch_size

    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/vgg_do_{}_{}'.format(FLAGS.dataset,
                                                     FLAGS.suffix)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/vgg_do_{}_{}'.format(FLAGS.dataset,
                                                              FLAGS.suffix)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/gpu:0'):
        # LOADING DATA
        data, len_train, len_test, input_shape, nclass = reader.load(
            FLAGS.dataset)
        X_train, y_train, X_test, y_test = data

        # BUILDING GRAPH
        # The placeholders have a fixed batch dimension, so every fed batch
        # must contain exactly batch_size examples.
        images = tf.placeholder(
            tf.float32,
            shape=[batch_size, input_shape[1], input_shape[2],
                   input_shape[3]],
            name='images')
        labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def inference(x, reuse, is_training, stohastic):
            return net_vgglike(x, nclass, wd, is_training, stohastic, reuse)

        def loss(logits, y):
            return metrics.log_loss(logits, y, len_train)

        (train_op, probs_train, probs_test_det, probs_test_stoh,
         train_loss) = utils.build_graph(images, labels, loss, inference, lr,
                                         global_step)
        # merge_all() runs before the accuracy summaries are created so that
        # train_summaries does not depend on the accuracy placeholders.
        train_summaries = tf.summary.merge_all()

        train_acc_plc = tf.placeholder(tf.float32, shape=[],
                                       name='train_acc_placeholder')
        train_acc_summary = tf.summary.scalar('train_accuracy_stoch',
                                              train_acc_plc)
        test_acc_plc = tf.placeholder(tf.float32, shape=[],
                                      name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy_det',
                                             test_acc_plc)
        test_summaries = tf.summary.merge(
            [train_acc_summary, test_acc_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 550
        ensemble_size = 5

        def lr_policy(epoch_num):
            return policies.linear_decay(epoch_num, decay_start=0,
                                         total_epochs=n_epochs,
                                         start_value=1e-3)

        def wd_policy(epoch_num):
            return FLAGS.l2

        # Floor division: a trailing partial batch is skipped because the
        # placeholders require full batches.
        steps_per_train = len_train // batch_size
        steps_per_test = len_test // batch_size

        saver = tf.train.Saver()

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        try:
            with tf.Session(config=config) as sess:
                # initialize all variables
                sess.run(tf.global_variables_initializer())

                # restore checkpoints if it's provided
                if FLAGS.checkpoint != '':
                    restorer = tf.train.Saver(tf.get_collection('variables'))
                    restorer.restore(sess, FLAGS.checkpoint)

                start_time = time.time()

                for epoch_num in range(n_epochs):
                    # Larger ensembles for the final epochs.
                    if epoch_num > 500:
                        ensemble_size = 10
                    if epoch_num > 540:
                        ensemble_size = 100

                    train_acc = 0.0
                    train_loss_ = 0.0
                    epoch_lr = lr_policy(epoch_num)
                    epoch_wd = wd_policy(epoch_num)
                    batches = reader.batch_iterator_train_crop_flip(
                        X_train, y_train, batch_size)
                    for batch_images, batch_labels in batches:
                        # wd was defined together with wd_policy but was
                        # never fed; feed it as the training step of the
                        # LeNet-5 script does.
                        _, train_probs, summary, train_lossb = sess.run(
                            [train_op, probs_train, train_summaries,
                             train_loss],
                            feed_dict={lr: epoch_lr,
                                       wd: epoch_wd,
                                       images: batch_images,
                                       labels: batch_labels})
                        train_writer.add_summary(summary, global_step.eval())
                        train_loss_ += train_lossb / steps_per_train
                        train_acc += metrics.accurracy_np(
                            train_probs, batch_labels) / steps_per_train

                    test_acc_det, test_acc_stoch, test_acc_ens = 0.0, 0.0, 0.0
                    for i in range(steps_per_test):
                        lo, hi = i * batch_size, (i + 1) * batch_size
                        test_feed = {images: X_test[lo:hi],
                                     labels: y_test[lo:hi]}
                        batch_labels = y_test[lo:hi]

                        test_probs_stoch = np.zeros([batch_size, nclass])
                        test_probs_det = np.zeros([batch_size, nclass])
                        test_probs_ens = np.zeros([batch_size, nclass])
                        for sample_num in range(ensemble_size):
                            probs_batch_stoch = sess.run(probs_test_stoh,
                                                         feed_dict=test_feed)
                            test_probs_ens += (probs_batch_stoch /
                                               ensemble_size)
                            if sample_num == 0:
                                # The first sample doubles as the
                                # single-sample stochastic prediction.
                                test_probs_det = sess.run(
                                    probs_test_det, feed_dict=test_feed)
                                test_probs_stoch = probs_batch_stoch
                        test_acc_det += metrics.accurracy_np(
                            test_probs_det, batch_labels) / steps_per_test
                        test_acc_stoch += metrics.accurracy_np(
                            test_probs_stoch, batch_labels) / steps_per_test
                        test_acc_ens += metrics.accurracy_np(
                            test_probs_ens, batch_labels) / steps_per_test

                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                    # Write the per-epoch accuracy summaries, which were
                    # built but never emitted before.
                    summary = sess.run(test_summaries,
                                       feed_dict={test_acc_plc: test_acc_det,
                                                  train_acc_plc: train_acc})
                    test_writer.add_summary(summary, global_step.eval())

                    epoch_time, start_time = (int(time.time() - start_time),
                                              time.time())

                    # One line per epoch; a single-argument print behaves the
                    # same as a statement and as a function call.
                    print('epoch_num %3d train_loss %.3f train_acc %.3f '
                          'test_acc_det %.3f test_acc_stoch %.3f '
                          'test_acc_ens %.3f epoch_time %.3f'
                          % (epoch_num, train_loss_, train_acc, test_acc_det,
                             test_acc_stoch, test_acc_ens, epoch_time))
        finally:
            # Close the event files so buffered summaries reach disk.
            train_writer.close()
            test_writer.close()
def main(_):
    """Train LeNet-5 with the log-loss and report train/test accuracy.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``suffix``, ``l2``, ``summaries_dir``, ``checkpoints_dir``
    and ``checkpoint``).  Each epoch walks the training arrays in order in
    fixed-size batches, evaluates the test arrays the same way, saves
    ``cur_model.ckpt``, writes the accuracy summaries and prints one status
    line.

    Args:
        _: Unused positional argument.
    """
    batch_size = FLAGS.batch_size

    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/lenet5_{}_{}'.format(FLAGS.dataset,
                                                     FLAGS.suffix)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/lenet5_{}_{}'.format(FLAGS.dataset,
                                                              FLAGS.suffix)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/gpu:0'):
        # LOADING DATA
        data, len_train, len_test, input_shape, nclass = reader.load(
            FLAGS.dataset)
        X_train, y_train, X_test, y_test = data

        # BUILDING GRAPH
        # The placeholders have a fixed batch dimension, so every fed batch
        # must contain exactly batch_size examples.
        images = tf.placeholder(
            tf.float32,
            shape=[batch_size, input_shape[1], input_shape[2],
                   input_shape[3]],
            name='images')
        labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def inference(x, reuse):
            return lenet5(x, nclass, wd, reuse)

        def loss(logits, y):
            return metrics.log_loss(logits, y, len_train)

        train_op, probs = utils.build_graph(images, labels, loss, inference,
                                            lr, global_step)
        # merge_all() runs before the accuracy summaries are created so that
        # train_summaries does not depend on the accuracy placeholders.
        train_summaries = tf.summary.merge_all()

        train_acc_plc = tf.placeholder(tf.float32, shape=[],
                                       name='train_acc_placeholder')
        train_acc_summary = tf.summary.scalar('train_accuracy_stoch',
                                              train_acc_plc)
        test_acc_plc = tf.placeholder(tf.float32, shape=[],
                                      name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy_det',
                                             test_acc_plc)
        test_summaries = tf.summary.merge(
            [train_acc_summary, test_acc_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 50

        def lr_policy(epoch_num):
            return policies.linear_decay(epoch_num, decay_start=0,
                                         total_epochs=n_epochs,
                                         start_value=1e-3)

        def wd_policy(epoch_num):
            return FLAGS.l2

        # Floor division keeps the counts integral for range() and skips a
        # trailing partial batch, which the fixed-size placeholders could
        # not accept anyway.
        steps_per_train = len_train // batch_size
        steps_per_test = len_test // batch_size

        saver = tf.train.Saver()

        config = tf.ConfigProto(allow_soft_placement=True)
        try:
            with tf.Session(config=config) as sess:
                # initialize all variables
                sess.run(tf.global_variables_initializer())

                # restore checkpoints if it's provided
                if FLAGS.checkpoint != '':
                    restorer = tf.train.Saver(tf.get_collection('variables'))
                    restorer.restore(sess, FLAGS.checkpoint)

                start_time = time.time()

                for epoch_num in range(n_epochs):
                    train_acc = 0.0
                    epoch_lr = lr_policy(epoch_num)
                    epoch_wd = wd_policy(epoch_num)
                    for i in range(steps_per_train):
                        lo, hi = i * batch_size, (i + 1) * batch_size
                        batch_labels = y_train[lo:hi]
                        _, probs_batch, summary = sess.run(
                            [train_op, probs, train_summaries],
                            feed_dict={lr: epoch_lr,
                                       wd: epoch_wd,
                                       images: X_train[lo:hi],
                                       labels: batch_labels})
                        train_writer.add_summary(summary, global_step.eval())
                        train_acc += metrics.accurracy_np(
                            probs_batch, batch_labels) / steps_per_train

                    test_acc = 0.0
                    for i in range(steps_per_test):
                        lo, hi = i * batch_size, (i + 1) * batch_size
                        batch_labels = y_test[lo:hi]
                        probs_batch = sess.run(
                            probs,
                            feed_dict={images: X_test[lo:hi],
                                       labels: batch_labels})
                        test_acc += metrics.accurracy_np(
                            probs_batch, batch_labels) / steps_per_test

                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                    summary = sess.run(test_summaries,
                                       feed_dict={test_acc_plc: test_acc,
                                                  train_acc_plc: train_acc})
                    test_writer.add_summary(summary, global_step.eval())

                    epoch_time, start_time = (int(time.time() - start_time),
                                              time.time())

                    # One line per epoch; a single-argument print behaves the
                    # same as a statement and as a function call.
                    print('epoch_num %3d train_acc %.3f test_acc %.3f '
                          'epoch_time %.3f'
                          % (epoch_num, train_acc, test_acc, epoch_time))
        finally:
            # Close the event files so buffered summaries reach disk.
            train_writer.close()
            test_writer.close()
def main(_):
    """Train the VGG-like SBP model from queue-based inputs.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``scale``, ``num_gpus``, ``summaries_dir``,
    ``checkpoints_dir`` and ``checkpoint``).  Variables are restored from
    ``FLAGS.checkpoint``; if the full set is not present in the checkpoint,
    only variables whose names do not contain ``sbp`` are restored.  After
    every epoch the test ops are averaged, ``cur_model.ckpt`` is saved, and
    ``best_model.ckpt`` is refreshed whenever the test accuracy is at least
    as good as the best one seen so far.

    Args:
        _: Unused positional argument.
    """
    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/vgglike_sbp_{}_scale{}'.format(
            FLAGS.dataset, FLAGS.scale)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/vgglike_sbp_{}_scale{}'.format(
            FLAGS.dataset, FLAGS.scale)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoint_path = FLAGS.checkpoint
    batch_size = FLAGS.batch_size

    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # DATASET QUEUES
        inputs, shape, n_train_examples, nclass = reader.get_producer(
            FLAGS.dataset, batch_size, training=True, distorted=True)
        images_train, labels_train = inputs
        inputs, shape, n_test_examples, nclass = reader.get_producer(
            FLAGS.dataset, batch_size, training=False)
        images_test, labels_test = inputs

        # BUILDING GRAPH
        devices = ['/gpu:%d' % i for i in range(FLAGS.num_gpus)]
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def inference(images, is_training, reuse):
            return net_vgglike(images, nclass, FLAGS.scale, is_training,
                               reuse)

        def loss(preds, labels, reuse):
            return metrics.sgvlb(preds, labels, reuse, n_train_examples,
                                 l2_weight=wd)

        operations = utils.build_graph_sbp(images_train, labels_train,
                                           images_test, labels_test,
                                           global_step, loss,
                                           metrics.accuracy, inference, lr,
                                           10.0, devices)
        train_op, test_acc_op, test_loss_op = operations
        # merge_all() runs before the test summaries are created so that
        # train_summaries does not depend on the test placeholders.
        train_summaries = tf.summary.merge_all()

        test_acc = tf.placeholder(tf.float32, shape=[],
                                  name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy', test_acc)
        test_loss = tf.placeholder(tf.float32, shape=[],
                                   name='test_loss_placeholder')
        test_loss_summary = tf.summary.scalar('test_loss', test_loss)
        test_summaries = tf.summary.merge(
            [test_acc_summary, test_loss_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 300
        # Floor division keeps the step counts integral for range().
        examples_per_step = batch_size * FLAGS.num_gpus
        steps_per_epoch = n_train_examples // examples_per_step + 1
        steps_per_test = n_test_examples // examples_per_step + 1

        def lr_policy(epoch_num):
            return policies.linear_decay(epoch_num,
                                         decay_start=250,
                                         total_epochs=n_epochs,
                                         start_value=1e-5)

        def wd_policy(epoch_num):
            return 0.0

        checkpoint_saver = tf.train.Saver()

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())

            # restoring
            # Real lists (not lazy filter objects) because both collections
            # are handed to a Saver and `variables` is iterated twice.
            excluded = ('adam', 'beta1_power_1', 'beta2_power_1')
            variables = [
                v for v in tf.get_collection('variables')
                if not any(tag in v.name.lower() for tag in excluded)
            ]
            net_variables = [v for v in variables
                             if 'sbp' not in v.name.lower()]
            try:
                restorer = tf.train.Saver(variables)
                restorer.restore(sess, checkpoint_path)
            except tf.errors.NotFoundError:
                print('variational variables are not found\n'
                      'restoring only net variables')
                restorer = tf.train.Saver(net_variables)
                restorer.restore(sess, checkpoint_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                best_test_acc = 0.0
                for epoch_num in range(n_epochs):
                    train_feed = {lr: lr_policy(epoch_num),
                                  wd: wd_policy(epoch_num)}
                    for step_num in range(steps_per_epoch):
                        _, summary = sess.run([train_op, train_summaries],
                                              feed_dict=train_feed)
                        train_writer.add_summary(summary, global_step.eval())

                    test_loss_total, test_acc_total = 0.0, 0.0
                    for step_num in range(steps_per_test):
                        batch_test_acc, batch_test_loss = sess.run(
                            [test_acc_op, test_loss_op])
                        test_acc_total += batch_test_acc / steps_per_test
                        test_loss_total += batch_test_loss / steps_per_test

                    if test_acc_total >= best_test_acc:
                        checkpoint_saver.save(
                            sess, checkpoints_dir + '/best_model.ckpt')
                        best_test_acc = test_acc_total
                    checkpoint_saver.save(sess,
                                          checkpoints_dir + '/cur_model.ckpt')

                    summary = sess.run(test_summaries,
                                       feed_dict={
                                           test_acc: test_acc_total,
                                           test_loss: test_loss_total
                                       })
                    test_writer.add_summary(summary, global_step.eval())
            finally:
                # Flush the event files and stop the input threads even when
                # training is interrupted by an exception.
                train_writer.close()
                test_writer.close()
                coord.request_stop()
                coord.join(threads)
def main(_):
    """Train the VGG-like SSL model, then save a filter-pruned checkpoint.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``data_dir``, ``num_gpus``, ``summaries_dir``,
    ``checkpoints_dir`` and ``checkpoint``).  After training, every
    ``conv_1`` .. ``conv_13`` variable is overwritten with a copy that keeps
    only the entries whose group norm exceeds ``1e-5``, and the result is
    saved as ``final_model.ckpt``.

    Args:
        _: Unused positional argument.
    """
    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/vgglike_ssl_{}'.format(FLAGS.dataset)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/vgglike_ssl_{}'.format(FLAGS.dataset)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # DATASET QUEUES
        inputs, shape, n_train_examples, nclass = reader.get_producer(
            FLAGS.dataset, FLAGS.batch_size, training=True, distorted=True,
            data_dir=FLAGS.data_dir)
        images_train, labels_train = inputs
        inputs, shape, n_test_examples, nclass = reader.get_producer(
            FLAGS.dataset, FLAGS.batch_size, training=False,
            data_dir=FLAGS.data_dir)
        images_test, labels_test = inputs

        # BUILDING GRAPH
        devices = ['/gpu:%d' % i for i in range(FLAGS.num_gpus)]
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        group_weight = tf.placeholder(tf.float32, shape=[],
                                      name='group_weight')
        tf.summary.scalar('weight_decay', wd)
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        num_filters = [
            64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512
        ]

        def inference(images, is_training, reuse):
            return net_vgglike(images, nclass, num_filters, is_training,
                               reuse)

        def loss(preds, labels, reuse):
            return metrics.ssl_loss(preds, labels, reuse, n_train_examples,
                                    wd, group_weight)

        train_op, test_acc_op, test_loss_op = utils.build_graph(
            images_train, labels_train, images_test, labels_test,
            global_step, loss, metrics.accuracy, inference, lr, devices)
        # merge_all() runs before the test summaries are created so that
        # train_summaries does not depend on the test placeholders.
        train_summaries = tf.summary.merge_all()

        test_acc = tf.placeholder(tf.float32, shape=[],
                                  name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy', test_acc)
        test_loss = tf.placeholder(tf.float32, shape=[],
                                   name='test_loss_placeholder')
        test_loss_summary = tf.summary.scalar('test_loss', test_loss)
        test_summaries = tf.summary.merge(
            [test_acc_summary, test_loss_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        saver = tf.train.Saver()

        # PRUNING OPS
        # final_variables maps a checkpoint name to a variable; names drop
        # the last two characters of v.name (presumably the ':0' suffix).
        final_variables = {}
        assign_ops = []
        reduced_size_ops = []
        variables = [v for v in tf.get_collection('variables')
                     if 'adam' not in v.name.lower()]
        # Plain loop: the previous map(...) call was used only for its side
        # effect and would do nothing where map is lazy.
        for v in variables:
            if not v.name.startswith('conv_'):
                final_variables[v.name[:-2]] = v

        previous_mask = None
        for conv_number in range(1, 14):
            scope_prefix = 'conv_%d/' % conv_number
            conv_vars = [v for v in variables
                         if v.name.startswith(scope_prefix)]
            kernel = [v for v in conv_vars if 'kernel' in v.name][0]
            biases = [v for v in conv_vars if 'biases' in v.name][0]

            # One norm per index of the kernel's last axis, taken over the
            # kernel with the biases added; the epsilon is added under the
            # sqrt.  Named group_norms so the `loss` callable above is not
            # shadowed.
            group_norms = tf.sqrt(
                tf.reduce_sum(tf.nn.bias_add(kernel, biases)**2, [0, 1, 2]) +
                1e-16)
            mask = tf.cast(
                tf.greater(group_norms, 1e-5 * tf.ones_like(group_norms)),
                tf.bool)
            reduced_size_ops.append(tf.reduce_sum(tf.cast(mask, tf.int32)))

            # Keep the surviving slices along the kernel's last axis ...
            reduced_kernel = tf.transpose(
                tf.boolean_mask(tf.transpose(kernel, [3, 0, 1, 2]), mask),
                [1, 2, 3, 0])
            # ... and, past the first layer, drop the slices along axis 2
            # that the previous layer's mask removed.
            if conv_number > 1:
                reduced_kernel = tf.transpose(
                    tf.boolean_mask(
                        tf.transpose(reduced_kernel, [2, 0, 1, 3]),
                        previous_mask), [1, 2, 0, 3])
            # validate_shape=False because the pruned tensor has a different
            # shape from the variable it replaces.
            assign_ops.append(
                tf.assign(kernel, reduced_kernel, validate_shape=False))
            final_variables[kernel.name[:-2]] = kernel

            for v in conv_vars:
                if 'kernel' in v.name:
                    continue
                reduced_v = tf.boolean_mask(v, mask)
                assign_ops.append(
                    tf.assign(v, reduced_v, validate_shape=False))
                final_variables[v.name[:-2]] = v
            previous_mask = mask

        final_saver = tf.train.Saver(final_variables)

        # TRAINING
        n_epochs = 200
        # Floor division keeps the step counts integral for range().
        examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        steps_per_epoch = n_train_examples // examples_per_step + 1
        steps_per_test = n_test_examples // examples_per_step + 1

        def lr_policy(epoch_num):
            return policies.linear_decay(epoch_num, decay_start=100,
                                         total_epochs=n_epochs,
                                         start_value=1e-4)

        def wd_policy(epoch_num):
            return 1.0

        def gw_policy(epoch_num):
            return 1.0

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        with tf.Session(config=config, graph=graph) as sess:
            # initialize all variables
            sess.run(tf.global_variables_initializer())

            # restore checkpoints if it's provided
            if FLAGS.checkpoint != '':
                variables_to_restore = [
                    v for v in tf.get_collection('variables')
                    if 'reduced' not in v.name
                ]
                restorer = tf.train.Saver(variables_to_restore)
                restorer.restore(sess, FLAGS.checkpoint)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                best_test_acc = 0.0
                # NOTE(review): only one epoch is run although n_epochs is
                # 200 and lr_policy is parameterised by it -- confirm
                # whether range(1) is intentional.
                for epoch_num in range(1):
                    train_feed = {lr: lr_policy(epoch_num),
                                  wd: wd_policy(epoch_num),
                                  group_weight: gw_policy(epoch_num)}
                    for step_num in range(steps_per_epoch):
                        _, summary = sess.run([train_op, train_summaries],
                                              feed_dict=train_feed)
                        train_writer.add_summary(summary, global_step.eval())

                    test_loss_total, test_acc_total = 0.0, 0.0
                    for step_num in range(steps_per_test):
                        batch_test_acc, batch_test_loss = sess.run(
                            [test_acc_op, test_loss_op])
                        test_acc_total += batch_test_acc / steps_per_test
                        test_loss_total += batch_test_loss / steps_per_test

                    if test_acc_total >= best_test_acc:
                        saver.save(sess, checkpoints_dir + '/best_model.ckpt')
                        best_test_acc = test_acc_total
                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                    summary = sess.run(test_summaries,
                                       feed_dict={
                                           test_acc: test_acc_total,
                                           test_loss: test_loss_total
                                       })
                    test_writer.add_summary(summary, global_step.eval())
                    print("Epoch %d test accuracy: %.3f" %
                          (epoch_num, test_acc_total))

                # Report how many entries survive per layer, then overwrite
                # the conv variables with their pruned versions and save.
                reduced_filter_sizes = sess.run(reduced_size_ops)
                print(reduced_filter_sizes)
                sess.run(assign_ops)
                final_saver.save(sess, checkpoints_dir + '/final_model.ckpt')
            finally:
                # Flush the event files and stop the input threads even when
                # training is interrupted by an exception.
                train_writer.close()
                test_writer.close()
                coord.request_stop()
                coord.join(threads)
def main(_):
    """Train the VGG-like baseline from queue-based inputs.

    Settings come from the module-level ``FLAGS`` (``batch_size``,
    ``dataset``, ``data_dir``, ``scale``, ``num_gpus``, ``summaries_dir``,
    ``checkpoints_dir`` and ``checkpoint``).  The learning rate follows
    ``policies.linear_decay`` and the weight decay follows
    ``policies.linear_growth``.  After every epoch the test ops are
    averaged, ``cur_model.ckpt`` is saved, and ``best_model.ckpt`` is
    refreshed whenever the test accuracy is at least as good as the best one
    seen so far.

    Args:
        _: Unused positional argument.
    """
    summaries_dir = FLAGS.summaries_dir
    if not summaries_dir:
        summaries_dir = './logs/vgglike_{}_scale{}'.format(
            FLAGS.dataset, FLAGS.scale)
        summaries_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')
    checkpoints_dir = FLAGS.checkpoints_dir
    if not checkpoints_dir:
        checkpoints_dir = './checkpoints/vgglike_{}_scale{}'.format(
            FLAGS.dataset, FLAGS.scale)
        checkpoints_dir += time.strftime('_%d-%m-%Y_%H:%M:%S')

    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # DATASET QUEUES
        inputs, shape, n_train_examples, nclass = reader.get_producer(
            FLAGS.dataset, FLAGS.batch_size, training=True, distorted=True,
            data_dir=FLAGS.data_dir)
        images_train, labels_train = inputs
        inputs, shape, n_test_examples, nclass = reader.get_producer(
            FLAGS.dataset, FLAGS.batch_size, training=False,
            data_dir=FLAGS.data_dir)
        images_test, labels_test = inputs

        # BUILDING GRAPH
        devices = ['/gpu:%d' % i for i in range(FLAGS.num_gpus)]
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        wd = tf.placeholder(tf.float32, shape=[], name='weight_decay')
        tf.summary.scalar('weight_decay', wd)
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def inference(images, is_training, reuse):
            return net_vgglike(images, nclass, FLAGS.scale, is_training,
                               reuse)

        def loss(preds, labels, reuse):
            return metrics.log_loss(preds, labels, reuse, n_train_examples,
                                    wd)

        train_op, test_acc_op, test_loss_op = utils.build_graph(
            images_train, labels_train, images_test, labels_test,
            global_step, loss, metrics.accuracy, inference, lr, devices)
        # merge_all() runs before the test summaries are created so that
        # train_summaries does not depend on the test placeholders.
        train_summaries = tf.summary.merge_all()

        test_acc = tf.placeholder(tf.float32, shape=[],
                                  name='test_acc_placeholder')
        test_acc_summary = tf.summary.scalar('test_accuracy', test_acc)
        test_loss = tf.placeholder(tf.float32, shape=[],
                                   name='test_loss_placeholder')
        test_loss_summary = tf.summary.scalar('test_loss', test_loss)
        test_summaries = tf.summary.merge(
            [test_acc_summary, test_loss_summary])

        # SUMMARIES WRITERS
        train_writer = tf.summary.FileWriter(summaries_dir + '/train', graph)
        test_writer = tf.summary.FileWriter(summaries_dir + '/test', graph)

        # TRAINING
        n_epochs = 200
        # Floor division keeps the step counts integral for range().
        examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        steps_per_epoch = n_train_examples // examples_per_step + 1
        steps_per_test = n_test_examples // examples_per_step + 1

        def lr_policy(epoch_num):
            return policies.linear_decay(epoch_num,
                                         decay_start=100,
                                         total_epochs=n_epochs,
                                         start_value=1e-3)

        def wd_policy(epoch_num):
            return policies.linear_growth(epoch_num,
                                          growth_start=0,
                                          total_epochs=n_epochs,
                                          start_value=1e-3,
                                          end_value=1.0)

        saver = tf.train.Saver()

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        with tf.Session(config=config) as sess:
            # initialize all variables
            sess.run(tf.global_variables_initializer())

            # restore checkpoints if it's provided
            if FLAGS.checkpoint != '':
                restorer = tf.train.Saver(tf.get_collection('variables'))
                restorer.restore(sess, FLAGS.checkpoint)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                best_test_acc = 0.0
                for epoch_num in range(n_epochs):
                    train_feed = {lr: lr_policy(epoch_num),
                                  wd: wd_policy(epoch_num)}
                    for step_num in range(steps_per_epoch):
                        _, summary = sess.run([train_op, train_summaries],
                                              feed_dict=train_feed)
                        train_writer.add_summary(summary, global_step.eval())

                    test_loss_total, test_acc_total = 0.0, 0.0
                    for step_num in range(steps_per_test):
                        batch_test_acc, batch_test_loss = sess.run(
                            [test_acc_op, test_loss_op])
                        test_acc_total += batch_test_acc / steps_per_test
                        test_loss_total += batch_test_loss / steps_per_test

                    if test_acc_total >= best_test_acc:
                        saver.save(sess, checkpoints_dir + '/best_model.ckpt')
                        best_test_acc = test_acc_total
                    saver.save(sess, checkpoints_dir + '/cur_model.ckpt')

                    summary = sess.run(test_summaries,
                                       feed_dict={
                                           test_acc: test_acc_total,
                                           test_loss: test_loss_total
                                       })
                    test_writer.add_summary(summary, global_step.eval())
                    print("Epoch %d test accuracy: %.3f" %
                          (epoch_num, test_acc_total))
            finally:
                # Flush the event files and stop the input threads even when
                # training is interrupted by an exception.
                train_writer.close()
                test_writer.close()
                coord.request_stop()
                coord.join(threads)