    return loss, metrics_acc


print("====================================BUILDING UP TRAINING GRAPH====================================")
train_graph = tf.Graph()
with train_graph.as_default():
    global_step = tf.Variable(0, trainable=False, name='global_step',
                              dtype=tf.int64)
    train_dataset = get_dataset(
        FLAGS.train_dir,
        FLAGS.batch_size * GPU_NUMS,
        FLAGS.epoch_num,
        reshape_size=[FLAGS.reshape_height, FLAGS.reshape_width],
        augment_func=inception_augmentation,
        num_readers=4)
    _, train_image, train_labels = get_next_batch(train_dataset)
    # dataset labels are 1-based; shift them to the 0-based range the loss expects
    train_labels -= 1
    # polynomial decay from weight_learning_rate down to end_learning_rate;
    # note that FLAGS.decay_rate is used as the polynomial *power* here
    learning_rate = tf.train.polynomial_decay(
        learning_rate=FLAGS.weight_learning_rate,
        global_step=global_step,
        decay_steps=FLAGS.max_iter_epoch * TRAIN_EPOCH_STEPS,
        power=FLAGS.decay_rate,
        end_learning_rate=FLAGS.end_learning_rate)
    # learning_rate = FLAGS.weight_learning_rate
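# ---------------------------------------------------------------------------
# Standalone sketch (hypothetical values, not part of this script): how
# tf.train.polynomial_decay drives the schedule above. With cycle=False,
#   lr(t) = (lr0 - lr_end) * (1 - min(t, decay_steps) / decay_steps)**power + lr_end
# so FLAGS.decay_rate acts as the exponent, not a multiplicative decay factor.
# ---------------------------------------------------------------------------
import tensorflow as tf

step = tf.placeholder(tf.int64, shape=[])
lr = tf.train.polynomial_decay(learning_rate=0.1,    # hypothetical lr0
                               global_step=step,
                               decay_steps=1000,     # hypothetical horizon
                               power=0.9,
                               end_learning_rate=1e-4)
with tf.Session() as sess:
    for s in [0, 500, 1000, 2000]:
        # decays smoothly to 1e-4 at step 1000, then stays clamped there
        print(s, sess.run(lr, feed_dict={step: s}))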
    return loss, accuracy


print("====================================BUILDING UP EVAL GRAPH====================================")
eval_graph = tf.Graph()
with eval_graph.as_default():
    eval_step = tf.Variable(0, trainable=False, name='global_step',
                            dtype=tf.int64)
    eval_dataset = get_dataset(
        FLAGS.valid_dir,
        FLAGS.batch_size * GPU_NUMS,
        FLAGS.epoch_num,
        reshape_size=[FLAGS.reshape_height, FLAGS.reshape_width],
        augment_func=eval_preprocess,
        shuffle=None,
        num_readers=4)
    _, eval_image, eval_labels = get_next_batch(eval_dataset)
    # labels are 1-based in the ResNet-style records; shift them to 0-based
    eval_labels -= 1

    eval_gather_losses = []
    eval_gather_accs = []
    # ema = tf.train.ExponentialMovingAverage(FLAGS.ema_decay)
    first_clone_scope = None
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for gpu_id in range(GPU_NUMS):
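# ---------------------------------------------------------------------------
# Standalone sketch (hypothetical model_fn and sizes): the tower pattern the
# loop above sets up. Each GPU takes one slice of the combined batch, and
# tf.AUTO_REUSE makes every tower share a single set of variables instead of
# creating per-GPU copies. Graph construction only; no session is run here.
# ---------------------------------------------------------------------------
import tensorflow as tf

def model_fn(x):  # hypothetical stand-in for the real network
    return tf.layers.dense(x, 10, name='logits')

GPU_NUMS, BATCH = 2, 4                               # assumed sizes
images = tf.random_normal([GPU_NUMS * BATCH, 32])    # stands in for the input batch
with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    tower_logits = []
    for gpu_id in range(GPU_NUMS):
        with tf.device('/gpu:%d' % gpu_id):
            batch = images[gpu_id * BATCH:(gpu_id + 1) * BATCH]
            tower_logits.append(model_fn(batch))     # same weights on every GPU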
print('Deploying Model on CPU')
# set up session and per-epoch step count
sess = tf.Session(config=config)
EPOCH_STEPS = int(ceil(FLAGS.epoch_len / FLAGS.batch_size / GPU_NUMS))

with tf.device(store_device):
    global_step = tf.Variable(0, trainable=False, name='global_step',
                              dtype=tf.int64)
    # read data
    reshape_size = [FLAGS.reshape_height, FLAGS.reshape_width]
    dataset = get_dataset(FLAGS.data_dir,
                          FLAGS.batch_size * GPU_NUMS,
                          FLAGS.epoch_num,
                          reshape_size,
                          augment_func=inception_augmentation,
                          num_readers=4)
    name, image, label = get_next_batch(dataset)


def inference_to_loss(gpu_id):
    # slice this GPU's shard out of the combined batch
    image_batch = image[gpu_id * FLAGS.batch_size:(gpu_id + 1) * FLAGS.batch_size, ...]
    label_batch = label[gpu_id * FLAGS.batch_size:(gpu_id + 1) * FLAGS.batch_size, ...]
    with arg_scope(xception_arg_scope_gn(weight_decay=FLAGS.weight_decay)):
        net, end_points = xception_65(image_batch,
                                      global_pool=True,
                                      num_classes=FLAGS.num_classes,
                                      is_training=True,
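# ---------------------------------------------------------------------------
# Standalone sketch (names hypothetical, not copied from this repository):
# once a function like inference_to_loss above yields per-GPU losses, the
# classic TF multi-tower pattern averages the per-tower gradients on the
# parameter device (store_device) before one apply_gradients call.
# ---------------------------------------------------------------------------
import tensorflow as tf

def average_gradients(tower_grads):
    """tower_grads: one [(grad, var), ...] list per GPU, same variable order."""
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grads_and_vars]
        avg = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((avg, grads_and_vars[0][1]))  # variables are shared
    return averaged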