def train():
    """Build the multi-GPU training graph and run the train/eval loop.

    The input batch is split evenly across ``NUM_GPUS`` towers, each tower
    computes its own loss and gradients, and the averaged gradients are
    applied once per step. After every epoch a checkpoint is written to
    ``LOG_DIR`` and ``eval_one_epoch`` is run on ``val_loader``.

    All configuration is read from module-level names (``MODEL``,
    ``BATCH_SIZE``, ``OPTIMIZER``, ``MODEL_PATH``, ...).

    Raises:
        ValueError: if ``OPTIMIZER`` is neither ``'momentum'`` nor ``'adam'``.
    """
    with tf.Graph().as_default():
        with tf.device('/cpu:0'):
            audio_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_SEGS, FREQBINS, TIMEBINS, NUM_CLASSES)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # `batch` is the global step: it is passed as global_step to
            # apply_gradients below, so the optimizer increments it on
            # every training step.
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            print("--- Get training operator")
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(
                    learning_rate, momentum=MOMENTUM, use_nesterov=True)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail here instead of with a NameError further down.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum' or 'adam'"
                    % (OPTIMIZER,))

            # Keyword arguments shared by every MODEL.get_model call.
            model_kwargs = dict(
                num_classes=10 if DEBUG else NUM_CLASSES,
                is_training=is_training_pl,
                bn_decay=bn_decay,
                weight_decay=WEIGHT_DECAY,
                sn=SN,
                pool_t=POOL_T,
                freeze_bn=FREEZE_BN)

            # Build the model once before the tower loop so that its
            # variables already exist when the towers request them with
            # reuse=True.
            MODEL.get_model(audio_pl, **model_kwargs)

            tower_grads = []
            pred_gpu = []
            total_loss_gpu = []
            for i in range(NUM_GPUS):
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    with tf.device('/gpu:%d' % (i)), tf.name_scope(
                            'gpu_%d' % (i)) as scope:
                        # Evenly split input data to each GPU.
                        vd_batch = tf.slice(
                            audio_pl,
                            [i * DEVICE_BATCH_SIZE, 0, 0, 0, 0],
                            [DEVICE_BATCH_SIZE, -1, -1, -1, -1])
                        # Labels are sliced as a 1-D tensor of class
                        # indices (not one-hot).
                        label_batch = tf.slice(labels_pl,
                                               [i * DEVICE_BATCH_SIZE],
                                               [DEVICE_BATCH_SIZE])

                        pred, end_points = MODEL.get_model(
                            vd_batch, **model_kwargs)

                        MODEL.get_loss(pred, label_batch, end_points)
                        # Only the losses registered under this tower's
                        # name scope.
                        losses = tf.get_collection('losses', scope)
                        total_loss = tf.add_n(losses, name='total_loss')
                        for loss_tensor in losses + [total_loss]:
                            tf.summary.scalar(loss_tensor.op.name,
                                              loss_tensor)

                        grads = optimizer.compute_gradients(total_loss)
                        tower_grads.append(grads)

                        pred_gpu.append(pred)
                        total_loss_gpu.append(total_loss)

            # Merge predictions and losses from all towers.
            pred = tf.concat(pred_gpu, 0)
            total_loss = tf.reduce_mean(total_loss_gpu)

            grads = average_gradients(tower_grads)
            train_op = optimizer.apply_gradients(grads, global_step=batch)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = (tf.reduce_sum(tf.cast(correct, tf.float32))
                        / float(BATCH_SIZE))
            tf.summary.scalar('accuracy', accuracy)

        # Add ops to save all the variables.
        saver_save = tf.train.Saver(max_to_keep=50)

        # Create a session.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False

        # The context manager closes the session when training ends or
        # fails.
        with tf.Session(config=config) as sess:
            # Add summary writers.
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                os.path.join(LOG_DIR, 'train'), sess.graph)
            test_writer = tf.summary.FileWriter(
                os.path.join(LOG_DIR, 'test'), sess.graph)
            try:
                # Init variables.
                sess.run(tf.global_variables_initializer())

                # Restore variables from disk.
                if MODEL_PATH is not None:
                    if 'npz' not in MODEL_PATH:
                        # When RESET_LR is set, the step variable
                        # 'batch:0' is put on the restorer's ignore list.
                        sr = SaverRestore(
                            MODEL_PATH, log_string,
                            ignore=['batch:0'] if RESET_LR else [])
                        sr.run_init(sess)
                        log_string("Model restored.")
                    else:
                        # Copy the arrays out so the archive can be closed.
                        with np.load(MODEL_PATH) as dict_file:
                            dict_for_restore = {
                                k: dict_file[k] for k in dict_file.keys()}
                        dict_for_restore = MODEL.name_mapping(
                            dict_for_restore, debug=DEBUG)
                        dict_for_restore = MODEL.convert_2d_3d(
                            dict_for_restore)
                        dr = DictRestore(dict_for_restore, log_string)
                        dr.run_init(sess)
                        log_string("npz file restored.")

                ops = {
                    'audio_pl': audio_pl,
                    'labels_pl': labels_pl,
                    'is_training_pl': is_training_pl,
                    'pred': pred,
                    'loss': total_loss,
                    'train_op': train_op,
                    'merged': merged,
                    'step': batch,
                    # end_points of the last tower built in the loop above.
                    'end_points': end_points
                }

                for epoch in range(MAX_EPOCH):
                    log_string('**** EPOCH %03d ****' % (epoch))
                    log_string('learning_rate: {}'.format(
                        sess.run(learning_rate)))
                    sys.stdout.flush()

                    train_one_epoch(sess, ops, train_writer, train_loader)

                    # Save the variables to disk after every epoch.
                    save_path = saver_save.save(
                        sess,
                        os.path.join(LOG_DIR, "model-{}.ckpt".format(epoch)))
                    log_string("Model saved in file: %s" % save_path)

                    eval_one_epoch(sess, ops, test_writer, val_loader)
            finally:
                # Flush and release the event files even if a step fails.
                train_writer.close()
                test_writer.close()
def evaluate():
    """Build a multi-GPU inference graph, restore weights and run one eval pass.

    The placeholder batch size is chosen from the module-level ``FCN``
    setting and split evenly across ``NUM_GPUS`` devices. Weights are loaded
    from ``MODEL_PATH`` (a TF checkpoint, or an ``.npz`` archive when the
    path contains ``'npz'``) before ``eval_one_epoch`` is called.

    Raises:
        ValueError: if the chosen batch size is not divisible by
            ``NUM_GPUS``.
    """
    # Placeholder batch size per FCN setting; any other value uses 1.
    pl_bsize_by_fcn = {1: 10, 3: 10, 5: 5, 6: 8, 8: 12, 10: 10}

    with tf.Graph().as_default():
        pl_bsize = pl_bsize_by_fcn.get(FCN, 1)

        # Raised explicitly rather than asserted, so the check still runs
        # under `python -O`.
        if pl_bsize % NUM_GPUS != 0:
            raise ValueError(
                "Batch size %d (FCN=%r) is not divisible by NUM_GPUS=%d"
                % (pl_bsize, FCN, NUM_GPUS))
        device_batch_size = pl_bsize // NUM_GPUS

        # NOTE(review): train() in this file passes NUM_SEGS / FREQBINS /
        # TIMEBINS / NUM_CLASSES to placeholder_inputs and calls get_model
        # with keyword arguments; the calls below use different arguments.
        # Confirm they match the current MODEL interface.
        video_pl, labels_pl = MODEL.placeholder_inputs(
            pl_bsize, NUM_FRAMES, HEIGHT, WIDTH, evaluate=True)
        is_training_pl = tf.placeholder(tf.bool, shape=())

        # Build the model once so that its variables exist before the
        # per-GPU copies request them with reuse=True.
        MODEL.get_model(video_pl, is_training_pl, NUM_CLASSES)

        pred_gpu = []
        for i in range(NUM_GPUS):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                with tf.device('/gpu:%d' % (i)):
                    # Slice of the input batch handled by this GPU.
                    vd_batch = tf.slice(
                        video_pl,
                        [i * device_batch_size, 0, 0, 0, 0],
                        [device_batch_size, -1, -1, -1, -1])

                    pred, end_points = MODEL.get_model(
                        vd_batch, is_training_pl, NUM_CLASSES)
                    pred_gpu.append(pred)

        pred = tf.concat(pred_gpu, 0)

        saver = tf.train.Saver()

        # Create a session.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False

        # The context manager closes the session when evaluation ends or
        # fails.
        with tf.Session(config=config) as sess:
            # Init variables.
            sess.run(tf.global_variables_initializer())

            # Restore variables from disk.
            if MODEL_PATH is not None:
                if 'npz' not in MODEL_PATH:
                    saver.restore(sess, MODEL_PATH)
                    log_string("Model restored.")
                else:
                    # Copy the arrays out so the archive can be closed.
                    with np.load(MODEL_PATH) as dict_file:
                        dict_for_restore = {
                            k: dict_file[k] for k in dict_file.keys()}
                    dict_for_restore = MODEL.name_mapping(dict_for_restore)
                    dict_for_restore = MODEL.convert_2d_3d(dict_for_restore)
                    dr = DictRestore(dict_for_restore, log_string)
                    dr.run_init(sess)
                    log_string("npz file restored.")

            ops = {
                'video_pl': video_pl,
                'labels_pl': labels_pl,
                'is_training_pl': is_training_pl,
                'pred': pred
            }

            # NOTE(review): train() calls
            # eval_one_epoch(sess, ops, test_writer, val_loader) with an
            # 'audio_pl' key in ops; this call has three arguments and a
            # 'video_pl' key. Confirm which form eval_one_epoch expects.
            eval_one_epoch(sess, ops, val_loader)