def run_experiment(self, **kwargs):
    """Run one training/evaluation experiment and return the negated MAE.

    Seeds numpy and TensorFlow from the config, optionally adjusts
    hyper-parameters from ``kwargs``, builds the ``Data`` object, trains
    (bimodal late-fusion or single model depending on ``feature_type`` /
    ``fusion``) and optionally logs the run to MLflow.

    Args:
        **kwargs: hyper-parameter overrides forwarded to ``adjust_pars``.

    Returns:
        float: ``-mae`` (negated so that maximizing this value minimizes
        the mean absolute error — convenient for optimizers).
    """
    if self.options.opt:
        self.optimize()
    seed = self.config['general']['seed']
    np.random.seed(seed)
    tf.random.set_seed(seed)
    if kwargs:
        self.adjust_pars(kwargs)
    self.data = Data(self.config, self.options, self.pars)
    if self.options.mlflow:
        mlflow.set_experiment(self.config['general']['experiment'])
        mlflow.start_run()
        self.log_pars()
        mlflow.set_tags({'seed': seed, 'mode': self.options.mode})
    try:
        if self.feature_type == 'combined' and self.fusion == 'late':
            y_true, y_pred, mae = self.train_bimodal()
        else:
            _, y_pred, _, y_true, mae = self.train_model(self.feature_type)
        if self.options.verbose:
            run_validation(y_true, y_pred)
    finally:
        # Always close the MLflow run, even when training raises, so a
        # failed experiment does not leave a dangling active run behind.
        if self.options.mlflow:
            mlflow.end_run()
    return -mae
def run_testing(model_name):
    """Run the two-fold-evaluation test pass for one budget model.

    Restores the trained variables (plus batch-norm moving statistics) from
    ``FLAGS.two_fold_eval_ckpt_dir/<model_name>``, evaluates on the test
    split, then prints the accuracy and appends it to
    ``<test_result_dir>/test_result.txt``.

    Args:
        model_name: name of the testing budget model; used both to locate
            the checkpoint directory and to format the result directory.
    """
    # save testing result in this dir:
    test_result_dir = FLAGS.test_result_dir.format(model_name)
    # exist_ok=True replaces the racy exists()+makedirs() pair.
    os.makedirs(test_result_dir, exist_ok=True)
    # build graph:
    (graph, init_op, _, logits_op, _, correct_count_op, _, _, _,
     val_videos_op, val_actor_labels_op, test_videos_op, test_actor_labels_op,
     videos_placeholder, utility_labels_placeholder, budget_labels_placeholder,
     istraining_placeholder, varlist_budget, varlist_degrad) = build_graph(model_name)
    # session config:
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    # config.gpu_options.allow_growth = True
    # 'a' (append) so repeated test runs accumulate instead of clobbering;
    # the context manager guarantees the handle is closed (it was leaked before).
    with open(test_result_dir + '/test_result' + '.txt', 'a') as test_result_file:
        # run session:
        with tf.Session(graph=graph, config=config) as sess:
            # load degrade and budget model ckpts. Batch-norm moving statistics
            # are not trainable variables, so they must be restored explicitly:
            bn_moving_vars = [g for g in tf.global_variables() if 'moving_mean' in g.name]
            bn_moving_vars += [g for g in tf.global_variables() if 'moving_variance' in g.name]
            two_fold_eval_ckpt_dir = os.path.join(FLAGS.two_fold_eval_ckpt_dir, model_name)
            varlist = tf.trainable_variables()
            varlist += bn_moving_vars
            restore_model_ckpt(sess=sess, ckpt_dir=two_fold_eval_ckpt_dir, varlist=varlist)
            # end loading ckpts
            test_correct_num_lst, test_acc_lst, total_v = run_validation(
                sess=sess,
                right_count_op_list=[correct_count_op],
                placeholder_list=[
                    videos_placeholder, utility_labels_placeholder,
                    budget_labels_placeholder, istraining_placeholder
                ],
                batch_size=train_batch_size,
                dataset='test',
                # NOTE(review): istraining=True during the test pass — confirm intentional
                istraining=True)
            # print and write testing result:
            test_result_str = model_name + '\n' + 'test acc: %.2f, test_correct_num:%d, total_v:%d\n' % (
                test_acc_lst[0], test_correct_num_lst[0], total_v)
            print(test_result_str)
            test_result_file.write(test_result_str)
    print("done")
def run_pretraining_fT(start_from_trained_model):
    '''
    Initialize f_T (the utility / target model) on top of a pretrained f_d
    (degradation model): the degradation checkpoint is restored frozen-style
    and f_T is trained on the utility task with gradient accumulation over
    FLAGS.n_minibatches before each weight update.

    Args:
        start_from_trained_model: boolean. If False, use sports1M initialized fT. If true, use pretrained fT.
    '''
    # Checkpoint locations: f_d is read from degradation_models, f_T is
    # saved into (and optionally restored from) target_models.
    degradation_ckpt_dir = os.path.join(COMMON_FLAGS.pretrain_dir, 'degradation_models')
    target_ckpt_dir = os.path.join(COMMON_FLAGS.pretrain_dir, 'target_models')
    if not os.path.exists(target_ckpt_dir):
        os.makedirs(target_ckpt_dir)
    # define graph:
    graph = tf.Graph()
    with graph.as_default():
        # global step:
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        # placeholder inputs (one concatenated batch covering all GPUs):
        videos_placeholder, utility_labels_placeholder, budget_labels_placeholder, dropout_placeholder, istraining_placeholder = \
            placeholder_inputs(TRAIN_BATCH_SIZE * FLAGS.GPU_NUM)
        # Per-GPU ("tower") gradient lists, filled in the loop below:
        tower_grads_degrad, tower_grads_utility = [], []
        # Compute Acc
        logits_utility_lst = []
        # Compute Loss
        loss_utility_lst = []
        # optimizations: separate optimizers (and learning rates) for the
        # degradation module and the utility module.
        opt_degrad = tf.train.AdamOptimizer(1e-3)
        opt_utility = tf.train.AdamOptimizer(1e-5)
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_index in range(0, FLAGS.GPU_NUM):
                with tf.device('/gpu:%d' % gpu_index):
                    print('/gpu:%d' % gpu_index)
                    with tf.name_scope('%s_%d' % ('gpu', gpu_index)) as scope:
                        # Slice this GPU's shard out of the combined batch:
                        videos = videos_placeholder[gpu_index * TRAIN_BATCH_SIZE : (gpu_index + 1) * TRAIN_BATCH_SIZE]
                        utility_labels = utility_labels_placeholder[gpu_index * TRAIN_BATCH_SIZE : (gpu_index + 1) * TRAIN_BATCH_SIZE]
                        loss_utility, logits_utility = create_architecture_pretraining_fT(scope, videos, utility_labels, dropout_placeholder)
                        logits_utility_lst.append(logits_utility)
                        loss_utility_lst.append(loss_utility)
                        # Partition trainable variables by module name:
                        varlist_degrad = [v for v in tf.trainable_variables() if any(x in v.name for x in ["DegradationModule"])]
                        varlist_utility = [v for v in tf.trainable_variables() if any(x in v.name for x in ["UtilityModule"])]
                        # Both modules are trained on the same utility loss here.
                        grads_degrad = opt_degrad.compute_gradients(loss_utility, varlist_degrad)
                        grads_utility = opt_utility.compute_gradients(loss_utility, varlist_utility)
                        tower_grads_degrad.append(grads_degrad)
                        tower_grads_utility.append(grads_utility)
                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()
        # Average loss / concat logits across towers:
        loss_utility_op = tf.reduce_mean(loss_utility_lst, name='softmax')
        logits_utility = tf.concat(logits_utility_lst, 0)
        right_count_utility_op = correct_num(logits_utility, utility_labels_placeholder)
        # Gradient-accumulation ops: zero accumulators, accumulate per
        # minibatch, then apply once per step.
        zero_ops_degrad, accum_ops_degrad, apply_gradient_op_degrad = create_grad_accum_for_late_update(
            opt_degrad, tower_grads_degrad, varlist_degrad, FLAGS.n_minibatches, global_step,
            decay_with_global_step=False)
        zero_ops_utility, accum_ops_utility, apply_gradient_op_utility = create_grad_accum_for_late_update(
            opt_utility, tower_grads_utility, varlist_utility, FLAGS.n_minibatches, global_step,
            decay_with_global_step=False)
        # Input-pipeline (queue) reading ops for the training split:
        tr_videos_op, tr_action_labels_op, _ = create_videos_reading_ops(is_train=True, is_val=False, GPU_NUM=FLAGS.GPU_NUM, BATCH_SIZE=TRAIN_BATCH_SIZE)
        # val_videos_op, val_action_labels_op, _ = create_videos_reading_ops(is_train=False, is_val=True, GPU_NUM=FLAGS.GPU_NUM, BATCH_SIZE=TRAIN_BATCH_SIZE)
        init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
    # session config:
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    # session:
    with tf.Session(graph=graph, config=config) as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Create a saver for writing training checkpoints.
        # Restore the pretrained degradation module:
        restore_model_ckpt(sess=sess, ckpt_dir=degradation_ckpt_dir, varlist=varlist_degrad)
        if start_from_trained_model:
            restore_model_ckpt(sess=sess, ckpt_dir=target_ckpt_dir, varlist=varlist_utility)
        else:
            # variable name and output tensor shape of the last layer are different between ckpt and our graph, so we must convert:
            # drop last-layer variables ("out"/"d2") and remap scope names so
            # the sports1M C3D checkpoint keys match our graph variables.
            temp_varlist = [v for v in varlist_utility if not any(x in v.name.split('/')[1] for x in ["out", "d2"])]
            temp_vardict = {v.name[:-2].replace('UtilityModule', 'var_name'): v for v in temp_varlist}
            restore_model_ckpt(sess=sess, ckpt_dir=COMMON_FLAGS.PRETRAINED_C3D, varlist=temp_vardict)
        # saver:
        saver = tf.train.Saver(varlist_utility, max_to_keep=1)
        save_checkpoint_path = os.path.join(target_ckpt_dir, 'model.ckpt')
        # train:
        for step in range(MAX_STEPS):
            start_time = time.time()
            sess.run([zero_ops_utility, zero_ops_degrad])
            loss_utility_lst = []
            # Accumulate gradients over n_minibatches before applying:
            for _ in itertools.repeat(None, FLAGS.n_minibatches):
                tr_videos, tr_videos_labels = sess.run([tr_videos_op, tr_action_labels_op])
                _, loss_utility = sess.run([accum_ops_utility, loss_utility_op],
                                           feed_dict={videos_placeholder: tr_videos,
                                                      utility_labels_placeholder: tr_videos_labels,
                                                      dropout_placeholder: 1.0})
                loss_utility_lst.append(loss_utility)
            sess.run([apply_gradient_op_utility, apply_gradient_op_degrad])
            loss_summary = 'Utility Module + Degradation Module, Step: {:4d}, time: {:.4f}, utility loss: {:.8f}'.format(
                step, time.time() - start_time, np.mean(loss_utility_lst))
            print(loss_summary)
            # validation on utility task:
            if step % VAL_STEP == 0:
                start_time = time.time()
                test_correct_num_lst, test_acc_lst, total_v = run_validation(
                    sess=sess,
                    right_count_op_list=[right_count_utility_op],
                    placeholder_list=[videos_placeholder, utility_labels_placeholder,
                                      budget_labels_placeholder, dropout_placeholder,
                                      istraining_placeholder],
                    batch_size=TRAIN_BATCH_SIZE*FLAGS.GPU_NUM,
                    dataset='val')
                test_summary = "Step: {:4d}, time: {:.4f}, validation utility correct num: {:.8f}, accuracy: {:.5f}".format(
                    step, time.time() - start_time, test_correct_num_lst[0], test_acc_lst[0])
                print(test_summary)
            # save model:
            if step % SAVE_STEP == 0 or (step + 1) == MAX_STEPS:
                saver.save(sess, save_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
    print("done")
def pretrain_fb_kbeam(start_from_trained_model): ''' pretrain_fb_kbeam Args: start_from_trained_model: boolean. If False, use random initialized fb. If true, use pretrained fb. ''' # mkdir: degradation_ckpt_dir = os.path.join(COMMON_FLAGS.pretrain_dir, 'degradation_models') budget_ckpt_dir = ['']*_K for i in range(_K): budget_ckpt_dir[i] = os.path.join(COMMON_FLAGS.pretrain_dir, 'budget_k%d_new' % (FLAGS.base_idx+i)) if not os.path.isdir(budget_ckpt_dir[i]): os.mkdir(budget_ckpt_dir[i]) # define graph graph = tf.Graph() with graph.as_default(): # global step: global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) # placeholder inputs: videos_placeholder, utility_labels_placeholder, budget_labels_placeholder, dropout_placeholder, istraining_placeholder = \ placeholder_inputs(TRAIN_BATCH_SIZE * FLAGS.GPU_NUM) # initialize some lists, each element coresponds to one gpu: tower_grads_budget, logits_budget_lst, loss_budget_lst = [[] for i in range(_K)], [[] for i in range(_K)], [[] for i in range(_K)] # budget # Optimizer for the 3 components respectively opt_budget = tf.train.AdamOptimizer(FLAGS.budget_lr) with tf.variable_scope(tf.get_variable_scope()): for gpu_index in range(0, FLAGS.GPU_NUM): with tf.device('/gpu:%d' % gpu_index): print('/gpu:%d' % gpu_index) with tf.name_scope('%s_%d' % ('gpu', gpu_index)) as scope: # placeholder inputs: videos = videos_placeholder[gpu_index * TRAIN_BATCH_SIZE:(gpu_index + 1) * TRAIN_BATCH_SIZE] budget_labels = budget_labels_placeholder[gpu_index * TRAIN_BATCH_SIZE:(gpu_index + 1) * TRAIN_BATCH_SIZE] # output of the graph: loss_budget, logits_budget = \ create_architecture_adversarial(scope, videos, budget_labels, istraining_placeholder) # Reuse variables for the next tower. 
tf.get_variable_scope().reuse_variables() # degrade: varlist_degrad = [v for v in tf.trainable_variables() if "DegradationModule" in v.name] # bn varlist varlist_bn = [g for g in tf.global_variables() if 'moving_mean' in g.name] varlist_bn += [g for g in tf.global_variables() if 'moving_variance' in g.name] # budget: varlist_budget = [[] for i in range(_K)] varlist_budget_bn = [[] for i in range(_K)] ### Append elements on each GPU to lists: for i in range(_K): # loss and logits: loss_budget_lst[i].append(loss_budget[i]) logits_budget_lst[i].append(logits_budget[i]) # gradients: varlist_budget[i] = [v for v in tf.trainable_variables() if "BudgetModule_%d" % (FLAGS.base_idx+i) in v.name] varlist_budget_bn[i] = [v for v in varlist_bn if "BudgetModule_%d" % (FLAGS.base_idx+i) in v.name] grads_budget = opt_budget.compute_gradients(loss_budget[i], varlist_budget[i]) tower_grads_budget[i].append(grads_budget) ### End appending elements on each GPU to lists. ### Average or concat Operations/Tnesors in a list to a single Operation/Tensor: ## L_b # budget: loss_budget_op, accuracy_budget_op, right_count_budget_op = [None,]*_K, [None,]*_K, [None,]*_K zero_ops_budget, accum_ops_budget, apply_gradient_op_budget = [None,]*_K, [None,]*_K, [None,]*_K for i in range(_K): loss_budget_op[i] = tf.reduce_mean(loss_budget_lst[i], name='softmax') # Lb _logits_budget = tf.concat(logits_budget_lst[i], 0) accuracy_budget_op[i] = accuracy(_logits_budget, budget_labels_placeholder) right_count_budget_op[i] = correct_num(_logits_budget, budget_labels_placeholder) zero_ops_budget[i], accum_ops_budget[i], apply_gradient_op_budget[i] = create_grad_accum_for_late_update( opt_budget, tower_grads_budget[i], varlist_budget[i], FLAGS.n_minibatches, global_step, decay_with_global_step=False) ### End averaging or concatenating Operations/Tnesors in a list to a single Operation/Tensor. 
# operations for placeholder inputs: tr_videos_op, _, tr_actor_labels_op = create_videos_reading_ops(is_train=True, is_val=False, GPU_NUM=FLAGS.GPU_NUM, BATCH_SIZE=TRAIN_BATCH_SIZE) # saver and summary files: saver_kb = [None]*_K for i in range(_K): # saver used for saving pretrained fb. saver_kb[i] = tf.train.Saver(var_list=varlist_budget[i]+varlist_budget_bn[i], max_to_keep=1) init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer()) # session config: config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) # run session: with tf.Session(graph=graph, config=config) as sess: # initialize: sess.run(init_op) # multi-threads: coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) # restore d net: restore_model_ckpt(sess=sess, ckpt_dir=degradation_ckpt_dir, varlist=varlist_degrad) # restore b net: if start_from_trained_model: for i in range(_K): restore_model_ckpt(sess, budget_ckpt_dir[i], varlist_budget[i]+varlist_budget_bn[i]) # train for step in range(0, MAX_STEPS): start_time = time.time() sess.run(zero_ops_budget) acc_budget_lst, loss_budget_lst = [[] for i in range(_K)], [[] for i in range(_K)] acc_budget_lst_mean, loss_budget_lst_mean = [None]*_K, [None]*_K # accumulating gradient for late update: for _ in itertools.repeat(None, FLAGS.n_minibatches): # placeholder inputs: tr_videos, tr_actor_labels = sess.run([tr_videos_op, tr_actor_labels_op]) # run operations: temp_sess_run_return_list = sess.run(accum_ops_budget + accuracy_budget_op + loss_budget_op, feed_dict={videos_placeholder: tr_videos, budget_labels_placeholder: tr_actor_labels, istraining_placeholder: True}) acc_budget_value = temp_sess_run_return_list[_K : 2*_K] loss_budget_value = temp_sess_run_return_list[2*_K : 3*_K] # append loss and acc for budget model: for i in range(_K): acc_budget_lst[i].append(acc_budget_value[i]) loss_budget_lst[i].append(loss_budget_value[i]) # finish accumulating gradient for late 
update # find acc and loss mean across all gpus: for i in range(_K): acc_budget_lst_mean[i] = np.mean(acc_budget_lst[i]) loss_budget_lst_mean[i] = np.mean(loss_budget_lst[i]) sess.run([apply_gradient_op_budget]) # update all k wb's # finish update on fb # loss summary: if step % PRINT_STEP == 0: loss_summary = 'step: %4d, time: %.4f, ' \ 'training budget accuracy: %s, budget loss: %s' % ( step, time.time() - start_time, acc_budget_lst_mean, loss_budget_lst_mean) print(loss_summary) # end loss summary if step % VAL_STEP == 0: test_correct_num_lst, test_acc_lst, total_v = run_validation(sess=sess, right_count_op_list=right_count_budget_op, placeholder_list=[videos_placeholder, utility_labels_placeholder, budget_labels_placeholder, dropout_placeholder, istraining_placeholder], batch_size=TRAIN_BATCH_SIZE*FLAGS.GPU_NUM, dataset='val', istraining=True) test_summary = "Step: %d, validation budget correct num: %s, accuracy: %s" % ( step, test_correct_num_lst, test_acc_lst) print(test_summary) # bn_temp_value = sess.run(varlist_bn[-1]) # print('bn_temp_value:', bn_temp_value.shape, bn_temp_value[0:5]) # save model: if step % SAVE_STEP == 0 or (step + 1) == MAX_STEPS: for i in range(_K): saver_kb[i].save(sess, os.path.join(budget_ckpt_dir[i], 'pretrained_fb_k%d.ckpt' % i), global_step=step) # End min step # End part 3 coord.request_stop() coord.join(threads) print("done")
def run_training(start_from_trained_model, model_name):
    '''
    Two-fold-evaluation training loop for one budget model: train the budget
    model (on top of the adversarially trained degradation module), validate
    periodically, and keep the checkpoint with the best validation accuracy.

    Args:
        start_from_trained_model: if True, resume from the two-fold-eval
            checkpoint; otherwise initialize from the adversarial degradation
            checkpoint plus (optionally) a pretrained budget model.
        model_name: name of the testing budget model.
    '''
    # Save ckpt of two-fold eval process in this directory:
    two_fold_eval_ckpt_dir = os.path.join(FLAGS.two_fold_eval_ckpt_dir, model_name)
    os.makedirs(two_fold_eval_ckpt_dir, exist_ok=True)
    # Save summary files in this dir:
    summary_dir = os.path.join(FLAGS.summary_dir, model_name)
    os.makedirs(summary_dir, exist_ok=True)
    train_summary_file = open(summary_dir + '/train_summary.txt', 'w')
    val_summary_file = open(summary_dir + '/val_summary.txt', 'w')
    try:
        # build graph:
        (graph, init_op, train_op, _, acc_op, correct_count_op, loss_op,
         tr_videos_op, tr_actor_labels_op, _, _, _, _,
         videos_placeholder, utility_labels_placeholder, budget_labels_placeholder,
         istraining_placeholder, varlist_budget, varlist_degrad) = build_graph(model_name)
        # session configuration:
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        # config.gpu_options.allow_growth = True
        # run session:
        with tf.Session(graph=graph, config=config) as sess:
            '''
            In training, first run init_op, then do multi-threads.
            '''
            # initialize variables:
            sess.run(init_op)
            # multi threads:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # Load ckpts. Batch-norm moving statistics are not trainable, so
            # they are collected and restored/saved explicitly:
            bn_moving_vars = [g for g in tf.global_variables() if 'moving_mean' in g.name]
            bn_moving_vars += [g for g in tf.global_variables() if 'moving_variance' in g.name]
            if start_from_trained_model:
                # load all parameters in the graph:
                restore_model_ckpt(sess=sess, ckpt_dir=two_fold_eval_ckpt_dir,
                                   varlist=tf.trainable_variables() + bn_moving_vars)
            else:
                # load degrade net:
                restore_model_ckpt(sess=sess, ckpt_dir=FLAGS.adversarial_ckpt_file_dir,
                                   varlist=varlist_degrad)
                # load budget net (skip the final "logits" layer, whose shape
                # differs from the pretrained checkpoint):
                if not model_train_from_scratch_map[model_name]:
                    pretrained_budget_model_ckpt_dir = os.path.join(
                        COMMON_FLAGS.hdd_dir, 'two_fold_evaluation',
                        'pretrained_budget_model', model_dir_map[model_name])
                    varlist = [v for v in varlist_budget + bn_moving_vars
                               if not any(x in v.name for x in ["logits"])]
                    restore_model_ckpt(sess=sess, ckpt_dir=pretrained_budget_model_ckpt_dir,
                                       varlist=varlist)
            # End loading ckpts.
            # saver for saving all trainable variables (budget model+degrade model) ckpts:
            saver = tf.train.Saver(tf.trainable_variables() + bn_moving_vars, max_to_keep=1)
            best_val_acc = 0
            val_acc_lst = []
            for step in range(model_max_steps_map[model_name]):
                # update on training data:
                start_time = time.time()
                train_videos, train_labels = sess.run([tr_videos_op, tr_actor_labels_op])
                _, acc, loss_value = sess.run(
                    [train_op, acc_op, loss_op],
                    feed_dict={
                        videos_placeholder: train_videos,
                        budget_labels_placeholder: train_labels,
                        istraining_placeholder: True
                    })
                assert not np.isnan(np.mean(loss_value)), 'Model diverged with loss = NaN'
                # print summary:
                if step % cfg['TRAIN']['PRINT_STEP'] == 0:
                    summary = 'Step: {:4d}, time: {:.4f}, accuracy: {:.5f}, loss: {:.8f}'.format(
                        step, time.time() - start_time, acc, np.mean(loss_value))
                    print(summary)
                    train_summary_file.write(summary + '\n')
                # validation on val set and save ckpt:
                if step % cfg['TRAIN']['VAL_STEP'] == 0 or (step + 1) == model_max_steps_map[model_name]:
                    test_correct_num_lst, test_acc_lst, total_v = run_validation(
                        sess=sess,
                        right_count_op_list=[correct_count_op],
                        placeholder_list=[
                            videos_placeholder, utility_labels_placeholder,
                            budget_labels_placeholder, istraining_placeholder
                        ],
                        batch_size=train_batch_size,
                        dataset='val',
                        istraining=True)
                    val_acc = test_acc_lst[0]
                    summary = ("Step: %4d, validation accuracy: %.5f, total_v: %d") % (step, val_acc, total_v)
                    print('Validation:\n' + summary)
                    val_summary_file.write(summary + '\n')
                    val_acc_lst.append(val_acc)
                    # Keep only the checkpoint with the best validation accuracy:
                    if val_acc > best_val_acc:
                        best_val_acc = val_acc
                        print('Get new best val_acc: %f\n' % best_val_acc)
                        checkpoint_path = os.path.join(two_fold_eval_ckpt_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                    # bn_temp_value = sess.run(bn_moving_vars[-1])
                    # print('bn_temp_value:', bn_temp_value.shape, bn_temp_value[0:5])
            # join multi threads:
            coord.request_stop()
            coord.join(threads)
        print("done")
        np.save(os.path.join(summary_dir, 'val_acc_lst.npy'), np.array(val_acc_lst))
    finally:
        # Close summary files so buffered lines are flushed even on failure
        # (the original leaked both handles).
        train_summary_file.close()
        val_summary_file.close()
def pretrain_fb(start_from_trained_model):
    '''
    pretrain_fb_kbeam: single-model variant — pretrain one budget model fb on
    top of a frozen pretrained degradation module, with gradient accumulation
    over FLAGS.n_minibatches per update step.
    '''
    # Checkpoint locations: f_d is read-only; fb is saved to budget_models_temp.
    degradation_ckpt_dir = os.path.join(COMMON_FLAGS.pretrain_dir, 'degradation_models')
    budget_ckpt_dir = os.path.join(COMMON_FLAGS.pretrain_dir, 'budget_models' + '_temp')
    if not os.path.isdir(budget_ckpt_dir):
        os.mkdir(budget_ckpt_dir)
    # define graph
    graph = tf.Graph()
    with graph.as_default():
        # global step:
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        # placeholder inputs (one concatenated batch covering all GPUs):
        videos_placeholder, utility_labels_placeholder, budget_labels_placeholder, dropout_placeholder, istraining_placeholder = \
            placeholder_inputs(TRAIN_BATCH_SIZE * FLAGS.GPU_NUM)
        # initialize some lists, each element coresponds to one gpu:
        tower_grads_budget, logits_budget_lst, loss_budget_lst = [], [], []  # budget
        # Optimizer for the 3 components respectively
        opt_budget = tf.train.AdamOptimizer(FLAGS.budget_lr)
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_index in range(0, FLAGS.GPU_NUM):
                with tf.device('/gpu:%d' % gpu_index):
                    print('/gpu:%d' % gpu_index)
                    with tf.name_scope('%s_%d' % ('gpu', gpu_index)) as scope:
                        # placeholder inputs: slice this GPU's shard.
                        videos = videos_placeholder[gpu_index * TRAIN_BATCH_SIZE:(gpu_index + 1) * TRAIN_BATCH_SIZE]
                        budget_labels = budget_labels_placeholder[gpu_index * TRAIN_BATCH_SIZE:(gpu_index + 1) * TRAIN_BATCH_SIZE]
                        # output of the graph:
                        loss_budget, logits_budget = \
                            create_architecture_adversarial(scope, videos, budget_labels, istraining_placeholder)
                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()
                        ### Append elements on each GPU to lists:
                        varlist_degrad = [v for v in tf.trainable_variables() if "DegradationModule" in v.name]
                        # budget:
                        varlist_budget = []
                        # loss and logits:
                        loss_budget_lst.append(loss_budget)
                        logits_budget_lst.append(logits_budget)
                        # varlist:
                        # budget
                        varlist_budget = [v for v in tf.trainable_variables() if "BudgetModule" in v.name]
                        # bn varlist (moving stats are not trainable, collected separately):
                        varlist_bn = [g for g in tf.global_variables() if 'moving_mean' in g.name]
                        varlist_bn += [g for g in tf.global_variables() if 'moving_variance' in g.name]
                        # gradients:
                        grads_budget = opt_budget.compute_gradients(loss_budget, varlist_budget)
                        tower_grads_budget.append(grads_budget)
                        ### End appending elements on each GPU to lists.
        ### Average or concat Operations/Tnesors in a list to a single Operation/Tensor:
        ## L_b
        # budget:
        loss_budget_op = tf.reduce_mean(loss_budget_lst, name='softmax')  # Lb
        _logits_budget = tf.concat(logits_budget_lst, 0)
        accuracy_budget_op = accuracy(_logits_budget, budget_labels_placeholder)
        right_count_budget_op = correct_num(_logits_budget, budget_labels_placeholder)
        zero_ops_budget, accum_ops_budget, apply_gradient_op_budget = create_grad_accum_for_late_update(
            opt_budget, tower_grads_budget, varlist_budget, FLAGS.n_minibatches, global_step,
            decay_with_global_step=False)
        ### End averaging or concatenating Operations/Tnesors in a list to a single Operation/Tensor.
        # operations for placeholder inputs:
        tr_videos_op, _, tr_actor_labels_op = create_videos_reading_ops(is_train=True, is_val=False, GPU_NUM=FLAGS.GPU_NUM, BATCH_SIZE=TRAIN_BATCH_SIZE)
        # saver and summary files:
        saver = tf.train.Saver(var_list=varlist_budget+varlist_bn, max_to_keep=1)
    # session config:
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    # run session:
    with tf.Session(graph=graph, config=config) as sess:
        # multi-threads:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # initialize:
        # NOTE(review): unlike the sibling functions, init_op is created here,
        # inside the session block rather than under graph.as_default() —
        # confirm the ops land in the intended graph.
        init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init_op)
        # restore d net:
        restore_model_ckpt(sess, degradation_ckpt_dir, varlist_degrad)
        # restore b net:
        if start_from_trained_model:
            restore_model_ckpt(sess, budget_ckpt_dir, varlist_budget+varlist_bn)
        # train
        for step in range(0, MAX_STEPS):
            start_time = time.time()
            sess.run(zero_ops_budget)
            acc_budget_lst, loss_budget_lst = [], []
            # accumulating gradient for late update:
            for _ in itertools.repeat(None, FLAGS.n_minibatches):
                # placeholder inputs:
                tr_videos, tr_actor_labels = sess.run([tr_videos_op, tr_actor_labels_op])
                # run operations:
                _, acc_budget_value, loss_budget_value = sess.run(
                    [accum_ops_budget, accuracy_budget_op, loss_budget_op],
                    feed_dict={videos_placeholder: tr_videos,
                               budget_labels_placeholder: tr_actor_labels,
                               istraining_placeholder: True})
                # append loss and acc for budget model:
                acc_budget_lst.append(acc_budget_value)
                loss_budget_lst.append(loss_budget_value)
            # finish accumulating gradient for late update
            # find acc and loss mean across all gpus:
            assert not np.isnan(np.mean(loss_budget_lst)), 'Model diverged with loss = NaN'
            sess.run([apply_gradient_op_budget])  # update all k wb's
            # finish update on fb
            # loss summary:
            if step % PRINT_STEP == 0:
                loss_summary = 'step: %4d, time: %.4f, ' \
                               'training budget accuracy: %s, budget loss: %s, ' \
                               % (step, time.time() - start_time, np.mean(acc_budget_lst), np.mean(loss_budget_lst),)
                print(loss_summary)
            # end loss summary
            # val:
            if step % VAL_STEP == 0:
                # val set:
                test_correct_num_lst, test_acc_lst, total_v = run_validation(
                    sess=sess,
                    right_count_op_list=[right_count_budget_op],
                    placeholder_list=[videos_placeholder, utility_labels_placeholder,
                                      budget_labels_placeholder, dropout_placeholder,
                                      istraining_placeholder],
                    batch_size=TRAIN_BATCH_SIZE*FLAGS.GPU_NUM,
                    dataset='val')
                test_summary = "Step: %d, validation budget correct num: %s, accuracy: %s" % (
                    step, test_correct_num_lst, test_acc_lst)
                print(test_summary)
                # test set:
                test_correct_num_lst, test_acc_lst, total_v = run_validation(
                    sess=sess,
                    right_count_op_list=[right_count_budget_op],
                    placeholder_list=[videos_placeholder, utility_labels_placeholder,
                                      budget_labels_placeholder, dropout_placeholder,
                                      istraining_placeholder],
                    batch_size=TRAIN_BATCH_SIZE*FLAGS.GPU_NUM,
                    dataset='test')
                test_summary = "Step: %d, testing budget correct num: %s, accuracy: %s" % (
                    step, test_correct_num_lst, test_acc_lst)
                print(test_summary)
            if step % SAVE_STEP == 0 or (step + 1) == MAX_STEPS:
                saver.save(sess, os.path.join(budget_ckpt_dir, 'pretrained_fb.ckpt'), global_step=step)
        # End min step
        # End part 3
        coord.request_stop()
        coord.join(threads)
    print("done")
def get_results(ith_dir_path, clflist, lstcr_dict, algorithm_descriptor, mode,
                i=None, generalization_sample_name=None):
    """Run the per-classifier pipeline for one mode and write CSV outputs.

    Pops classifiers off ``clflist`` (consuming it) and, depending on
    ``mode``:
      - "training": grid search per classifier, collect grid-search info and
        the additional metrics of the last GS step, optionally ROC CSVs.
      - "validation": run validation, write prediction CSVs, collect
        additional metrics, optionally ROC CSVs.
      - "generalization": run generalization and write prediction CSVs.

    Args:
        ith_dir_path: output directory for this iteration's CSV files.
        clflist: list of classifier wrappers; emptied by this call.
        lstcr_dict: data dictionary passed through to the runners.
        algorithm_descriptor: config object (provides ``roc_curve_flag``).
        mode: "training", "validation" or "generalization".
        i: iteration index, used only for the training grid-search CSV.
        generalization_sample_name: sample label for generalization CSVs.
    """
    if mode == "training":
        grid_search_info_df = pd.DataFrame()
        additional_metrics_df = pd.DataFrame()  # additional metrics calculated on last GS step.
    elif mode == "validation":
        additional_metrics_df = pd.DataFrame()

    def _append_metrics(df, sr):
        # DataFrame.append(Series, ignore_index=True) was removed in pandas 2.0;
        # concatenating a one-row frame reproduces the column-union behavior.
        return pd.concat([df, sr.to_frame().T], ignore_index=True)

    while clflist:
        clf = clflist[-1]
        print(mode, ': ', clf.get_name_str())
        if mode == "training":
            gridsearch_sklearn.run_gridsearch(clf, lstcr_dict, algorithm_descriptor)
            grid_search_info_df = pd.concat(
                [grid_search_info_df, clf.gridsearch_df], sort=True, join='outer')
            additional_metrics_df = _append_metrics(additional_metrics_df,
                                                    clf.additional_metrics_sr)
            if algorithm_descriptor.roc_curve_flag is True:
                csvoutput.roc_curve_csv(ith_dir_path, clf, mode)
        elif mode == "validation":
            validation.run_validation(clf, lstcr_dict, algorithm_descriptor)
            csvoutput.prediction_csv(ith_dir_path, clf, mode)
            additional_metrics_df = _append_metrics(additional_metrics_df,
                                                    clf.additional_metrics_sr)
            if algorithm_descriptor.roc_curve_flag is True:
                csvoutput.roc_curve_csv(ith_dir_path, clf, mode)
        elif mode == "generalization":
            validation.run_generalization(clf, lstcr_dict, algorithm_descriptor)
            csvoutput.prediction_csv(
                ith_dir_path, clf, mode,
                generalization_sample_name=generalization_sample_name)
        clflist.pop()

    if mode == "training":
        csvoutput.grid_search_csv(i, grid_search_info_df, algorithm_descriptor)
        csvoutput.additional_metrics_csv(ith_dir_path, additional_metrics_df)
    elif mode == "validation":
        csvoutput.additional_metrics_csv(ith_dir_path, additional_metrics_df)
def run_adversarial_testing():
    """Evaluate the adversarially trained model on the test split.

    Restores all trainable variables plus batch-norm moving statistics from
    ``ckpt_dir``, runs the utility and the _K budget correct-count ops over
    the test set, and writes the summary plus wall-clock time to
    ``<test_result_dir>/EvaluationResuls.txt``.
    """
    start_time = time.time()
    (graph, init_op, zero_ops_degrad, accum_ops_degrad, apply_gradient_op_degrad,
     zero_ops_budget, accum_ops_budget, apply_gradient_op_budget,
     zero_ops_utility, accum_ops_utility, apply_gradient_op_utility,
     loss_budget_op, accuracy_budget_op, right_count_budget_op,
     loss_utility_op, accuracy_utility_op, right_count_utility_op,
     tr_videos_op, tr_action_labels_op, tr_actor_labels_op,
     videos_placeholder, utility_labels_placeholder, budget_labels_placeholder,
     istraining_placeholder, varlist_budget, varlist_utility, varlist_degrad,
     varlist_bn) = build_graph(cfg['TEST']['BATCH_SIZE'])
    if not os.path.exists(test_result_dir):
        os.makedirs(test_result_dir)
    # buffering=1 -> line-buffered so partial results reach disk promptly;
    # the context manager guarantees the handle is closed (it was leaked before).
    with open(test_result_dir + '/EvaluationResuls.txt', 'w', buffering=1) as test_result_file:
        # session config:
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        # config.gpu_options.allow_growth = True
        # run session:
        with tf.Session(graph=graph, config=config) as sess:
            # initialization:
            # NOTE(review): this shadows the init_op returned by build_graph and
            # creates the initializer outside `graph.as_default()` — confirm
            # build_graph builds into the default graph; otherwise the returned
            # init_op should be run instead.
            init_op = tf.group(tf.local_variables_initializer(),
                               tf.global_variables_initializer())
            sess.run(init_op)
            # initialization part should be put outside the multi-threads part! But why?
            # loading trained checkpoints:
            restore_model_ckpt(sess, ckpt_dir, tf.trainable_variables() + varlist_bn)
            test_correct_num_lst, test_acc_lst, total_v = run_validation(
                sess=sess,
                right_count_op_list=[right_count_utility_op] + right_count_budget_op,
                placeholder_list=[
                    videos_placeholder, utility_labels_placeholder,
                    budget_labels_placeholder, istraining_placeholder
                ],
                batch_size=cfg['TEST']['BATCH_SIZE'] * FLAGS.GPU_NUM,
                dataset='test',
                istraining=True)
            # print and write summary (index 0 = utility task, 1: = budget models):
            test_summary = (
                'test_acc_utility: %s, test_correct_num_utility: %s, total_v: %d\n'
                'test_acc_budget: %s, test_correct_num_budget: %s, total_v: %d\n'
                % (test_acc_lst[0], test_correct_num_lst[0], total_v,
                   test_acc_lst[1:], test_correct_num_lst[1:], total_v))
            print(test_summary)
            test_result_file.write(test_summary + '\n')
            # explicit sess.close() removed: the `with tf.Session(...)` block
            # already closes the session on exit.
        finish_time = time.time()
        print(finish_time - start_time)
        test_result_file.write(str(finish_time - start_time))
def run_adversarial_training(start_from_trained_model):
    """Algorithm 1 in the paper: adversarial training of fd / fT / fb.

    Alternates, for cfg['TRAIN']['TOP_MAXSTEP'] outer steps:
      Part 3 (budget):
        - MAX step: update the degradation model fd to maximize the budget
          loss L_b of the currently-weakest budget model (index ``_idx_min``).
        - MIN step: update all K budget models fb to minimize L_b, and pick
          the new weakest one by mean training loss.
      Part 2 (utility): end-to-end update of fT and fd to minimize the
        utility loss L_T, with early stopping on a validation plateau.
    Gradients are accumulated over FLAGS.n_minibatches minibatches before
    each apply, to emulate a larger batch.

    Args:
        start_from_trained_model: if False, restore fd/fT/fb from their
            pretraining checkpoints; if True, resume from ``ckpt_dir``.

    Side effects: creates ``ckpt_dir`` / ``summary_dir``, appends to the
    train/validation/model-restarting summary files, and saves checkpoints
    every cfg['TRAIN']['SAVE_STEP'] steps and at the final step.

    Relies on module-level names: cfg, FLAGS, COMMON_FLAGS, _K, ckpt_dir,
    summary_dir, build_graph, restore_model_ckpt, run_validation.
    """
    # Save ckpt of adv-training process in this directory:
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    # Save summary files in this dir (buffering=1 -> line-buffered):
    if not os.path.exists(summary_dir):
        os.makedirs(summary_dir)
    train_summary_file = open(summary_dir + '/train_summary.txt', 'a',
                              buffering=1)
    validation_summary_file = open(summary_dir + '/validation_summary.txt',
                                   'a', buffering=1)
    model_restarting_summary_file = open(
        summary_dir + '/model_restarting_summary.txt', 'a', buffering=1)

    (graph, init_op, zero_ops_degrad, accum_ops_degrad,
     apply_gradient_op_degrad, zero_ops_budget, accum_ops_budget,
     apply_gradient_op_budget, zero_ops_utility, accum_ops_utility,
     apply_gradient_op_utility, loss_budget_op, accuracy_budget_op,
     right_count_budget_op, loss_utility_op, accuracy_utility_op,
     right_count_utility_op, tr_videos_op, tr_action_labels_op,
     tr_actor_labels_op, videos_placeholder, utility_labels_placeholder,
     budget_labels_placeholder, istraining_placeholder, varlist_budget,
     varlist_utility, varlist_degrad, varlist_bn) = build_graph(
         cfg['TRAIN']['BATCH_SIZE'])

    # session config:
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    # config.gpu_options.allow_growth = True  # Don't do this!

    # run session:
    with tf.Session(graph=graph, config=config) as sess:
        # saver for saving models (trainable vars + BN moving statistics):
        saver = tf.train.Saver(var_list=tf.trainable_variables() + varlist_bn,
                               max_to_keep=5)
        sess.run(init_op)

        # multi-threads for the input queue runners:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # load ckpts:
        if not start_from_trained_model:
            # load ckpts from pretrained fd and fT (produced by the
            # run_pretraining_degrad / run_pretraining_utility functions).
            # fd part:
            restore_model_ckpt(
                sess, os.path.join(COMMON_FLAGS.pretrain_dir, 'pretrained_fd'),
                varlist_degrad)
            # fT part:
            restore_model_ckpt(
                sess,
                os.path.join(COMMON_FLAGS.pretrain_dir, 'pretrained_kbeam/fT'),
                varlist_utility)
            # fb part: one checkpoint per budget model in the K-beam.
            for i in range(_K):
                restore_model_ckpt(
                    sess,
                    os.path.join(COMMON_FLAGS.pretrain_dir,
                                 'pretrained_kbeam/fb_k%d' % i),
                    varlist_budget[i])
        else:
            # resume from a previous stage of this very function:
            restore_model_ckpt(sess, ckpt_dir, tf.trainable_variables())

        # Adversarial training loop:
        _idx_min = 0  # index of the weakest budget model; refined each MIN step
        for step in range(cfg['TRAIN']['TOP_MAXSTEP']):
            # Part 3: train Fb using L_b (cross entropy)
            # max step: optimize theta_d using L_b(X,Y_B)
            for L_b_max_step in range(0, cfg['TRAIN']['L_B_MAX_PART_STEP']):
                start_time = time.time()
                acc_util_lst, acc_budget_lst = [], []
                loss_utility_lst, loss_budget_lst = [], []
                sess.run(zero_ops_degrad)
                # accumulate gradients over n_minibatches before updating:
                for _ in itertools.repeat(None, FLAGS.n_minibatches):
                    tr_videos, tr_action_labels, tr_actor_labels = sess.run([
                        tr_videos_op, tr_action_labels_op, tr_actor_labels_op
                    ])
                    # accumulate the gradient w.r.t. the weakest budget model
                    # only, and fetch the current losses/accuracies:
                    (_, acc_util_value, acc_budget_value, loss_utility_value,
                     loss_budget_value) = sess.run(
                         [
                             accum_ops_degrad[_idx_min], accuracy_utility_op,
                             accuracy_budget_op[_idx_min], loss_utility_op,
                             loss_budget_op[_idx_min]
                         ],
                         feed_dict={
                             videos_placeholder: tr_videos,
                             utility_labels_placeholder: tr_action_labels,
                             budget_labels_placeholder: tr_actor_labels,
                             istraining_placeholder: True
                         })
                    acc_util_lst.append(acc_util_value)
                    acc_budget_lst.append(acc_budget_value)
                    loss_utility_lst.append(loss_utility_value)
                    loss_budget_lst.append(loss_budget_value)
                # after accumulating gradients, update fd against the single
                # weakest budget model:
                _ = sess.run([apply_gradient_op_degrad[_idx_min]])
                # Bug fix: previously only the LAST minibatch's loss
                # (loss_budget_value) was checked; check the whole accumulated
                # list, mirroring the loss_utility_lst assert in Part 2 below.
                assert not np.isnan(np.mean(
                    loss_budget_lst)), 'Model diverged with loss = NaN'
                # loss summary:
                if L_b_max_step % cfg['TRAIN']['L_B_MAX_PRINT_STEP'] == 0:
                    loss_summary = 'Alternating Training (Budget L_b MAX), Step: {:2d}, L_b_max_step: {:2d} time: {:.2f}, ' \
                        'training utility accuracy: {:.5f}, training budget accuracy: {:.5f}, ' \
                        'utility loss: {:.8f}, budget loss: {:.8f}'.format(
                            step, L_b_max_step, time.time() - start_time,
                            np.mean(acc_util_lst), np.mean(acc_budget_lst),
                            np.mean(loss_utility_lst), np.mean(loss_budget_lst)
                        )
                    print(loss_summary)
                    train_summary_file.write(loss_summary + '\n')
                print()
            # End max step

            # min step: optimize theta_b using L_b(X,Y_B)
            for L_b_min_step in range(0, cfg['TRAIN']['L_B_MIN_PART_STEP']):
                start_time = time.time()
                sess.run(zero_ops_budget)
                # per-budget-model accumulators (K lists each):
                acc_budget_lst, loss_budget_lst = [[] for i in range(_K)
                                                   ], [[] for i in range(_K)]
                acc_budget_lst_mean, loss_budget_lst_mean = [None] * _K, [
                    None
                ] * _K
                # accumulate gradients over n_minibatches before updating:
                for _ in itertools.repeat(None, FLAGS.n_minibatches):
                    tr_videos, tr_actor_labels = sess.run(
                        [tr_videos_op, tr_actor_labels_op])
                    # fetch order: K accum ops, K accuracy ops, K loss ops
                    temp_sess_run_return_list = sess.run(
                        accum_ops_budget + accuracy_budget_op + loss_budget_op,
                        feed_dict={
                            videos_placeholder: tr_videos,
                            budget_labels_placeholder: tr_actor_labels,
                            istraining_placeholder: True
                        })
                    acc_budget_value = temp_sess_run_return_list[_K:2 * _K]
                    loss_budget_value = temp_sess_run_return_list[2 * _K:3 *
                                                                  _K]
                    for i in range(_K):
                        acc_budget_lst[i].append(acc_budget_value[i])
                        loss_budget_lst[i].append(loss_budget_value[i])
                # mean acc/loss per budget model across minibatches:
                for i in range(_K):
                    acc_budget_lst_mean[i] = np.mean(acc_budget_lst[i])
                    loss_budget_lst_mean[i] = np.mean(loss_budget_lst[i])
                # the model with the lowest mean loss is the "strongest"
                # attacker — fd will be trained against it next MAX step:
                _idx_min = np.argmin(loss_budget_lst_mean)
                assert not np.isnan(loss_budget_lst_mean[_idx_min]
                                    ), 'Model diverged with loss = NaN'
                # Monitoring fb using training set
                if L_b_min_step % cfg['TRAIN']['MONITOR_STEP'] == 0:
                    if acc_budget_lst_mean[
                            _idx_min] >= FLAGS.highest_budget_acc_val:
                        print('pass budget acc bar!\n')
                        train_summary_file.write('pass budget acc bar!\n')
                        # skip the fb update: the budget models are already
                        # accurate enough on degraded input
                        break
                # update all K budget models with the accumulated gradients:
                sess.run([apply_gradient_op_budget])
                # loss summary:
                if L_b_min_step % cfg['TRAIN']['MONITOR_STEP'] == 0:
                    loss_summary = 'Alternating Training (Budget L_b MIN), ' \
                        'Step: %2d, L_b_min_step: %4d, time: %.4f, ' \
                        'training budget accuracy: %s, budget loss: %s, ' \
                        'min_idx: %1d' % (
                            step, L_b_min_step, time.time() - start_time,
                            acc_budget_lst_mean, loss_budget_lst_mean,
                            _idx_min)
                    print(loss_summary)
                    train_summary_file.write(loss_summary + '\n')
                print('')
            # End min step
            train_summary_file.write('\n')
            # End part 3

            # Part 2: End-to-end train Ft and Fd using L_T
            L_T_step = 0
            plateau_counter = 0
            test_acc_util_best = -1
            # stop when validation utility accuracy plateaus or max steps hit:
            while (plateau_counter < cfg['TRAIN']['L_T_PLATEAUSTEP']
                   and L_T_step < cfg['TRAIN']['L_T_MAXSTEP']):
                # Monitoring LT using validation set:
                if L_T_step % cfg['TRAIN']['MONITOR_STEP'] == 0:
                    print('L_T_step %d monitoring target task:' % L_T_step)
                    train_summary_file.write(
                        'L_T_step %d monitoring target task: \n' % L_T_step)
                    test_correct_num_lst, test_acc_lst, total_v = run_validation(
                        sess=sess,
                        right_count_op_list=[right_count_utility_op] +
                        right_count_budget_op,
                        placeholder_list=[
                            videos_placeholder, utility_labels_placeholder,
                            budget_labels_placeholder, istraining_placeholder
                        ],
                        batch_size=cfg['TRAIN']['BATCH_SIZE'] * FLAGS.GPU_NUM,
                        dataset='val',
                        istraining=True)
                    # plateau bookkeeping on the utility accuracy:
                    test_acc_util = test_acc_lst[0]
                    if test_acc_util <= test_acc_util_best:
                        plateau_counter += 1
                    else:
                        plateau_counter = 0
                        test_acc_util_best = test_acc_util
                    # print and write summary:
                    test_summary = (
                        'val_correct_num_utility: %s, val_correct_num_budget: %s, total_v: %d\n'
                        'plateau_counter: %d, test_acc_util_best: %.2f\n' %
                        (test_correct_num_lst[0], test_correct_num_lst[1:],
                         total_v, plateau_counter, test_acc_util_best))
                    print(test_summary)
                    train_summary_file.write(test_summary + '\n')
                    # breaking condition: performance on L_T is still good
                    if test_acc_util >= FLAGS.highest_util_acc_val:
                        print('pass utility acc bar!\n')
                        train_summary_file.write('pass utility acc bar!\n')
                        break
                # End of monitoring LT

                # Optimizing LT (if necessary) using the training set. One
                # batch = FLAGS.n_minibatches minibatches of
                # FLAGS.GPU_NUM * cfg['TRAIN']['BATCH_SIZE'] video clips.
                start_time = time.time()
                sess.run(zero_ops_utility)
                acc_util_lst, acc_budget_lst = [], []
                loss_utility_lst, loss_budget_lst = [], []
                for _ in itertools.repeat(None, FLAGS.n_minibatches):
                    tr_videos, tr_action_labels, tr_actor_labels = sess.run([
                        tr_videos_op, tr_action_labels_op, tr_actor_labels_op
                    ])
                    (_, acc_util_value, acc_budget_value, loss_utility_value,
                     loss_budget_value) = sess.run(
                         [
                             accum_ops_utility, accuracy_utility_op,
                             accuracy_budget_op[_idx_min], loss_utility_op,
                             loss_budget_op[_idx_min]
                         ],
                         feed_dict={
                             videos_placeholder: tr_videos,
                             utility_labels_placeholder: tr_action_labels,
                             budget_labels_placeholder: tr_actor_labels,
                             istraining_placeholder: True
                         },
                         options=tf.RunOptions(
                             report_tensor_allocations_upon_oom=True))
                    acc_util_lst.append(acc_util_value)
                    acc_budget_lst.append(acc_budget_value)
                    loss_utility_lst.append(loss_utility_value)
                    loss_budget_lst.append(loss_budget_value)
                # after accumulating gradients, update fT and fd together:
                sess.run([apply_gradient_op_utility])
                assert not np.isnan(np.mean(
                    loss_utility_lst)), 'Model diverged with loss = NaN'
                # loss summary:
                loss_summary = 'min LT (Utility), Step: {:4d}, L_T_step: {:4d}, time: {:.2f}, ' \
                    'training utility accuracy: {:.5f}, training budget accuracy: {:.5f}, ' \
                    'utility loss: {:.8f}, budget loss: {:.8f}'.format(
                        step, L_T_step, time.time() - start_time,
                        np.mean(acc_util_lst), np.mean(acc_budget_lst),
                        np.mean(loss_utility_lst), np.mean(loss_budget_lst)
                    )
                print('\n' + loss_summary + '\n')
                train_summary_file.write(loss_summary + '\n')
                L_T_step += 1
            # End of optimizing LT.
            print('')
            train_summary_file.write('\n')
            # End part 2

            # Do validation (on validation set):
            if step % cfg['TRAIN']['VAL_STEP'] == 0:
                print('step %d validation: \n' % step)
                validation_summary_file.write('step %d validation: \n' % step)
                test_correct_num_lst, test_acc_lst, total_v = run_validation(
                    sess=sess,
                    right_count_op_list=[right_count_utility_op] +
                    right_count_budget_op,
                    placeholder_list=[
                        videos_placeholder, utility_labels_placeholder,
                        budget_labels_placeholder, istraining_placeholder
                    ],
                    batch_size=cfg['TRAIN']['BATCH_SIZE'] * FLAGS.GPU_NUM,
                    dataset='val',
                    istraining=True)
                # print and write summary:
                test_summary = ('val_correct_num_utility: %s, total_v: %d\n'
                                'val_correct_num_budget: %s, total_v: %d\n' %
                                (test_correct_num_lst[0], total_v,
                                 test_correct_num_lst[1:], total_v))
                print(test_summary)
                validation_summary_file.write(test_summary + '\n')
            # End evaluation

            # Save ckpt for kb_adversarial learning:
            if step % cfg['TRAIN']['SAVE_STEP'] == 0 or (
                    step + 1) == cfg['TRAIN']['TOP_MAXSTEP']:
                checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                print('+++++++++++++ saving model to %s +++++++++++++' %
                      checkpoint_path)
                saver.save(sess, checkpoint_path, global_step=step)
        # End adversarial training loop

        train_summary_file.close()
        validation_summary_file.close()
        # Bug fix: this handle was previously opened but never closed.
        model_restarting_summary_file.close()
        coord.request_stop()
        coord.join(threads)
    print("done")