def train(self): log.infov("Training Starts!") pprint(self.batch_train) max_steps = 50000 output_save_step = 500 for s in range(max_steps): step, accuracy, summary, loss, step_time = \ self.run_single_step(self.batch_train, step=s, is_train=True) # periodic inference accuracy_test = \ self.run_test(self.batch_test, is_train=False) if s % 10 == 0: self.log_step_message(step, accuracy, accuracy_test, loss, step_time) self.summary_writer.add_summary(summary, global_step=step) if self.session.run(self.global_step) % output_save_step == 0: try: save_path = self.saver.save(self.session, os.path.join( self.train_dir, 'model'), global_step=step) log.infov("Saved checkpoint at %d", s) except: log.warning("Error while saving checkpoint. Continuing!") if self.session.run(self.global_step) == 50000: import sys sys.exit()
def __init__(self, config):
    super(Trainer, self).__init__()
    self.config = config

    hyper_parameter_str = '_lr_' + str(config.learning_rate)
    train_dir = './train_dir/%s-%s-%s-%s' % (
        config.model, config.prefix, hyper_parameter_str,
        time.strftime("%Y%m%d-%H%M%S"))

    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    log.info("Train Dir: %s" % (train_dir, ))

    # create data loader
    self.batch_size = config.batch_size

    # create model
    model = Trainer.get_model_class(config.model)
    log.infov("Using Model class: %s" % (model, ))
    if self.config.cuda:
        self.model = model(config).cuda()
    else:
        self.model = model(config)

    # create optimizer
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=self.config.learning_rate)

    self.writer = SummaryWriter()
def train(self): log.infov("Training Starts!") print(self.batch_train) max_steps = self.max_steps ckpt_save_step = self.ckpt_save_step log_step = self.log_step test_sample_step = self.test_sample_step write_summary_step = self.write_summary_step for s in xrange(max_steps): # periodic inference if s % test_sample_step == 0: step, test_summary, loss, output, step_time = \ self.run_test(self.batch_test, step=s, is_train=False) self.log_step_message(step, loss, step_time, is_train=False) self.summary_writer.add_summary(test_summary, global_step=step) step, train_summary, loss, output, step_time = \ self.run_single_step(self.batch_train, step=s, is_train=True) if s % log_step == 0: self.log_step_message(step, loss, step_time) if s % write_summary_step == 0: self.summary_writer.add_summary(train_summary, global_step=step) if s % ckpt_save_step == 0: log.infov("Saved checkpoint at %d", s) self.saver.save(self.session, os.path.join(self.train_dir, 'model'), global_step=step)
def train(self): log.infov("Training Starts!") pprint(self.batch_train) step = self.session.run(self.global_step) # add for testing start_queue_runner #coord = tf.train.Coordinator() #threads = tf.train.start_queue_runners(self.session, coord=coord, start=True) # end for testing start_queue_runner for s in xrange(self.config.max_training_steps): if s % self.config.ckpt_save_step == 0: log.infov("Saved checkpoint at %d", s) self.saver.save(self.session, os.path.join(self.train_dir, 'model'), global_step=s) step, summary, loss, step_time = \ self.run_single_step(self.batch_train, step=s, is_train=True) if s % self.config.log_step == 0: self.log_step_message(step, loss, step_time) if s % self.config.write_summary_step == 0: self.summary_writer.add_summary(summary, global_step=step)
def train(self): log.infov("Training Starts!") pprint(self.batch_train) pprint(self.batch_train_unlabel) step = self.session.run(self.global_step) for s in xrange(self.config.max_training_steps): # periodic inference if s % self.config.test_sample_step == 0: accuracy, summary, d_loss, g_loss, s_loss, step_time = \ self.run_test(self.batch_test, self.batch_train_unlabel, is_train=False) self.log_step_message(step, accuracy, d_loss, g_loss, s_loss, step_time, is_train=False) step, accuracy, d_loss, g_loss, s_loss, step_time, prediction_train, gt_train, g_img = \ self.run_single_step(self.batch_train, self.batch_train_unlabel, step=s) if s % self.config.log_step == 0: self.log_step_message(step, accuracy, d_loss, g_loss, s_loss, step_time) if s % self.config.write_summary_step == 0: self.summary_writer.add_summary(summary, global_step=step) if s % self.config.output_save_step == 0: log.infov("Saved checkpoint at %d", step) self.saver.save(self.session, os.path.join(self.train_dir, 'model'), global_step=step) if self.config.dump_result: f = h5py.File(os.path.join(self.train_dir, 'g_img_'+str(s)+'.hdf5'), 'w') f['image'] = g_img f.close()
def train(self, dataset): log.infov("Training Starts!") pprint(self.batch_train) max_steps = 100000 output_save_step = 1000 for s in xrange(max_steps): step, summary, x, loss, loss_g_update, loss_z_update, step_time = \ self.run_single_step(self.batch_train, dataset, step=s, is_train=True) if s % 10 == 0: self.log_step_message(step, loss, loss_g_update, loss_z_update, step_time) self.summary_writer.add_summary(summary, global_step=step) if s % output_save_step == 0: log.infov("Saved checkpoint at %d", s) save_path = self.saver.save(self.session, os.path.join( self.train_dir, 'model'), global_step=step) if self.config.dump_result: f = h5py.File( os.path.join(self.train_dir, 'dump_result_' + str(s) + '.hdf5'), 'w') f['image'] = x f.close()
def train(self): log.infov("Training Starts!") pprint(self.batch_train) max_steps = 100000 output_save_step = 1000 for s in xrange(max_steps): step, summary, d_loss, g_loss, step_time, prediction_train, gt_train = \ self.run_single_step(self.batch_train, step=s, is_train=True) if s % 10 == 0: self.log_step_message(step, d_loss, g_loss, step_time) self.summary_writer.add_summary(summary, global_step=step) if s % output_save_step == 0: log.infov("Saved checkpoint at %d", s) save_path = self.saver.save(self.session, os.path.join( self.train_dir, 'model'), global_step=step) f = h5py.File( os.path.join(self.train_dir, 'generated_' + str(s) + '.hy'), 'w') f['image'] = prediction_train f.close()
def train(self):
    log.infov('Training starts')

    max_steps = 1000000
    ckpt_save_steps = 5000

    for s in range(max_steps):
        step, train_summary, loss, step_time = \
            self.run_train_step()

        if s % self.log_step == 0:
            self.log_step_message(step, loss, step_time, is_train=True)

        # Periodic inference
        if s % self.val_sample_step == 0:
            val_step, val_summary, val_loss, val_step_time = \
                self.run_val_step()
            self.summary_writer.add_summary(val_summary, global_step=val_step)
            self.log_step_message(val_step, val_loss, val_step_time, is_train=False)

        if s % self.write_summary_step == 0:
            self.summary_writer.add_summary(train_summary, global_step=step)

        if s % ckpt_save_steps == 0:
            log.infov('Saved checkpoint at {}'.format(step))
            self.saver.save(self.session,
                            os.path.join(self.train_dir, 'model'),
                            global_step=step)
def train(self): log.infov("Training Starts!") output_save_step = 1000 buffer_save_step = 100 self.session.run(self.global_step.assign(0)) # reset global step n_updates = 1 for ep in xrange(1, 1+self.config.n_epoches): x_train, y_train = shuffle(self.dataset.x_train, self.dataset.y_train) max_batches = self.config.n_train // self.config.batch_size #if self.config.n_train % self.config.batch_size != 0: max_batches += 1 for bi in xrange(max_batches): start = bi * self.config.batch_size end = min((bi+1) * self.config.batch_size, self.config.n_train) batch_chunk = { 'X': x_train[start:end], 'y': y_train[start:end] } step, summary, log_prob, step_time = \ self.run_single_step(n_updates, batch_chunk) #if np.any(np.isnan(log_prob)): sys.exit(1) self.summary_writer.add_summary(summary, global_step=step) #if n_updates % 100 == 0: # self.log_step_message(n_updates, log_prob, step_time) if n_updates % 50 == 0: print (n_updates, self.evaluate(n_updates)) n_updates+= 1
def construct_train_dir(config):
    all_results_file_name = []
    all_train_dir = []

    temp = config.hdf5FileNametrain.split('.')
    hyper_parameter_all_folds = '{}_num_less_label_data_{}_batchSizeLabel{}_batchSizeUnlabel{}_maxIteration{}'.format(
        temp[0], config.num_less_label_data, config.batch_size_label,
        config.batch_size_unlabel, config.max_training_steps)

    config.prefix = 'TripleGan_depth{}_growthRate{}_reduce{}_model_type{}_keepPro{}_G_core{}_G_rank{}_C_core{}_C_rank{}_D_core{}_D_rank{}'.format(
        config.depth, config.growth_rate, config.reduction, config.model_type,
        config.keep_prob, config.split_dimension_core_G, config.tt_rank_G,
        config.split_dimension_core_C, config.tt_rank_C,
        config.split_dimension_core_D, config.tt_rank_D)

    train_dir = './train_dir/%s-%s' % (
        hyper_parameter_all_folds, config.prefix,
    )

    if tf.gfile.Exists(train_dir):
        log.infov("Train Dir exists")
    else:
        os.makedirs(train_dir)
    log.infov("Train Dir: %s", train_dir)

    result_file_name = hyper_parameter_all_folds + config.prefix + '-' + time.strftime(
        "%Y%m%d-%H%M%S")

    all_train_dir.append(train_dir)
    all_results_file_name.append(result_file_name)

    return all_train_dir, all_results_file_name
def train(self): log.infov("Training Starts!") pprint(self.batch_train) max_steps = 1000000 output_save_step = 1000 test_sample_step = 100 for s in xrange(max_steps): step, accuracy, summary, d_loss, g_loss, s_loss, step_time, prediction_train, gt_train, g_img = \ self.run_single_step(self.batch_train, step=s, is_train=True) # periodic inference if s % test_sample_step == 0: accuracy_test, prediction_test, gt_test = \ self.run_test(self.batch_test, is_train=False) else: accuracy_test = 0.0 if s % 10 == 0: self.log_step_message(step, accuracy, accuracy_test, d_loss, g_loss, s_loss, step_time) self.summary_writer.add_summary(summary, global_step=step) if s % output_save_step == 0: log.infov("Saved checkpoint at %d", s) save_path = self.saver.save(self.session, os.path.join(self.train_dir, 'model'), global_step=step) if self.config.dump_result: f = h5py.File(os.path.join(self.train_dir, 'g_img_'+str(s)+'.hy'), 'w') f['image'] = g_img f.close()
def dump_result(self, filename):
    log.infov("Dumping prediction result into %s ...", filename)
    f = h5py.File(filename, 'w')
    f['test'] = np.concatenate(self._predictions)
    f['test_gt'] = np.concatenate(self._groundtruths)
    f['id'] = str(np.concatenate(self._ids))
    f.close()
    log.info("Dumping prediction done.")
def train(self): log.infov("Training Starts!") pprint(self.batch_train) max_steps = 200000 output_save_step = 1000 for s in xrange(max_steps): step, accuracy, summary, loss, step_time = \ self.run_single_step(self.batch_train, step=s, is_train=True) # periodic inference accuracy_test = \ self.run_test(self.batch_test, is_train=False) if s % 10 == 0: self.log_step_message(step, accuracy, accuracy_test, loss, step_time) self.summary_writer.add_summary(summary, global_step=step) if s % output_save_step == 0: log.infov("Saved checkpoint at %d", s) save_path = self.saver.save(self.session, os.path.join( self.train_dir, 'model'), global_step=step)
def train(self): log.infov("Training Starts!") output_save_step = 1000 self.session.run(self.global_step.assign(0)) # reset global step if self.config.dataset == 'mnist': from load import load_mnist inputs, targets = load_mnist() else: raise NotImplementedError if self.config.method == 'kmeans': y_pred, _ = clustering(np.reshape(inputs, (len(inputs), -1)), self.config.num_clusters) metrics(targets, y_pred) return ''' pre-training ''' if not self.config.skip_pretrain: self.pre_train_enc_dec(inputs, targets, batch_size=self.config.batch_size, num_epochs=1000) # save model self.save_curr_model(os.path.join(self.res_pretrain_dir, 'model')) else: self.try_load_checkpoint(self.res_pretrain_dir) # plot latent_z, _ = self.get_latent_rep_and_pred(inputs, targets) y_pred, centroids = clustering(latent_z, self.config.num_clusters) plot_latent_z_space(latent_z, y_pred, \ '%s/pre_train_z' % self.res_dir, with_legend=True) #sys.exit(0) if self.config.method == 'svgd': if not self.config.skip_svgd: self.session.run(self.model.mu.assign(centroids)) #scale = np.zeros((self.config.num_clusters, self.config.z_dim*(self.config.z_dim+1)//2)) scale = np.zeros((self.config.num_clusters, self.config.z_dim)) for c in range(self.config.num_clusters): z_c = latent_z[np.where(y_pred == c)[0]] s0 = np.std(z_c, axis=0) scale[c] = s0 self.session.run(self.model.scale_diag.assign(scale)) self.train_svgd(inputs, targets, num_epochs=400, batch_size=self.config.batch_size) self.save_curr_model(os.path.join(self.res_dir, 'model')) else: self.try_load_checkpoint(self.res_dir) # plot latent_z, y_pred = self.get_latent_rep_and_pred(inputs, targets) #y_pred, centroids = clustering(latent_z, self.config.num_clusters) plot_latent_z_space(latent_z, y_pred, \ '%s/%s_z' % (self.res_dir, self.config.method), with_legend=True)
def eval(self, batch_iter, test_size, global_step=None, sess=None,
         generate_results=False):
    sess = sess or tf.get_default_session()
    global_step = global_step or tf.no_op()
    sess.run(self.eval_reset)

    result_json = []
    for k, batch_chunk in enumerate(batch_iter):
        feed_dict = self.model.get_feed_dict(batch_chunk)
        pred, val_acc, loss_, _ = sess.run(
            [self.model.predictions, self.model.acc,
             self.model.mean_loss, self.eval_step],
            feed_dict=feed_dict)
        pred = pred.reshape(-1)

        if k % 5 == 0:
            current_accuracy, current_examples = sess.run(
                [self.accuracy, self.example_count])
            log.infov('Evaluation step %d, current accuracy = %.3f (%d), acc = %.3f',
                      k, current_accuracy, current_examples, val_acc)

        # SAMPLING
        if generate_results:
            for j, pred_j in enumerate(pred):
                cor = 0
                if pred_j == batch_chunk['answer'][j]:
                    cor = 1
                result_json.append({
                    'id': batch_chunk['ids'][j],
                    'pred': int(pred_j),
                    'ans': int(batch_chunk['answer'][j]),
                    'question': batch_chunk['debug_sent'][j],
                    'correct': cor
                })

    loss, acc, sumstr_vloss, sumstr_vacc, current_step = \
        sess.run([self.mean_loss, self.accuracy,
                  self.summary_v_loss, self.summary_v_acc, global_step])

    if self.summary_writer is not None:
        self.summary_writer.add_summary(sumstr_vloss, current_step)
        self.summary_writer.add_summary(sumstr_vacc, current_step)

    # Adjust loss from duplicated data
    N = (k + 1) * self.model.batch_size
    if N > test_size:
        pred_ = pred[:N - test_size]
        ans_ = batch_chunk['answer'][:N - test_size].reshape(-1)
        acc = acc * N - val_acc * self.model.batch_size + (pred_ == ans_).sum()
        acc /= test_size

    if generate_results:
        result_json_dict = {}
        for item in result_json:
            result_json_dict[item['id']] = item
        result_json = []
        for k in sorted(result_json_dict.keys()):
            result_json.append(result_json_dict[k])

    return [loss, acc, current_step, result_json]
def report(self): log.info("Computing scores...") total_loss = [] for id, pred, gt in zip(self._ids, self._predictions, self._groundtruths): total_loss.append(self.compute_loss(pred, gt)) avg_loss = np.average(total_loss) log.infov("Average loss : %.4f", avg_loss)
def load_model(args):
    global model, config, session

    assert 0.0 < args.gpu_fraction <= 1.0
    session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_fraction,
            allow_growth=True),
        device_count={'GPU': True},  # self-testing: NO GPU, USE CPU
    ))

    log.warn('Reloading Model ...')
    log.infov('MODEL : %s', args.model)

    if args.model == 'gaze_grcn':
        from models.gaze_grcn import GazePredictionGRCN as TheModel
        from models.gaze_grcn import CONSTANTS, GRUModelConfig
    elif args.model == 'gaze_deeprnn':
        from gaze_rnn_deep import DEEPRNN as TheModel
        from gaze_rnn_deep import CONSTANTS, GRUModelConfig
    elif args.model == 'gaze_rnn':
        from models.gaze_rnn import GazePredictionGRU as TheModel
        from models.gaze_rnn import CONSTANTS, GRUModelConfig
    elif args.model == 'gaze_c3d_conv':
        from models.gaze_c3d_conv import GazePredictionConv as TheModel
        from models.gaze_c3d_conv import CONSTANTS, GRUModelConfig
    elif args.model == 'gaze_shallownet_rnn':
        from models.gaze_shallownet_rnn import GazePredictionGRU as TheModel
        from models.gaze_shallownet_rnn import CONSTANTS, GRUModelConfig
    elif args.model == 'framesaliency':
        from models.gaze_framewise_shallownet import FramewiseShallowNet as TheModel
        from models.gaze_framewise_shallownet import CONSTANTS, GRUModelConfig
    else:
        raise NotImplementedError(args.model)

    # default configuration as of now
    config = GRUModelConfig()
    config.batch_size = args.batch_size or 14

    if args.loss_type is not None:  # important
        config.loss_type = args.loss_type
    else:
        config.loss_type = 'xentropy'

    config.dump(sys.stdout)

    # dummy
    data_sets = crc_input_data_seq.CRCDataSplits()
    data_sets.train = data_sets.test = data_sets.valid = []
    import pdb
    pdb.set_trace()
    log.warn('Dataset Loading Finished ! (%d instances)', len(data_sets))

    model = TheModel(session, data_sets, config)
    print(model)
    return model
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # paths
    parser.add_argument('--tf_record_memft_dir', type=str,
                        default='data/preprocessed/vqa_v2'
                        '/new_qa_split_thres1_500_thres2_50/tf_record_memft',
                        help=' ')
    parser.add_argument('--save_name', type=str, default='vfeat_extracted.hdf5',
                        help=' ')
    parser.add_argument('--image_dir', type=str, default='data/VQA_v2/images',
                        help=' ')
    parser.add_argument('--densecap_dir', type=str, default='data/VQA_v2/densecap',
                        help=' ')
    # hyper parameters
    parser.add_argument('--pretrained_param_path', type=str, default=None,
                        required=True)
    # model parameters
    parser.add_argument('--batch_size', type=int, default=96, help=' ')
    parser.add_argument('--model_type', type=str, default='vfeat', help=' ',
                        choices=['vfeat', 'resnet'])

    config = parser.parse_args()
    config.image_info_path = os.path.join(config.tf_record_memft_dir,
                                          'image_info.json')
    config.save_path = os.path.join(config.tf_record_memft_dir, config.save_name)

    check_config(config)

    log.infov('loading image_info: {}'.format(config.image_info_path))
    image_info = json.load(open(config.image_info_path, 'r'))
    config.image_id2idx = image_info['image_id2idx']
    config.image_path2idx = image_info['image_path2idx']
    config.image_num2path = image_info['image_num2path']
    log.infov('done')

    dataset = dataset_vfeat.create_dataset(config.image_path2idx.keys(),
                                           config.image_dir,
                                           config.densecap_dir,
                                           is_train=False)
    config.dataset_config = dataset.get_config()

    extractor = Extractor(config, dataset)
    extractor.extract()
def eval(self, batch_iter, global_step=None, sess=None, generate_results=False):
    sess = sess or tf.get_default_session()
    global_step = global_step or tf.no_op()
    sess.run(self.eval_reset)

    result_json = []
    for k, batch_chunk in enumerate(batch_iter):
        feed_dict = self.model.get_feed_dict(batch_chunk)
        feed_dict[self.model.train_flag] = False
        self.model.dropout_keep_prob_t.assign(1.0)

        pred, val_acc, _ = sess.run(
            [self.model.predictions, self.model.acc, self.eval_step],
            feed_dict=feed_dict)
        pred = pred.reshape(-1)

        if k % 5 == 0:
            current_accuracy, current_examples = sess.run(
                [self.accuracy, self.example_count])
            log.infov(
                'Evaluation step %d, current accuracy = %.3f (%d), acc = %.3f',
                k, current_accuracy, current_examples, val_acc)

        # SAMPLING
        if generate_results:
            for j, pred_j in enumerate(pred):
                cor = 0
                if pred_j == batch_chunk['answer'][j]:
                    cor = 1
                result_json.append({
                    'id': batch_chunk['ids'][j],
                    'pred': int(pred_j),
                    'ans': int(batch_chunk['answer'][j]),
                    'question': batch_chunk['debug_sent'][j],
                    'correct': cor
                })

    loss, acc, sumstr_vloss, sumstr_vacc, current_step = \
        sess.run([self.mean_loss, self.accuracy,
                  self.summary_v_loss, self.summary_v_acc, global_step])

    if self.summary_writer is not None:
        self.summary_writer.add_summary(sumstr_vloss, current_step)
        self.summary_writer.add_summary(sumstr_vacc, current_step)

    if generate_results:
        result_json_dict = {}
        for item in result_json:
            result_json_dict[item['id']] = item
        result_json = []
        for k in sorted(result_json_dict.keys()):
            result_json.append(result_json_dict[k])

    return [loss, acc, current_step, result_json]
def __init__(self, config):
    self.config = config
    self.vfeat_path = config.vfeat_path
    self.tf_record_dir = config.tf_record_dir
    self.train_dir = os.path.dirname(config.checkpoint)

    self.vlmap_word_weight_dir = os.path.join(
        self.train_dir, config.vlmap_word_weight_dir.split('/')[-1])
    config.vlmap_word_weight_dir = self.vlmap_word_weight_dir

    self.batch_size = config.batch_size

    with tf.name_scope('datasets'):
        self.target_split = tf.placeholder(tf.string)

    with tf.name_scope('datasets/batch'):
        vqa_batch = {
            'train': input_ops_vqa.create(
                self.batch_size, self.tf_record_dir, 'train',
                is_train=True, scope='train_ops', shuffle=True),
            'val': input_ops_vqa.create(
                self.batch_size, self.tf_record_dir, 'val',
                is_train=True, scope='val_ops', shuffle=False),
            'testval': input_ops_vqa.create(
                self.batch_size, self.tf_record_dir, 'testval',
                is_train=True, scope='testval_ops', shuffle=False),
            'test': input_ops_vqa.create(
                self.batch_size, self.tf_record_dir, 'test',
                is_train=True, scope='test_ops', shuffle=False)
        }
        batch_opt = {
            tf.equal(self.target_split, 'train'): lambda: vqa_batch['train'],
            tf.equal(self.target_split, 'val'): lambda: vqa_batch['val'],
            tf.equal(self.target_split, 'testval'): lambda: vqa_batch['testval'],
            tf.equal(self.target_split, 'test'): lambda: vqa_batch['test'],
        }
        self.batch = tf.case(
            batch_opt, default=lambda: vqa_batch['train'], exclusive=True)

    # Model
    Model = self.get_model_class(config.model_type)
    log.infov('using model class: {}'.format(Model))
    self.model = Model(self.batch, config, is_train=True)

    self.checkpoint_loader = tf.train.Saver(max_to_keep=1)

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(allow_growth=True),
        device_count={'GPU': 1})
    self.session = tf.Session(config=session_config)

    self.ckpt_path = config.checkpoint
    if self.ckpt_path is not None:
        log.info('Checkpoint path: {}'.format(self.ckpt_path))
        self.checkpoint_loader.restore(self.session, self.ckpt_path)
        log.info('Loaded the checkpoint')
    log.warn('Inference initialization is done')
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, choices=['AU', 'VA', 'BOTH'],
                        default='BOTH')
    parser.add_argument('-is', '--img_size', type=int, choices=[32, 64, 96],
                        default=32)
    parser.add_argument('-bs', '--batch_size', type=int, default=64)
    parser.add_argument('-lr', '--learning_rate', type=float, default=1e-4)
    parser.add_argument('-ur', '--update_rate', type=int, default=5)
    parser.add_argument('--lr_weight_decay', action='store_true', default=False)
    parser.add_argument('--max_steps', type=int, default=1000000,
                        help='Maximum number of iterations')
    parser.add_argument('--model_save_step', type=int, default=1000,
                        help='Frequency of model saving')
    parser.add_argument('--test_sample_step', type=int, default=100,
                        help='Frequency of testing on the testing set')
    parser.add_argument('--summary_save_step', type=int, default=1000,
                        help='Frequency of saving the elements to the TF summary')
    parser.add_argument('--dump_result', action='store_true',
                        help='If the images are saved')
    parser.add_argument('--checkpoint', type=str, default=None)
    config = parser.parse_args()

    import facemotion as dataset
    config.dataset = 'FACEMOTION'

    config.data_info = dataset.get_data_info(config.img_size, config.model)
    config.conv_info = dataset.get_conv_info(config.img_size)
    config.deconv_info = dataset.get_deconv_info(config.img_size)
    dataset_train, dataset_test = dataset.create_default_splits(
        config.img_size, config.model)

    log.infov("PARAMETERS OF THE MODEL")
    log.infov("model: %s, image_size: %s, batch_size: %s, learning_rate: %.2E, update_rate: %s",
              config.model, config.img_size, config.batch_size,
              config.learning_rate, config.update_rate)

    trainer = Trainer(config, dataset_train, dataset_test)
    trainer.train()
def run_evaluation(model, args, data_sets, out_dir):
    assert out_dir is not None

    # feed forward all outputs in the dataset
    T = model.n_lstm_steps  # T^T test generator should be different length..

    print 'Test length : %d' % len(data_sets.valid)  # XXX valid
    ret = model.generate(
        data_sets.valid,  # XXX valid
        max_instances=int(np.divide(args.num_frames, T, dtype=float) + 1))
    # e.g. max_instances 50 x batch_size (35) == 1750 frames are sampled

    pred_gazemaps = ret['pred_gazemaps'] = np.asarray(ret['pred_gazemap_list'])
    gt_gazemaps = ret['gt_gazemaps'] = np.asarray(ret['gt_gazemap_list'])
    images = ret['images'] = np.asarray(ret['images_list'])
    fixationmaps = ret['fixationmap_list']  # list, not ndarray

    import pdb
    pdb.set_trace()

    # dirty!! (IPC overhead slow)
    global fixationmaps_all
    fixationmaps_all = fixationmaps

    # DUMP ALL IMAGES
    n_images = len(pred_gazemaps)
    assert n_images == len(pred_gazemaps) == len(gt_gazemaps) == len(images) == len(fixationmaps)

    aggregated_scores = defaultdict(list)

    with Parallel(n_jobs=args.jobs, verbose=10) as parallel:
        # each item in scores_aggregated is a {metric->float} dict
        scores_aggregated = parallel(
            delayed(handle_frame)(i, n_images, images[i], pred_gazemaps[i],
                                  gt_gazemaps[i], fixationmaps[i], out_dir)
            for i in xrange(n_images))

        # synchronization barrier.
        scores_aggregated = list(scores_aggregated)

    for scores in scores_aggregated:
        # metric -> float map
        for metric, score in scores.iteritems():
            aggregated_scores[metric].append(score)

    # report aggregated score
    overall_txt_path = os.path.join(out_dir, 'overall.txt')
    with open(overall_txt_path, 'w') as fp:
        for metric, score_list in aggregated_scores.iteritems():
            mean_score = np.mean(score_list)
            log.infov("Average %s : %.4f" % (metric, mean_score))
            fp.write("Average %s : %.4f\n" % (metric, mean_score))
            for score in score_list:
                fp.write('%.3f ' % score)
            fp.write('\n')

    log.warn("Dumped at %s", overall_txt_path)
def report(self): # report L2 loss log.info("Computing scores...") score = {} score = [] for id, pred, gt in zip(self._ids, self._predictions, self._groundtruths): score.append(self.compute_accuracy(pred, gt)) avg = np.average(score) log.infov("Average accuracy : %.4f", avg*100)
def set_eval_dir(self, config):
    self.checkpoint = config.checkpoint

    self.eval_dir = config.checkpoint + '_eval_{}'.format(self.split)
    if self.dump_heavy_output:
        self.eval_dir += '_dump_heavy'
    self.eval_dir += '_{}'.format(time.strftime("%Y%m%d-%H%M%S"))

    if not os.path.exists(self.eval_dir):
        os.makedirs(self.eval_dir)
    log.infov("Eval Dir: %s", self.eval_dir)

    self.save_hdf5 = os.path.join(self.eval_dir, 'results.hdf5')
    self.save_pkl = os.path.join(self.eval_dir, 'results.pkl')
def evaluate(
    pred_list,
    labels_list,
    model,
    use_gazemap,
    attention,
    count='',
    n_jobs=9,
):
    log.info('Evaluation with %d parallel jobs' % (n_jobs))

    aggregated_scores = defaultdict(list)
    len_data = len(pred_list)

    with Parallel(n_jobs=n_jobs, verbose=10) as parallel:
        scores_aggregated = parallel(
            delayed(evaluate_helper)(pred_list[i], labels_list[i])
            for i in xrange(len_data))
        scores_aggregated = list(scores_aggregated)

    for scores in scores_aggregated:
        for metric, score in scores.iteritems():
            aggregated_scores[metric].append(score)

    # To do
    out_dir = "/home/amelie/gazecap/models/scores/"
    model_out_dir = os.path.join(out_dir, model)
    if not os.path.exists(model_out_dir):
        os.mkdir(model_out_dir)

    if use_gazemap:
        use_gazemap = 'gazemap'
    elif attention:
        use_gazemap = 'c3d_attention'
    else:
        use_gazemap = ''

    overall_txt_paths = os.path.join(
        model_out_dir, 'overall_' + model + '_' + use_gazemap + '%s.txt' % count)
    overall_avg_paths = os.path.join(
        model_out_dir, 'overall_avg_' + model + '_' + use_gazemap + '%s.txt' % count)

    with open(overall_avg_paths, 'w') as fp1:
        with open(overall_txt_paths, 'w') as fp:
            for metric, score_list in aggregated_scores.iteritems():
                #metric = 'average-precision'
                mean_score = np.mean(score_list)
                log.infov("Average %s : %.4f\n" % (metric, mean_score))
                #fp.write("Average %s : %.4f\n" % (metric, mean_score))
                fp1.write("Average %s : %.4f\n" % (metric, mean_score))
                for score in score_list:
                    #import pdb; pdb.set_trace()
                    fp.write('%.3f' % score)
                fp.write('\n')

    log.warn("Dumped at %s", model_out_dir)
    return aggregated_scores
def train(self): log.infov("Training Starts!") output_save_step = 1000 self.session.run(self.global_step.assign(0)) # reset global step from load import generate_sample_data x_train, mus_star, log_vars_star, w_star = generate_sample_data( k=self.config.n_components, t=self.config.t) n_train = len(x_train) n_plot = 0 for n_updates in range(1, 1 + self.config.max_steps): batch_x = x_train[np.random.choice(n_train, self.config.batch_size, replace=False)] step, summary, step_time = self.run_single_step(batch_x) self.summary_writer.add_summary(summary, global_step=step) if n_updates == 1 or n_updates % 500 == 0: samples, xvals, probs, probs_star, mode_dist, mean_diff, var_ratio = \ self.evaluate_step(x_train, mus_star, log_vars_star, w_star, sample_size=500, xlim= self.config.t*self.config.n_components+5) prefix = '%d,%d,%s,%s' % (n_updates, self.config.n_components, self.config.kernel, repr(self.config.temperature)) print(prefix + ',' + repr(mode_dist) + ',' + repr(mean_diff) + ',' + repr(var_ratio)) # plot the density function at the end if n_updates == 1 or n_updates == self.config.max_steps: _, ax1 = plt.subplots() plt.plot(xvals, probs_star, '-r', color='r', linewidth=2) plt.plot(xvals, np.mean(probs, axis=0), '-b', linewidth=2) plt.fill_between(xvals, probs_star, alpha=0.5, color='tomato') plt.fill_between(xvals, np.mean(probs, axis=0), alpha=0.5, color='dodgerblue') #for kk in range(self.config.n_components): # plt.plot(xvals, probs[kk], '--k', linewidth=1) plt.ylim(0.0, 0.14) plt.savefig('%s/step_%d.png' % (self.res_dir, n_updates)) plt.close() n_plot += 1 # save model at the end self.saver.save(self.session, os.path.join(self.res_dir, 'model'), global_step=step)
def __init__(self, config, dataset):
    self.config = config
    self.train_dir = config.train_dir
    log.info("self.train_dir = %s", self.train_dir)

    # --- input ops ---
    self.batch_size = config.batch_size
    self.dataset = dataset

    check_data_id(dataset, config.data_id)
    _, self.batch = create_input_ops(dataset, self.batch_size,
                                     data_id=config.data_id,
                                     is_training=False,
                                     shuffle=False)

    # --- create model ---
    Model = self.get_model_class(config.model)
    log.infov("Using Model class : %s", Model)
    self.model = Model(config)

    self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
    self.step_op = tf.no_op(name='step_no_op')

    tf.set_random_seed(1234)

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(allow_growth=True),
        device_count={'GPU': 1},
    )
    self.session = tf.Session(config=session_config)

    # --- checkpoint and monitoring ---
    self.saver = tf.train.Saver(max_to_keep=100)

    self.checkpoint_path = config.checkpoint_path
    if self.checkpoint_path is None and self.train_dir:
        self.checkpoint_path = tf.train.latest_checkpoint(self.train_dir)
    if self.checkpoint_path is None:
        log.warn("No checkpoint is given. Just random initialization :-)")
        self.session.run(tf.global_variables_initializer())
    else:
        log.info("Checkpoint path : %s", self.checkpoint_path)

    mean_std = np.load('../DatasetCreation/VG/mean_std.npz')
    self.img_mean = mean_std['img_mean']
    self.img_std = mean_std['img_std']
    self.coords_mean = mean_std['coords_mean']
    self.coords_std = mean_std['coords_std']
def report(self): # report L2 loss log.info("Computing scores...") score = {} score['l2_loss'] = [] for id, pred, gt in zip(self._ids, self._predictions, self._groundtruths): score['l2_loss'].extend(self.compute_l2error(pred, gt)) avg_l2loss = np.average(score['l2_loss']) log.infov("Average L2 loss : %.5f", avg_l2loss)
def create_train_op(self, loss_tensor, params, learning_rate,
                    global_step=None,
                    gradient_summary_tag_name='gradient_norm'):
    if learning_rate == 0:
        return tf.no_op()

    # ToDo: add normal gradient
    # optimizer
    if self.config.optimization_method == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate)
    elif self.config.optimization_method == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=0.9)
    elif self.config.optimization_method == 'sgd':
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    else:
        raise ValueError('Invalid optimization method!')

    # compute gradients
    gradients = tf.gradients(
        loss_tensor, params,
        aggregation_method=2  # see issue #492
    )
    if all(v is None for v in gradients):
        # in some cases, we are opted not to train some sub-networks at all.
        return tf.no_op()

    # use gradient clipping as well.
    if self.max_grad_norm > 0:
        clipped_grads, norm = tf.clip_by_global_norm(gradients, self.max_grad_norm)
    else:
        clipped_grads, norm = gradients, 0

    tf.summary.scalar(gradient_summary_tag_name, norm)

    clipped_grad_and_vars = list(zip(clipped_grads, params))
    train_op = optimizer.apply_gradients(clipped_grad_and_vars,
                                         global_step=global_step)

    # with some debugging information.
    total_num_elements = 0
    for var in params:
        log.debug("  model param %s : %s (total %d)",
                  var.name, var.get_shape().as_list(),
                  var.get_shape().num_elements())
        total_num_elements += var.get_shape().num_elements()
    log.infov("Total # of parameters in the train_op : %d", total_num_elements)

    # TODO learning rate might be overridden afterwards (e.g. checkpoint)
    return train_op
def run_evaluation(current_step):
    global checkpoint_file

    if FLAGS.test_phase:
        test_size = len(test_dataset.ids)
        dev_iter = test_dataset.batch_iter(1, FLAGS.batch_size, shuffle=False)
    else:
        test_size = len(val_dataset.ids)
        dev_iter = val_dataset.batch_iter(1, FLAGS.batch_size, shuffle=False)

    mean_loss, acc, _, result_json = evaluator.eval(
        dev_iter, test_size=test_size, global_step=trainer.global_step,
        generate_results=FLAGS.generate_results)

    log.info((" [{split_mode:5} step {step:4d}] " +
              "Dev mean_loss: {mean_loss:.5f}, " +
              "acc: {acc:.5f}").format(split_mode='Dev',
                                       step=current_step,
                                       mean_loss=mean_loss,
                                       acc=acc))

    if FLAGS.test_phase:
        # dump result into JSON
        result_json_path = os.path.join(
            os.path.dirname(FLAGS.checkpoint_path),
            "%s_%s_results.json" % (FLAGS.task.lower(), FLAGS.name.lower()))
        with open(result_json_path, 'w') as f:
            json.dump(result_json, f, sort_keys=True, indent=4,
                      separators=(',', ': '))
        log.infov("Dumped result into : %s", result_json_path)
    else:
        if FLAGS.task == 'Count':
            checkpoint_file = os.path.join(
                os.path.dirname(checkpoint_file), str(mean_loss) + "_model.ckpt")
        else:
            checkpoint_file = os.path.join(
                os.path.dirname(checkpoint_file), str(acc) + "_model.ckpt")
        save_path = saver.save(sess, checkpoint_file,
                               global_step=trainer.global_step)
        log.info("Saved {}".format(save_path))