def plot_final_scores():
    '''Plot histograms of navigation error on val_seen for several agents.

    Reads per-agent result JSONs from RESULT_DIR, scores each with
    Evaluation, overlays the error histograms on one axis, and saves the
    figure to PLOT_DIR/val_seen_error.png.
    '''
    font = {'size': 12}
    mpl.rc('font', **font)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 4))  # create figure & 1 axis
    outfiles = [
        RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json',
        RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
        RESULT_DIR + '%s_stop_agent.json',
        RESULT_DIR + '%s_random_agent.json'
    ]
    # Label and hist() style keyword arguments for each entry of `outfiles`,
    # in the same order (index i of outfiles -> styles[i]).
    styles = [
        ('Student-forcing', dict(alpha=0.7, color='C0')),
        ('Teacher-forcing', dict(histtype='step', linewidth=2.5, color='C1')),
        ('Start locations', dict(histtype='step', linewidth=2.5, color='C3')),
        ('Random agent', dict(histtype='step', linewidth=2.5, color='C2')),
    ]
    bins = range(0, 30, 3)
    for split in ['val_seen']:
        ev = Evaluation([split])
        for i, outfile in enumerate(outfiles):
            score_summary, scores = ev.score(outfile % split)
            method, style = styles[i]
            # BUG FIX: `normed` was deprecated in Matplotlib 2.1 and removed
            # in 3.1; `density=True` is the direct replacement and produces
            # the same normalized histogram.
            ax.hist(scores['nav_errors'], bins=bins, label=method,
                    density=True, **style)
    ax.set_title('Val Seen Navigation Error')
    ax.set_xlabel('Error (m)')
    ax.set_ylabel('Frequency')
    ax.set_ylim([0, 0.14])
    ax.set_xlim([0, 30])
    plt.axvline(x=3, color='black', linestyle='--')  # 3 m success threshold marker
    legend = ax.legend(loc='upper right')
    plt.tight_layout()
    plt.savefig('%s/val_seen_error.png' % (PLOT_DIR))
    plt.close(fig)
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits.

    Loads an optional checkpoint, seeds all RNGs, builds tokenizer /
    feature extractor / simulator / environments, constructs the agent
    model and optimizer, and delegates the actual loop to train().
    '''
    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)
    # Load hyperparameters from checkpoint (if exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        # Missing checkpoint is fatal only for eval-only runs of learned
        # agents; the rule-based agents (forward/random/shortest) need none.
        if not hparams.forward_agent and not hparams.random_agent and not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters
    # Default optional hparams that older configs may not define.
    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None
    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)
    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')
    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    if hparams.encoder_type == 'dic':
        # 'dic' encoder type swaps in the BERT-style tokenizer.
        tokenizer = BTokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)
    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')
    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        # Eval-only: select test split(s) matching the checkpoint filename
        # and run exactly one iteration.
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1
    if hparams.eval_on_val:
        # Evaluate on the val splits instead of the test splits.
        val_splits = [x.replace('test_', 'val_') for x in val_splits]
    val_envs_tmp = { split: (
        Batch(hparams, simulator, featurizer, tokenizer, split=split),
        Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits }
    # Seen splits are registered under two keys ("seen_anna"/"unseen_anna")
    # sharing one env+evaluator pair — NOTE(review): confirm this aliasing
    # is intended by the downstream train() loop.
    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value
    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)
    best_metrics = { env_name : -1 for env_name in val_envs.keys() }
    best_metrics['combined'] = -1
    # Load model paramters from checkpoint (if exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']  # resume dataset cursor
    if hparams.log_every == -1:
        # Auto log interval: ~100 logs per epoch, rounded to a multiple of 100.
        hparams.log_every = round(len(train_env.data) /
            (hparams.batch_size * 100)) * 100
    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))
    # Rule-based baselines are only meaningful in eval mode.
    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)
    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
def test(cfg, dataLoader, model, models_info=None, models_vtx=None):
    """Run 6D pose estimation over `dataLoader`, then evaluate or export.

    For each detection: decode the predicted per-pixel 3D-coordinate and
    confidence maps, build 2D-3D correspondences, and recover the pose with
    PnP+RANSAC. In 'val' mode results are accumulated into an Evaluation
    object and scored; in 'test' mode they are written to a BOP-style CSV.

    Args:
        cfg: experiment config; cfg.pytorch.exp_mode selects 'val' or 'test'.
        dataLoader: yields (input, pose, bbox, center, size, clsIdx, imgPath,
            scene_id, image_id, score) batches.
        model: network returning (conf, coor_x, coor_y, coor_z) maps.
        models_info: per-class object extents used to de-normalize coordinates.
        models_vtx: object model vertices, forwarded to Evaluation.
    """
    model.eval()
    if cfg.pytorch.exp_mode in ['val']:
        from eval import Evaluation
        Eval = Evaluation(cfg.pytorch, models_info, models_vtx)
    elif cfg.pytorch.exp_mode == 'test':
        # BOP-format CSV of estimated poses.
        csv_file = open(cfg.pytorch.save_csv_path, 'w')
        fieldnames = ['scene_id', 'im_id', 'obj_id', 'score', 'R', 't', 'time']
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        rst_collect = []
    nIters = len(dataLoader)
    bar = Bar('{}_{}'.format(cfg.pytorch.dataset, cfg.pytorch.object),
              max=nIters)
    wall_time = 0
    for i, (inp, pose, bbox, center, size, clsIdx, imgPath, scene_id,
            image_id, score) in enumerate(dataLoader):
        input_var = inp.cuda(cfg.pytorch.gpu,
                             non_blocking=True).float().cuda(cfg.pytorch.gpu)
        batch_size = len(inp)
        if cfg.pytorch.dataset.lower() == 'tless' or cfg.pytorch.dataset.lower() == 'itodd':
            # camera_matrix varies per image in TLESS & ITODD; the loader
            # passes it through the 'imgPath' slot.
            K = np.array(imgPath).reshape(3, 3)
        T_begin = time.time()  # time the forward pass + PnP per batch
        output_conf, output_coor_x, output_coor_y, output_coor_z = model(input_var)
        output_coor_x = output_coor_x.data.cpu().numpy().copy()
        output_coor_y = output_coor_y.data.cpu().numpy().copy()
        output_coor_z = output_coor_z.data.cpu().numpy().copy()
        outConf = output_conf.data.cpu().numpy().copy()
        # Regroup per-sample tensors so each element of `collector` holds
        # everything needed for one detection.
        collector = list(
            zip(clsIdx.numpy(), output_coor_x, output_coor_y, output_coor_z,
                outConf, pose.numpy(), bbox.numpy(), center.numpy(),
                size.numpy(), inp.numpy(), scene_id.numpy(),
                image_id.numpy(), score.numpy()))
        for idx in range(len(collector)):
            (clsIdx_, output_coor_x_, output_coor_y_, output_coor_z_,
             output_conf_, pose_gt, bbox_, center_, size_, input_,
             scene_id_, image_id_, score_) = collector[idx]
            # Map the numeric class index to the dataset's object name.
            if cfg.pytorch.dataset.lower() == 'lmo':
                cls = ref.lmo_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tless':
                cls = ref.tless_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'ycbv':
                cls = ref.ycbv_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tudl':
                cls = ref.tudl_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'hb':
                cls = ref.hb_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'icbin':
                cls = ref.icbin_id2obj[clsIdx_]
            elif cfg.pytorch.dataset.lower() == 'itodd':
                cls = ref.itodd_id2obj[int(clsIdx_)]
            select_pts_2d = []
            select_pts_3d = []
            center_h = center_[0]
            center_w = center_[1]
            size_ = int(size_)
            output_coor_x_ = output_coor_x_.squeeze()
            output_coor_y_ = output_coor_y_.squeeze()
            output_coor_z_ = output_coor_z_.squeeze()
            # Per-pixel argmax over the coordinate bins -> (H, W, 3) bin indices.
            output_coor_ = np.stack([
                np.argmax(output_coor_x_, axis=0),
                np.argmax(output_coor_y_, axis=0),
                np.argmax(output_coor_z_, axis=0)
            ], axis=2)
            # The last bin index marks background; zero it out.
            output_coor_[output_coor_ == cfg.network.coor_bin] = 0
            # De-quantize bins to [-1, 1], then scale by object extents.
            output_coor_ = 2.0 * output_coor_ / float(cfg.network.coor_bin - 1) - 1.0
            output_coor_[:, :, 0] = output_coor_[:, :, 0] * abs(models_info[clsIdx_]['min_x'])
            output_coor_[:, :, 1] = output_coor_[:, :, 1] * abs(models_info[clsIdx_]['min_y'])
            output_coor_[:, :, 2] = output_coor_[:, :, 2] * abs(models_info[clsIdx_]['min_z'])
            # Confidence map: argmax over classes, then min-max normalize.
            output_conf_ = np.argmax(output_conf_, axis=0)
            output_conf_ = (output_conf_ - output_conf_.min()) / (
                output_conf_.max() - output_conf_.min())
            # Points with all coordinates below these thresholds are treated
            # as near-origin noise and skipped.
            min_x = 0.001 * abs(models_info[clsIdx_]['min_x'])
            min_y = 0.001 * abs(models_info[clsIdx_]['min_y'])
            min_z = 0.001 * abs(models_info[clsIdx_]['min_z'])
            # Map output-grid pixels back to full-image coordinates.
            w_begin = center_w - size_ / 2.
            h_begin = center_h - size_ / 2.
            w_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            h_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            output_conf_ = output_conf_.tolist()
            output_coor_ = output_coor_.tolist()
            for x in range(cfg.dataiter.rot_output_res):
                for y in range(cfg.dataiter.rot_output_res):
                    if output_conf_[x][y] < cfg.test.mask_threshold:
                        continue
                    if abs(output_coor_[x][y][0]) < min_x and abs(output_coor_[x][y][1]) < min_y and \
                            abs(output_coor_[x][y][2]) < min_z:
                        continue
                    select_pts_2d.append([w_begin + y * w_unit,
                                          h_begin + x * h_unit])
                    select_pts_3d.append(output_coor_[x][y])
            model_points = np.asarray(select_pts_3d, dtype=np.float32)
            image_points = np.asarray(select_pts_2d, dtype=np.float32)
            try:
                if cfg.pytorch.dataset.lower() == 'tless' or cfg.pytorch.dataset.lower() == 'itodd':
                    # Per-image intrinsics K (see above).
                    _, R_vector, T_vector, inliers = cv2.solvePnPRansac(
                        model_points, image_points, K, np.zeros((4, 1)),
                        flags=cv2.SOLVEPNP_EPNP)
                else:
                    _, R_vector, T_vector, inliers = cv2.solvePnPRansac(
                        model_points, image_points, cfg.pytorch.camera_matrix,
                        np.zeros((4, 1)), flags=cv2.SOLVEPNP_EPNP)
                cur_wall_time = time.time() - T_begin
                wall_time += cur_wall_time
                R_matrix = cv2.Rodrigues(R_vector, jacobian=0)[0]
                # R[0,0] == 1.0 exactly is treated as a degenerate solution
                # and skipped — NOTE(review): confirm this heuristic.
                if R_matrix[0, 0] == 1.0:
                    continue
                if cfg.pytorch.exp_mode == 'val':
                    pose_est = np.concatenate(
                        (R_matrix, np.asarray(T_vector).reshape(3, 1)), axis=1)
                    Eval.pose_est_all[cls].append(pose_est)
                    Eval.pose_gt_all[cls].append(pose_gt)
                    Eval.num[cls] += 1
                    Eval.numAll += 1
                elif cfg.pytorch.exp_mode == 'test':
                    rst = {
                        'scene_id': int(scene_id_),
                        'im_id': int(image_id_),
                        'R': R_matrix.reshape(-1).tolist(),
                        't': T_vector.reshape(-1).tolist(),
                        'score': float(score_),
                        # BUG FIX: was int(clsIdx) — the whole batch tensor —
                        # which crashes for batch_size > 1 and records the
                        # wrong object id; the per-sample id is clsIdx_.
                        'obj_id': int(clsIdx_),
                        'time': cur_wall_time
                    }
                    rst_collect.append(rst)
            except Exception:
                # PnP can fail (too few correspondences, RANSAC failure);
                # still count the sample in 'val' so recall is penalized.
                if cfg.pytorch.exp_mode in ['val']:
                    Eval.num[cls] += 1
                    Eval.numAll += 1
        Bar.suffix = '{0} [{1}/{2}]| Total: {total:} | ETA: {eta:}'.format(
            cfg.pytorch.exp_mode, i, nIters, total=bar.elapsed_td,
            eta=bar.eta_td)
        bar.next()
    if cfg.pytorch.exp_mode == 'val':
        Eval.evaluate_pose()
    elif cfg.pytorch.exp_mode == 'test':
        for item in rst_collect:
            csv_writer.writerow(item)
        csv_file.close()
    print("Wall time of object {}: total {} seconds for {} samples".format(
        cfg.pytorch.object, wall_time, nIters))
    bar.finish()
from eval import Evaluation # import nltk # emb_path='D:\\IOM\\word2vec\\GoogleNews-vectors-negative300.bin' # import jieba emb_path = 'D:\\IOM\\word2vec\\merge_sgns_bigram_char300.bin' from gensim.models import KeyedVectors wv_from_bin = KeyedVectors.load_word2vec_format(emb_path, binary=True) eval_class = Evaluation('', wv_from_bin) sep2 = '*#*' sep1 = '|||' def cut_triples(line): global notriple line = line.strip() triples = [] for triple_str in line.split(sep2): triple_es = triple_str.split(sep1) # #没有三元组的修正 # if len(triple_es)>3: # return [] triples.append(triple_es) return triples args = [(0, 100)] for arg in args: print(arg) begin = arg[0] end = arg[1]
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits.

    Builds the tokenizer and image/object features, constructs the train
    and validation R2R environments, and dispatches to the train/valid
    routine selected by args.train.
    '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    # load object feature
    obj_s_feat = None
    if args.sparseObj:
        print("Start loading the object sparse feature")
        start = time.time()
        obj_s_feat = np.load(sparse_obj_feat, allow_pickle=True).item()
        print("Finish Loading the object sparse feature from %s in %0.4f seconds" % (
            sparse_obj_feat, time.time() - start))
    obj_d_feat = None
    if args.denseObj:
        print("Start loading the object dense feature")
        start = time.time()
        # Dense features are shipped in two files; merge into one dict.
        obj_d_feat1 = np.load(dense_obj_feat1, allow_pickle=True).item()
        obj_d_feat2 = np.load(dense_obj_feat2, allow_pickle=True).item()
        obj_d_feat = {**obj_d_feat1, **obj_d_feat2}
        print("Finish Loading the dense object dense feature from %s and %s in %0.4f seconds" % (
            dense_obj_feat1, dense_obj_feat2, time.time() - start))
    # Feature keys look like "<scan>_<viewpoint>"; collect the scan ids.
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, obj_d_feat=obj_d_feat,
                         obj_s_feat=obj_s_feat, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict
    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        #val_env_names.append('train')
    # Also validate on train unless beam search is enabled.
    if not args.beam:
        val_env_names.append("train")
    # One (environment, evaluator) pair per validation split.
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, obj_d_feat=obj_d_feat,
                           obj_s_feat=obj_s_feat, batch_size=args.batchSize,
                           splits=[split], tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))
    # Dispatch on the requested mode.
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False  # unknown args.train mode
class TuneTrainable(Trainable):
    """Ray-Tune Trainable wrapping model training and periodic evaluation.

    _setup builds the model/dataloaders/optimizers from the tuned config;
    _train runs batches until the next evaluation point and returns the
    tune metrics; _save/_restore handle checkpointing; stop runs one final
    evaluation before shutdown.
    """

    def _setup(self, config):
        # Merge the tuned hyperparameters into the config in place.
        inject_tuned_hyperparameters(config, config)
        # Run relative to this file's directory so relative paths resolve.
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        print('Trainable got the following config after injection', config)
        self.config = config
        self.device = self.config['device']
        self.exp, self.model, self.train_dataloader, self.eval_dataloader = setup_training(
            self.config)
        self.exp.set_name(config['experiment_name'] + self._experiment_id)
        self.exp_name = config['experiment_name'] + self._experiment_id
        # NOTE(review): notification is registered at setup with an
        # "ended" title — confirm it fires at experiment end, not now.
        self.exp.send_notification(title='Experiment ' +
                                   str(self._experiment_id) + ' ended')
        self.train_data_iter = iter(self.train_dataloader)
        self.model = self.model.to(self.device)
        self.model.train()
        n_params = sum(p.numel() for p in self.model.parameters()
                       if p.requires_grad)
        log_dict = flatten_dict(config)
        log_dict.update({'trainable_params': n_params})
        self.exp.log_parameters(log_dict)
        self.optimizers = get_optimizers(self.model, self.config)
        self.evaluator = Evaluation(self.eval_dataloader, self.config)
        # Running counters and accuracy-tracking state.
        self.num_examples = 0   # total examples seen
        self.batch_idx = 0      # batch index within the current epoch
        self.epoch = 1
        self.ewma = EWMA(beta=0.75)  # smoothed metric for plateau detection
        self.last_accu = -1.0
        self.max_accu = -1.0
        self.back_prop_every_n_batches = config['training'][
            'back_prop_every_n_batches']
        self.checkpoint_best = config['training']['checkpoint_best']

    def get_batch(self):
        """Return the next training batch, restarting the iterator (and
        bumping the epoch counter) when the dataloader is exhausted."""
        try:
            batch = next(self.train_data_iter)
            return batch
        except StopIteration:
            self.train_data_iter = iter(self.train_dataloader)
            batch = next(self.train_data_iter)
            self.batch_idx = 0
            self.epoch += 1
            return batch

    def _train(self):
        """Train until the next evaluation point; return tune metrics."""
        total_log_step_loss = 0
        total_log_step_train_accu = 0
        total_log_step_n = 0
        [opt.zero_grad() for opt in self.optimizers]
        while True:
            batch = self.get_batch()
            self.batch_idx += 1
            self.num_examples += len(batch[0])
            batch = (batch[0].to(self.device), batch[1].to(self.device))
            # Gradients are accumulated; the optimizer only steps every
            # back_prop_every_n_batches batches.
            loss, train_accu = training_step(
                batch, self.model, self.optimizers,
                step=(self.batch_idx % self.back_prop_every_n_batches == 0))
            total_log_step_loss += loss.cpu().detach().numpy()
            total_log_step_train_accu += train_accu
            total_log_step_n += 1
            # Periodic console + experiment logging of averaged loss/accuracy.
            if self.batch_idx % self.config['training'][
                    'log_every_n_batches'] == 0:
                avg_loss = total_log_step_loss / total_log_step_n
                avg_accu = total_log_step_train_accu / total_log_step_n
                total_log_step_n = 0
                print(f'{Fore.YELLOW}Total number of seen examples:',
                      self.num_examples,
                      'Average loss of current log step:', avg_loss,
                      'Average train accuracy of current log step:',
                      avg_accu, f"{Style.RESET_ALL}")
                self.exp.log_metric('train_loss', avg_loss,
                                    step=self.num_examples, epoch=self.epoch)
                self.exp.log_metric('train_accuracy', avg_accu,
                                    step=self.num_examples, epoch=self.epoch)
                total_log_step_loss = 0
                total_log_step_train_accu = 0
            # Periodic evaluation; this branch ends the _train() call.
            if (self.batch_idx + 1) % self.config['training']['eval_every_n_batches'] == 0:
                results, assets, image_fns = self.evaluator.eval_model(
                    self.model)
                print(self.config['tune']['discriminating_metric'],
                      results[self.config['tune']['discriminating_metric']])
                self.exp.log_metrics(results, step=self.num_examples,
                                     epoch=self.epoch)
                [
                    self.exp.log_asset_data(asset, step=self.num_examples)
                    for asset in assets
                ]
                [
                    self.exp.log_image(fn, step=self.num_examples)
                    for fn in image_fns
                ]
                # Plateau detection: compare against the EWMA and the last
                # raw value; flag "no change" only after 70k examples.
                accu_diff_avg = abs(
                    results[self.config['tune']['discriminating_metric']] -
                    self.ewma.get())
                accu_diff_cons = abs(
                    results[self.config['tune']['discriminating_metric']] -
                    self.last_accu)
                no_change_in_accu = 1 if accu_diff_avg < 0.0005 and accu_diff_cons < 0.002 and self.num_examples > 70000 else 0
                self.ewma.update(
                    results[self.config['tune']['discriminating_metric']])
                self.last_accu = results[self.config['tune']
                                         ['discriminating_metric']]
                # Track the best metric and optionally checkpoint on improvement.
                if self.max_accu < results[self.config['tune']
                                           ['discriminating_metric']]:
                    self.max_accu = results[self.config['tune']
                                            ['discriminating_metric']]
                    if self.checkpoint_best:
                        self.save_checkpoint('checkpoints',
                                             self.exp_name + '.pt')
                        print(
                            f'{Fore.GREEN}New best model saved.{Style.RESET_ALL}'
                        )
                self.exp.log_metric('max_accuracy', self.max_accu,
                                    step=self.num_examples, epoch=self.epoch)
                training_results = {
                    self.config['tune']['discriminating_metric']:
                    self.max_accu,
                    'num_examples': self.num_examples,
                    'no_change_in_accu': no_change_in_accu
                }
                return training_results

    def _save(self, checkpoint_dir):
        # Tune checkpoint hook — delegates to save_checkpoint.
        return self.save_checkpoint(checkpoint_dir, 'checkpoint_file.pt')

    def save_checkpoint(self, checkpoint_dir, fname='checkpoint_file.pt'):
        """Save model and all optimizer states; return the checkpoint path."""
        print(f'{Fore.CYAN}Saving model ...{Style.RESET_ALL}')
        save_dict = {'model_state_dict': self.model.state_dict()}
        for i, optimizer in enumerate(self.optimizers):
            save_dict['op_' + str(i) + '_state_dict'] = optimizer.state_dict()
        torch.save(save_dict, os.path.join(checkpoint_dir, fname))
        return os.path.join(checkpoint_dir, fname)

    def _restore(self, checkpoint_path):
        # Tune restore hook — mirrors save_checkpoint's layout.
        checkpoint = torch.load(checkpoint_path)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        for i, optimizer in enumerate(self.optimizers):
            optimizer.load_state_dict(checkpoint['op_' + str(i) +
                                                 '_state_dict'])

    def stop(self):
        """Run one final evaluation, log its outputs, then stop normally."""
        results, assets, image_fns = self.evaluator.eval_model(
            self.model, finished_training=True)
        self.exp.log_metrics(results, step=self.num_examples,
                             epoch=self.epoch)
        [
            self.exp.log_asset_data(asset, step=self.num_examples)
            for asset in assets
        ]
        [self.exp.log_image(fn, step=self.num_examples) for fn in image_fns]
        return super().stop()
def main(opts):
    """Build encoder/policy models and train (or evaluate) a panoramic
    R2R seq2seq agent, checkpointing on the best validation success rate.
    """
    # set manual_seed and build vocab
    setup(opts, opts.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1,
                    vocab=vocab, encoding_length=opts.max_cap_length)
    # create language instruction encoder
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,  # module-level constant
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)
    # create policy model
    policy_model_kwargs = {
        'opts': opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }
    if opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)
    encoder = encoder.to(device)
    model = model.to(device)
    # One optimizer over both encoder and policy parameters.
    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)
    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(
            opts, model, encoder, optimizer)
    # if a secondary exp name is specified, this is useful when resuming from
    # a previous saved experiment and save to another experiment, e.g.,
    # pre-trained on synthetic data and fine-tune on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary
    feature, img_spec = load_features(opts.img_feat_dir)
    if opts.test_submission:
        # Submission mode: evaluate the resumed model on the test split only.
        assert opts.resume, 'The model was not resumed before running for submission.'
        test_env = ('test',
                    (R2RPanoBatch(opts, feature, img_spec,
                                  batch_size=opts.batch_size,
                                  splits=['test'], tokenizer=tok),
                     Evaluation(['test'])))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return
    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec,
                                 batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['train'], tokenizer=tok)
    else:
        # Start on synthetic data; switch back to 'train' later (see below).
        train_env = R2RPanoBatch(opts, feature, img_spec,
                                 batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['synthetic'], tokenizer=tok)
    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec,
                                     batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok),
                        Evaluation([split]))
                for split in ['val_seen', 'val_unseen']}
    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)
    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer,
                                 opts.train_iters_epoch)
    if opts.eval_beam or opts.eval_only:
        # Evaluation-only path: score each val split once and exit.
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(
                trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return
    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)
    # best_success_rate comes from resume_training() when resuming.
    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)
        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))
            # Index 1 follows val_envs's iteration order; the print below
            # labels it val_unseen — NOTE(review): confirm ordering.
            success_rate_compare = success_rate[1]
            if is_experiment():
                # remember the best tracked success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare,
                                        best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(
                    best_success_rate))
                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir,
                    name=opts.exp_name)
        # After the scheduled number of augmentation epochs, switch from the
        # synthetic split back to the real training split.
        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec,
                                     batch_size=opts.batch_size,
                                     seed=opts.seed, splits=['train'],
                                     tokenizer=tok)
    print("--> Finished training")
def test():
    """Evaluate a pre-trained SSM checkpoint on train/val_seen/val_unseen.

    Overrides the global args for this run, builds one R2R environment and
    one evaluator per split, loads the checkpoint from
    snap/<name>/state_dict/ssm_ckpt, and prints per-split score summaries.
    """
    print('current directory', os.getcwd())
    os.chdir('..')  # run from the repository root so relative paths resolve
    print('current directory', os.getcwd())
    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu
    # Hyper-parameter overrides for this evaluation run.
    # NOTE: dead stores removed — featdropout was first set to 0.3 and iters
    # to 80000, then unconditionally overridden (0.4 / 200000) before any
    # read; only the final values ever took effect.
    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.4
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 200000
    args.maxAction = 15
    args.batchSize = 4
    args.target_batch_size = 4
    args.pe_dim = 128
    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'
    # Resolve the optimizer name to the torch optimizer class.
    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD
    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'
    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'
    if args.features == 'imagenet':
        features = IMAGENET_FEATURES
    if args.fast_train:
        # Use the reduced "-fast" feature file for quicker debug runs.
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext
    print(args)

    def setup():
        # Seed torch and ensure vocab files exist. Currently unused — the
        # call below is commented out; kept for parity with training code.
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    # setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    print('start extract keys...')
    # Feature keys look like "<scan>_<viewpoint>"; collect the scan ids.
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')
    # One environment and one evaluator per split.
    val_envs = {
        split: R2RBatch(feat_dict, batch_size=args.batchSize,
                        splits=[split], tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }
    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }
    learner = Learner(val_envs, "", tok, args.maxAction, process_num=4,
                      max_node=17, visible_gpu=visible_gpu)
    learner.eval_init()
    ckpt = 'snap/%s/state_dict/ssm_ckpt' % args.name
    learner.load_eval(ckpt)
    results = learner.eval()
    # Format and print per-split metric summaries.
    loss_str = ''
    for key in results:
        evaluator = evaluators[key]
        result = results[key]
        score_summary, score_details = evaluator.score(result)
        loss_str += ", %s \n" % key
        for metric, val in score_summary.items():
            loss_str += ', %s: %.3f' % (metric, val)
        loss_str += '\n'
    print(loss_str)
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
              feedback_method, n_iters, model_prefix, blind, args):
    '''Train on the training set, and validate on seen and unseen splits.

    Builds the tokenizer (BERT-based or plain vocab), train/validation R2R
    environments, the instruction encoder selected by args.encoder_type
    ('vlbert', 'bert', or an LSTM fallback), and the panoramic attention
    decoder, then hands everything to train().
    '''
    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic,
                         args.action_space, batch_size=args.batch_size,
                         splits=['train'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)
    # Create validation environments: one (env, evaluator) pair per split.
    val_envs = {
        split: (R2RBatch(feature_store, nav_graphs, args.panoramic,
                         args.action_space, batch_size=args.batch_size,
                         splits=[split], tokenizer=tok, path_type=path_type,
                         history=history, blind=blind),
                Evaluation([split], path_type=path_type))
        for split in ['val_seen', 'val_unseen']
    }
    # Build models and train
    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers,
                                 args.la_layers, args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers,
                                 args.la_layers, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update,
                                  args.bert_n_layers, args.reverse_input,
                                  args.top_lstm, args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            encoder = encoder.cuda()
            # BUG FIX: removed a leftover `pdb.set_trace()` breakpoint here —
            # it halted every run that loaded a pretrained BERT encoder.
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update,
                                  args.bert_n_layers, args.reverse_input,
                                  args.top_lstm, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        # LSTM fallback. hidden_size / bidirectional / word_embedding_size /
        # padding_idx / dropout_ratio are module-level settings here —
        # NOTE(review): confirm they are defined at module scope.
        enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size,
                              enc_hidden_size, padding_idx, dropout_ratio,
                              bidirectional=bidirectional).cuda()
    # Context width fed to the decoder: doubled for a bidirectional encoder;
    # raw BERT hidden width (768) when no LSTM sits on top of the transformer.
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768
    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                                 Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()
    train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          val_envs=val_envs, args=args)
def train_val(seed=None):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # Select which GPU this run uses.
    device = torch.device('cuda', hparams.device_id)

    # Resume from the latest checkpoint if one exists on disk.
    if os.path.exists(hparams.load_path):
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']  # iteration at which the checkpoint was saved
    else:
        # FIX: the original tested `hasattr(args, ...)`, but `args` is not the
        # config object used everywhere else in this function; the sibling
        # train_val() in this file performs the identical check on `hparams`.
        if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and hparams.eval_only:
            # Eval-only runs require a checkpoint; training may start fresh.
            sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters  # from config

    # Seed the RNGs and build the vocabulary.
    setup(seed=seed)
    train_vocab_path = os.path.join(hparams.data_path, 'train_vocab.txt')
    if hasattr(hparams, 'external_main_vocab') and hparams.external_main_vocab:
        # An externally supplied main vocab overrides the default one.
        train_vocab_path = hparams.external_main_vocab

    if 'verbal' in hparams.advisor:
        # A verbal advisor adds a subgoal vocab (navigation actions as words),
        # e.g. data/asknav/verbal_hard_vocab.txt.
        subgoal_vocab_path = os.path.join(hparams.data_path, hparams.subgoal_vocab)
        vocab = read_vocab([train_vocab_path, subgoal_vocab_path])
    else:
        vocab = read_vocab([train_vocab_path])
    tok = Tokenizer(vocab=vocab, encoding_length=hparams.max_input_length)

    # Create the training environment.
    train_env = VNLABatch(hparams, split='train', tokenizer=tok)

    # Create validation (or, in eval mode, test) environments.
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        # Pick the test split matching the checkpoint being evaluated.
        # NOTE(review): two independent `if`s, so '_seen' wins if a path ever
        # contained both markers — preserved from the original behavior.
        if '_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        if '_seen' in hparams.load_path:
            val_splits = ['test_seen']
        end_iter = start_iter + hparams.log_every  # a single logging period

    # split name -> (environment, evaluator) for each validation split.
    val_envs = {
        split: (VNLABatch(hparams, split=split, tokenizer=tok,
                          from_train_env=train_env,
                          traj_len_estimates=train_env.traj_len_estimates),
                Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits}

    # Build the model and optimizer.
    model = AttentionSeq2SeqModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)

    # Best scores so far; overwritten from the checkpoint below if present.
    best_metrics = {'val_seen': -1, 'val_unseen': -1, 'combined': -1}

    # Restore model/optimizer/progress state from the checkpoint (if any).
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)

    # Initialize the agent. The agent type (and the model behavior inside
    # AttentionSeq2SeqModel) depends on the advisor setting.
    if 'verbal' in hparams.advisor:
        agent = VerbalAskAgent(model, hparams, device)
    elif hparams.advisor == 'direct':
        agent = AskAgent(model, hparams, device)
    else:
        # FIX: an unknown advisor previously left `agent` unbound and raised a
        # confusing UnboundLocalError at the train() call; fail fast instead.
        raise ValueError('unknown advisor: %s' % hparams.advisor)

    # Train (or, in eval mode, run a single evaluation pass over test splits).
    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
def train_val_augment(test_only=False):
    """
    Train the listener with the augmented data
    """
    setup()

    # Vocabulary, tokenizer, and precomputed image features.
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        # FIX (idiom): set comprehension instead of set([...]) over
        # list(feat_dict.keys()); iterating the dict yields its keys directly.
        featurized_scans = {key.split("_")[0] for key in feat_dict}
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    aug_path = args.aug

    # Create the training environment plus a second environment that serves
    # the augmented instruction data.
    train_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    aug_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                       splits=[aug_path], tokenizer=tok, name='aug')

    def _print_len_stats(stats):
        # Shared report of average instruction/action lengths for a dataset.
        print("The average instruction length of the dataset is %0.4f." % (stats['length']))
        print("The average action length of the dataset is %0.4f." % (stats['path']))

    print("The training data_size is : %d" % train_env.size())
    _print_len_stats(train_env.get_statistics())
    print("The augmentation data size is %d" % aug_env.size())
    _print_len_stats(aug_env.get_statistics())

    # split name -> (environment, evaluator) for each validation split.
    val_envs = {
        split: (R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in val_env_names
    }

    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)