def _setup(self, config):
    inject_tuned_hyperparameters(config, config)
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    print('Trainable got the following config after injection', config)
    self.config = config
    self.device = self.config['device']
    self.exp, self.model, self.train_dataloader, self.eval_dataloader = setup_training(
        self.config)
    self.exp.set_name(config['experiment_name'] + self._experiment_id)
    self.exp_name = config['experiment_name'] + self._experiment_id
    self.exp.send_notification(title='Experiment ' + str(self._experiment_id) + ' ended')
    self.train_data_iter = iter(self.train_dataloader)
    self.model = self.model.to(self.device)
    self.model.train()
    n_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
    log_dict = flatten_dict(config)
    log_dict.update({'trainable_params': n_params})
    self.exp.log_parameters(log_dict)
    self.optimizers = get_optimizers(self.model, self.config)
    self.evaluator = Evaluation(self.eval_dataloader, self.config)
    self.num_examples = 0
    self.batch_idx = 0
    self.epoch = 1
    self.ewma = EWMA(beta=0.75)
    self.last_accu = -1.0
    self.max_accu = -1.0
    self.back_prop_every_n_batches = config['training']['back_prop_every_n_batches']
    self.checkpoint_best = config['training']['checkpoint_best']
def __init__(self, env, vocab_size, results_path, batch_size, episode_len=20):
    super(ActorCriticAgent, self).__init__(env, results_path)
    # For evaluation
    self.ev = Evaluation(['train'])
    # For navigation
    self.episode_len = episode_len
    self.losses = []
    ''' Define instruction encoder '''
    word_embedding_size = 256
    hidden_size = 512
    bidirectional = False
    dropout_ratio = 0.5
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    self.encoder = EncoderLSTM(vocab_size, word_embedding_size, enc_hidden_size,
                               padding_idx, dropout_ratio,
                               bidirectional=bidirectional).cuda()
    context_size = 1024
    self.hist_encoder = EncoderHistory(len(self.model_actions), 32, 2048, context_size).cuda()
    self.a2c_agent = A2CAgent(enc_hidden_size, context_size, len(self.model_actions) - 2).cuda()
    self.saved_actions = []
    params = list(self.encoder.parameters()) + list(self.hist_encoder.parameters()) \
        + list(self.a2c_agent.parameters())
    self.losses = []
    self.optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-5)
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(features, batch_size=batch_size, splits=[split], tokenizer=tok),
                Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
def normal_training(config):
    device = torch.device(config['device'])
    print('Using device', device)
    exp, model, train_dataloader, eval_dataloader, loss_func = setup_training(config)
    exp.set_name(config['experiment_name'])
    model.train()
    model = model.to(device)
    optimizers = get_optimizers(model, config)
    evaluator = Evaluation(eval_dataloader, config)
    num_examples = 0
    for epoch in range(config['training']['training_epochs']):
        for idx, batch in enumerate(train_dataloader):
            batch = (batch[0].to(device), batch[1].to(device))
            num_examples += len(batch[0])
            loss, train_accuracy = training_step(batch, model, optimizers, loss_func)
            if idx % config['training']['log_every_n_batches'] == 0:
                print(epoch, num_examples, loss.detach().cpu().numpy())
                exp.log_metric('train_loss', loss.detach().cpu().numpy(),
                               step=num_examples, epoch=epoch)
            if idx % config['training']['eval_every_n_batches'] == 0:
                results = evaluator.eval_model(model, loss_func)
                for metric in results:
                    print(metric, results[metric])
                    exp.log_metric(metric, results[metric], step=num_examples, epoch=epoch)
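# --- Illustration (not part of the snippet above) ---
# A minimal sketch of the config dict that normal_training() dereferences; the
# concrete values below are placeholders, and any additional keys required by
# setup_training() are assumptions.
example_config = {
    'device': 'cuda:0',
    'experiment_name': 'baseline-run',   # hypothetical name
    'training': {
        'training_epochs': 10,
        'log_every_n_batches': 50,
        'eval_every_n_batches': 500,
    },
}
# normal_training(example_config)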
def finetune():
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    print("The finetune data_size is : %d\n" % train_env.size())
    val_envs = {
        split: (R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }
    train(train_env, tok, args.iters, val_envs=val_envs)
def plot_final_scores():
    ''' Plot the scores '''
    font = {'size': 12}
    mpl.rc('font', **font)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 4))  # create figure & 1 axis
    outfiles = [
        RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
        RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json',
        RESULT_DIR + '%s_stop_agent.json',
        RESULT_DIR + '%s_random_agent.json'
    ]
    for split in ['val_seen']:
        ev = Evaluation([split])
        for i, outfile in enumerate(outfiles):
            score_summary, scores = ev.score(outfile % split)
            if i == 0:
                method = 'Teacher-forcing'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method,
                        normed=True, histtype='step', linewidth=2.5, color='C1')
            elif i == 1:
                method = 'Student-forcing'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method,
                        alpha=0.7, normed=True, color='C0')
            elif i == 2:
                method = 'Start locations'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method,
                        normed=True, histtype='step', linewidth=2.5, color='C3')
            elif i == 3:
                method = 'Random agent'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method,
                        normed=True, histtype='step', linewidth=2.5, color='C2')
        ax.set_title('Val Seen Navigation Error')
        ax.set_xlabel('Error (m)')
        ax.set_ylabel('Frequency')
        ax.set_ylim([0, 0.14])
        ax.set_xlim([0, 30])
        plt.axvline(x=3, color='black', linestyle='--')
        legend = ax.legend(loc='upper right')
        plt.tight_layout()
        plt.savefig('%s/val_seen_error.png' % (PLOT_DIR))
        plt.close(fig)
def train_val_augment():
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    # Load the env img features
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    # Load the augmentation data
    aug_path = args.aug
    # Create the training environment
    aug_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=[aug_path],
                       tokenizer=tok, name='aug')
    # import sys
    # sys.exit()
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'],
                         tokenizer=tok)
    # Printing out the statistics of the dataset
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }
    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback, n_iters,
              prefix, blind, debug, train_vocab, trainval_vocab, batch_size,
              action_embedding_size, target_embedding_size, bidirectional,
              dropout_ratio, weight_decay, feature_size, hidden_size,
              word_embedding_size, lr, result_dir, snapshot_dir, plot_dir,
              train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''
    setup(seed, train_vocab, trainval_vocab)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab if eval_type == 'val' else trainval_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size, splits=train_splits, tokenizer=tok,
                         seed=seed, blind=blind)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size, splits=[split], tokenizer=tok,
                         seed=seed, blind=blind),
                Evaluation([split], seed=seed))
        for split in test_splits
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio,
                              feature_size).cuda()
    train(eval_type, train_env, encoder, decoder, n_iters, seed, feedback,
          max_episode_len, max_input_length, prefix, blind, lr, weight_decay,
          result_dir, snapshot_dir, plot_dir, val_envs=val_envs, debug=debug)
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features, test_only=test_only)
    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None
    train_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict
    if args.submit:
        val_env_names.append('test')
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                           splits=[split], tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict
    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # val_env_names.append('train')
    if not args.beam:
        val_env_names.append("train")
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                           tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'vae_agent':
        train_vae_agent(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(train_env, tok, val_envs)
    elif args.train == 'inferspeaker':
        unseen_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                              splits=['tasks/R2R/data/aug_paths_test.json'],
                              tokenizer=None)
        infer_speaker(unseen_env, tok)
    else:
        assert False
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method,
               n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(features, batch_size=batch_size, splits=[split], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind),
                Evaluation([split], path_type=path_type))
        for split in ['test']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method,
          max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs)
def get_scores(output_file, split):
    output_ids = []
    eval = Evaluation([split], 'lstm')
    eval.scores = defaultdict(list)
    instr_ids = set(eval.instr_ids)
    with open(output_file) as f:
        for item in json.load(f):
            if item['instr_id'] in instr_ids:
                output_ids.append(item['instr_id'])
                instr_ids.remove(item['instr_id'])
                eval._score_item(item['instr_id'], item['trajectory'])
    return output_ids, eval.scores
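# --- Illustration (not part of the snippet above) ---
# Hedged usage sketch for get_scores(): the results path is hypothetical, and the
# only metric key assumed is 'nav_errors', which the plotting snippet above also reads.
ids, scores = get_scores('results/val_seen_results.json', 'val_seen')
nav_errors = scores['nav_errors']
print('%d episodes scored, mean nav error %.2f m'
      % (len(ids), sum(nav_errors) / max(len(nav_errors), 1)))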
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup(seed)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size, splits=['train'], tokenizer=tok,
                         seed=seed, history=history, blind=blind)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size, splits=[split], tokenizer=tok,
                         seed=seed, history=history, blind=blind),
                Evaluation([split], seed=seed))
        for split in ['val_seen']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio,
                              feature_size).cuda()
    train(eval_type, train_env, encoder, decoder, n_iters, seed, history,
          feedback_method, max_episode_len, max_input_length, model_prefix,
          val_envs=val_envs, debug=debug)
def train_val_augment(test_only=False):
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    tok_bert = get_tokenizer(args)
    # Load the env img features
    feat_dict = read_img_features(features, test_only=test_only)
    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
    # Load the augmentation data
    aug_path = args.aug
    # Create the training environment
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'],
                         tokenizer=tok_bert)
    aug_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=[aug_path],
                       tokenizer=tok_bert, name='aug')
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                         tokenizer=tok_bert),
                Evaluation([split], featurized_scans, tok_bert))
        for split in val_env_names
    }
    # Start training
    train(train_env, tok_bert, args.iters, val_envs=val_envs, aug_env=aug_env)
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                           splits=[split], tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in ['val_seen', 'val_unseen', 'train']))
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    tok = get_tokenizer(args)
    feat_dict = read_img_features(features, test_only=test_only)
    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict
    if args.submit:
        val_env_names.append('test')
    else:
        pass
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                           tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, val_envs=val_envs)
    else:
        assert False
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    # Load object features
    obj_s_feat = None
    if args.sparseObj:
        obj_s_feat = utils.read_obj_sparse_features(sparse_obj_feat, args.objthr)
    obj_d_feat = None
    if args.denseObj:
        obj_d_feat = utils.read_obj_dense_features(dense_obj_feat1, dense_obj_feat2,
                                                   bbox, sparse_obj_feat, args.objthr)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, obj_d_feat=obj_d_feat, obj_s_feat=obj_s_feat,
                         batch_size=args.batchSize, splits=['train'], tokenizer=tok)
    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # val_env_names.append('train')
    if not args.beam:
        val_env_names.append("train")
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, obj_d_feat=obj_d_feat, obj_s_feat=obj_s_feat,
                           batch_size=args.batchSize, splits=[split], tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
from eval import Evaluation
# import nltk
# emb_path = 'D:\\IOM\\word2vec\\GoogleNews-vectors-negative300.bin'
# import jieba
emb_path = 'D:\\IOM\\word2vec\\merge_sgns_bigram_char300.bin'
from gensim.models import KeyedVectors

wv_from_bin = KeyedVectors.load_word2vec_format(emb_path, binary=True)
eval_class = Evaluation('', wv_from_bin)
sep2 = '*#*'
sep1 = '|||'


def cut_triples(line):
    global notriple
    line = line.strip()
    triples = []
    for triple_str in line.split(sep2):
        triple_es = triple_str.split(sep1)
        # # Fix for lines with no triples
        # if len(triple_es) > 3:
        #     return []
        triples.append(triple_es)
    return triples


# pres = ['Twitter.100w.test.att', 'Twitter.100w.test.attbeam.num', 'Twitter.100w.test.nmt']
# key = 'Twitter.100w.test.key'
# key_path = 'D:\\ieee\\code\\idef\\ex\\2\\new\\'
pres = [
def main(opts):
    # set manual_seed and build vocab
    print(opts, flush=True)
    setup(opts, opts.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using {device} :)")
    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1, vocab=vocab,
                    encoding_length=opts.max_cap_length)
    # create language instruction encoder
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)
    # create policy model
    policy_model_kwargs = {
        'opts': opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }
    if opts.arch == 'regretful':
        model = Regretful(**policy_model_kwargs)
    elif opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)
    encoder = encoder.to(device)
    model = model.to(device)
    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)
    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(opts, model,
                                                                       encoder, optimizer)
    # if a secondary exp name is specified, append it; this is useful when resuming from
    # a previously saved experiment and saving to another one, e.g., pre-trained on
    # synthetic data and fine-tuned on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary
    feature, img_spec = load_features(opts.img_feat_dir, opts.blind)
    if opts.test_submission:
        assert opts.resume, 'The model was not resumed before running for submission.'
        test_env = ('test',
                    (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                  splits=['test'], tokenizer=tok),
                     Evaluation(['test'], opts)))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return
    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 seed=opts.seed, splits=['train'], tokenizer=tok)
    else:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 seed=opts.seed, splits=['synthetic'], tokenizer=tok)
    val_craft_splits = ['craft_seen', 'craft_unseen']
    val_splits = ['val_seen', 'val_unseen']
    if opts.craft_eval:
        val_splits += val_craft_splits
    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok),
                        Evaluation([split], opts))
                for split in val_splits}
    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)
    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)
    if opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return
    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)
    sys.stdout.flush()
    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)
        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))
            success_rate_compare = success_rate[1]
            if is_experiment():
                # remember the best val_unseen success rate and save a checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(best_success_rate))
                sys.stdout.flush()
                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir, name=opts.exp_name)
        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     seed=opts.seed, splits=['train'], tokenizer=tok)
    print("--> Finished training")
def test(cfg, dataLoader, model, models_info=None, models_vtx=None):
    model.eval()
    if cfg.pytorch.exp_mode == 'val':
        from eval import Evaluation
        Eval = Evaluation(cfg.pytorch, models_info, models_vtx)
    elif cfg.pytorch.exp_mode == 'test':
        csv_file = open(cfg.pytorch.save_csv_path, 'w')
        fieldnames = ['scene_id', 'im_id', 'obj_id', 'score', 'R', 't', 'time']
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        rst_collect = []
    preds = {}
    nIters = len(dataLoader)
    bar = Bar('{}_{}'.format(cfg.pytorch.dataset, cfg.pytorch.object), max=nIters)
    wall_time = 0
    for i, (input, pose, bbox, center, size, clsIdx, imgPath, scene_id, image_id,
            score) in enumerate(dataLoader):
        # `async` is a reserved keyword in Python 3.7+; use non_blocking instead
        input_var = input.cuda(cfg.pytorch.gpu, non_blocking=True).float().cuda(cfg.pytorch.gpu)
        batch_size = len(input)
        # time begin
        T_begin = time.time()
        output_conf, output_coor_x, output_coor_y, output_coor_z = model(input_var)
        output_coor_x = output_coor_x.data.cpu().numpy().copy()
        output_coor_y = output_coor_y.data.cpu().numpy().copy()
        output_coor_z = output_coor_z.data.cpu().numpy().copy()
        outConf = output_conf.data.cpu().numpy().copy()
        output_trans = np.zeros(batch_size)
        collector = list(zip(clsIdx.numpy(), output_coor_x, output_coor_y, output_coor_z,
                             outConf, pose.numpy(), bbox.numpy(), center.numpy(),
                             size.numpy(), input.numpy(), scene_id.numpy(),
                             image_id.numpy(), score.numpy()))
        colLen = len(collector)
        for idx in range(colLen):
            (clsIdx_, output_coor_x_, output_coor_y_, output_coor_z_, output_conf_,
             pose_gt, bbox_, center_, size_, input_, scene_id_, image_id_,
             score_) = collector[idx]
            if cfg.pytorch.dataset.lower() == 'lmo':
                cls = ref.lmo_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tless':
                cls = ref.tless_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'ycbv':
                cls = ref.ycbv_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tudl':
                cls = ref.tudl_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'hb':
                cls = ref.hb_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'icbin':
                cls = ref.icbin_id2obj[clsIdx_]
            elif cfg.pytorch.dataset.lower() == 'itodd':
                cls = ref.itodd_id2obj[int(clsIdx_)]
            select_pts_2d = []
            select_pts_3d = []
            center_h = center_[0]
            center_w = center_[1]
            size_ = int(size_)
            output_coor_x_ = output_coor_x_.squeeze()
            output_coor_y_ = output_coor_y_.squeeze()
            output_coor_z_ = output_coor_z_.squeeze()
            output_coor_ = np.stack([np.argmax(output_coor_x_, axis=0),
                                     np.argmax(output_coor_y_, axis=0),
                                     np.argmax(output_coor_z_, axis=0)], axis=2)
            output_coor_[output_coor_ == cfg.network.coor_bin] = 0
            output_coor_ = 2.0 * output_coor_ / float(cfg.network.coor_bin - 1) - 1.0
            output_coor_[:, :, 0] = output_coor_[:, :, 0] * abs(models_info[clsIdx_]['min_x'])
            output_coor_[:, :, 1] = output_coor_[:, :, 1] * abs(models_info[clsIdx_]['min_y'])
            output_coor_[:, :, 2] = output_coor_[:, :, 2] * abs(models_info[clsIdx_]['min_z'])
            output_conf_ = np.argmax(output_conf_, axis=0)
            output_conf_ = (output_conf_ - output_conf_.min()) / (
                output_conf_.max() - output_conf_.min())
            min_x = 0.001 * abs(models_info[clsIdx_]['min_x'])
            min_y = 0.001 * abs(models_info[clsIdx_]['min_y'])
            min_z = 0.001 * abs(models_info[clsIdx_]['min_z'])
            w_begin = center_w - size_ / 2.
            h_begin = center_h - size_ / 2.
            w_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            h_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            output_conf_ = output_conf_.tolist()
            output_coor_ = output_coor_.tolist()
            for x in range(cfg.dataiter.rot_output_res):
                for y in range(cfg.dataiter.rot_output_res):
                    if output_conf_[x][y] < cfg.test.mask_threshold:
                        continue
                    if abs(output_coor_[x][y][0]) < min_x and \
                       abs(output_coor_[x][y][1]) < min_y and \
                       abs(output_coor_[x][y][2]) < min_z:
                        continue
                    select_pts_2d.append([w_begin + y * w_unit, h_begin + x * h_unit])
                    select_pts_3d.append(output_coor_[x][y])
            model_points = np.asarray(select_pts_3d, dtype=np.float32)
            image_points = np.asarray(select_pts_2d, dtype=np.float32)
            try:
                _, R_vector, T_vector, inliers = cv2.solvePnPRansac(
                    model_points, image_points, cfg.pytorch.camera_matrix,
                    np.zeros((4, 1)), flags=cv2.SOLVEPNP_EPNP)
                cur_wall_time = time.time() - T_begin
                wall_time += cur_wall_time
                R_matrix = cv2.Rodrigues(R_vector, jacobian=0)[0]
                if R_matrix[0, 0] == 1.0:
                    continue
                if cfg.pytorch.exp_mode == 'val':
                    pose_est = np.concatenate(
                        (R_matrix, np.asarray(T_vector).reshape(3, 1)), axis=1)
                    Eval.pose_est_all[cls].append(pose_est)
                    Eval.pose_gt_all[cls].append(pose_gt)
                    Eval.num[cls] += 1
                    Eval.numAll += 1
                elif cfg.pytorch.exp_mode == 'test':
                    rst = {
                        'scene_id': int(scene_id_),
                        'im_id': int(image_id_),
                        'R': R_matrix.reshape(-1).tolist(),
                        't': T_vector.reshape(-1).tolist(),
                        'score': float(score_),
                        'obj_id': int(clsIdx_),  # per-item class id, not the batch tensor
                        'time': cur_wall_time
                    }
                    rst_collect.append(rst)
            except:
                if cfg.pytorch.exp_mode == 'val':
                    Eval.num[cls] += 1
                    Eval.numAll += 1
        Bar.suffix = '{0} [{1}/{2}]| Total: {total:} | ETA: {eta:}'.format(
            cfg.pytorch.exp_mode, i, nIters, total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    if cfg.pytorch.exp_mode == 'val':
        Eval.evaluate_pose()
    elif cfg.pytorch.exp_mode == 'test':
        for item in rst_collect:
            csv_writer.writerow(item)
        csv_file.close()
    print("Wall time of object {}: total {} seconds for {} samples".format(
        cfg.pytorch.object, wall_time, nIters))
    bar.finish()
def test():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())
    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu
    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.3
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 80000
    args.maxAction = 35
    args.batchSize = 24
    args.target_batch_size = 24
    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'
    args.speaker = 'snap/speaker/state_dict/best_val_unseen_bleu'
    args.featdropout = 0.4   # overrides the 0.3 set above
    args.iters = 200000      # overrides the 80000 set above
    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD
    log_dir = 'snap/%s' % args.name
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logdir = '%s/eval' % log_dir
    writer = SummaryWriter(logdir=logdir)
    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'
    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'
    if args.features == 'imagenet':
        features = IMAGENET_FEATURES
    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext
    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                        TRAINVAL_VOCAB)

    # setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    print('start extract keys...')
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')
    val_envs = {
        split: R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split], tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }
    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }
    learner = Learner(val_envs, "", tok, args.maxAction, process_num=2,
                      visible_gpu=visible_gpu)
    learner.eval_init()
    for i in range(0, 10000):
        ckpt = '%s/state_dict/Iter_%06d' % (log_dir, (i + 1) * 100)
        while not os.path.exists(ckpt):
            time.sleep(10)
        time.sleep(10)
        learner.load_eval(ckpt)
        results = learner.eval()
        loss_str = ''
        for key in results:
            evaluator = evaluators[key]
            result = results[key]
            score_summary, _ = evaluator.score(result)
            loss_str += ", %s \n" % key
            for metric, val in score_summary.items():
                loss_str += ', %s: %.3f' % (metric, val)
                writer.add_scalar('%s/%s' % (metric, key), val, (i + 1) * 100)
            loss_str += '\n'
        print(loss_str)
def train_val(seed=None):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # which GPU to use
    device = torch.device('cuda', hparams.device_id)
    # Resume from the latest checkpoint (if any)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if hasattr(args, 'load_path') and hasattr(args, 'eval_only') and args.eval_only:
            sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters
    # Setup seed and read vocab
    setup(seed=seed)
    train_vocab_path = os.path.join(hparams.data_path, 'train_vocab.txt')
    if hasattr(hparams, 'external_main_vocab') and hparams.external_main_vocab:
        train_vocab_path = hparams.external_main_vocab
    if 'verbal' in hparams.advisor:
        subgoal_vocab_path = os.path.join(hparams.data_path, hparams.subgoal_vocab)
        vocab = read_vocab([train_vocab_path, subgoal_vocab_path])
    else:
        vocab = read_vocab([train_vocab_path])
    tok = Tokenizer(vocab=vocab, encoding_length=hparams.max_input_length)
    # Create a training environment
    train_env = VNLABatch(hparams, split='train', tokenizer=tok)
    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if '_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        if '_seen' in hparams.load_path:
            val_splits = ['test_seen']
        end_iter = start_iter + hparams.log_every
    val_envs = {
        split: (VNLABatch(hparams, split=split, tokenizer=tok, from_train_env=train_env,
                          traj_len_estimates=train_env.traj_len_estimates),
                Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits}
    # Build models
    model = AttentionSeq2SeqModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)
    best_metrics = {'val_seen': -1, 'val_unseen': -1, 'combined': -1}
    # Load model parameters from a checkpoint (if any)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']
    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    # Initialize agent
    if 'verbal' in hparams.advisor:
        agent = VerbalAskAgent(model, hparams, device)
    elif hparams.advisor == 'direct':
        agent = AskAgent(model, hparams, device)
    # Train
    return train(train_env, val_envs, agent, model, optimizer, start_iter, end_iter,
                 best_metrics, eval_mode)
image_features_list = ImageFeatures.from_args(args)
vocab = read_vocab(TRAIN_VOCAB, args.language)
tok = Tokenizer(vocab)
env = R2RBatch(image_features_list, batch_size=256,
               splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)
env.batch = env.data

from eval import Evaluation
test_envs = {
    split: (R2RBatch(image_features_list, batch_size=64, splits=[split], tokenizer=tok),
            Evaluation([split]))
    for split in ['val_unseen']
}
agent = make_follower(args, vocab)


def average(_l):
    return float(sum(_l)) / len(_l)


def load_data(filenames):
    all_data = []
    for fn in filenames:
        with open(fn, 'r') as f:
            train_file = json.loads(f.read())
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('/egr/research-hlr/joslin/Matterdata/v1/scans/img_features/pano_object_class.pkl',
                  'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None
    train_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict
    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # if you want to test "train", just uncomment this
        # val_env_names.append('train')
    if not args.beam:
        val_env_names.append("train")
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                           splits=[split], tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))
    # import sys
    # sys.exit()
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method,
              n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    # train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
    #                      path_type=path_type, history=history, blind=blind)
    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic, args.action_space,
                         batch_size=args.batch_size, splits=['train'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)
    # Create validation environments
    # val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
    #                              tokenizer=tok, path_type=path_type, history=history,
    #                              blind=blind),
    #                     Evaluation([split], path_type=path_type))
    #             for split in ['val_seen', 'val_unseen']}
    val_envs = {split: (R2RBatch(feature_store, nav_graphs, args.panoramic,
                                 args.action_space, batch_size=args.batch_size,
                                 splits=[split], tokenizer=tok, path_type=path_type,
                                 history=history, blind=blind),
                        Evaluation([split], path_type=path_type))
                for split in ['val_seen', 'val_unseen']}
    # Build models and train
    # enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" % (args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size, args.hidden_size,
                                 args.dropout_ratio, args.bidirectional,
                                 args.transformer_update, args.bert_n_layers,
                                 args.reverse_input, args.top_lstm, args.vl_layers,
                                 args.la_layers, args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = \
                args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = \
                args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size, args.hidden_size,
                                 args.dropout_ratio, args.bidirectional,
                                 args.transformer_update, args.bert_n_layers,
                                 args.reverse_input, args.top_lstm, args.vl_layers,
                                 args.la_layers, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size
    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" % (args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm, args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size
            # encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            # encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                              padding_idx, dropout_ratio,
                              bidirectional=bidirectional).cuda()
    # decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
    #                           action_embedding_size, args.hidden_size, args.dropout_ratio).cuda()
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768
    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size, args.hidden_size,
                                 args.dropout_ratio, FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()
    train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method,
          max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs, args=args)
def train_val_augment():
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    # Load the augmentation data
    if args.aug is None:
        # If aug is not specified, fall back to the default speaker-generated data
        speaker_snap_name = "adam_drop6_correctsave"
        print("Loading from %s" % speaker_snap_name)
        aug_path = "snap/speaker/long/%s/aug_data/best_val_unseen_loss.json" % speaker_snap_name
    else:
        # Load the path from args
        aug_path = args.aug
    # The dataset used in training
    splits = [aug_path, 'train'] if args.combineAug else [aug_path]
    # Create the training environment
    if args.half_half:
        assert args.aug is not None
        gt_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                          splits=['train'], tokenizer=tok)
        aug_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                           splits=[aug_path], tokenizer=tok)
        train_env = ArbiterBatch(gt_env, aug_env, args.batchSize // 2, args.batchSize // 2,
                                 feat_dict, candidate_dict, batch_size=args.batchSize,
                                 splits=[], tokenizer=tok)
    else:
        train_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                             splits=splits, tokenizer=tok)
    print("The augmented data_size is : %d" % train_env.size())
    # stats = train_env.get_statistics()
    # print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    # print("The average action length of the dataset is %0.4f." % (stats['path']))
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }
    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs)
def meta_filter():
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    # Load the augmentation data
    if args.aug is None:
        # If aug is not specified, fall back to the default speaker-generated data
        speaker_snap_name = "adam_drop6_correctsave"
        print("Loading from %s" % speaker_snap_name)
        aug_path = "snap/speaker/long/%s/aug_data/best_val_unseen_loss.json" % speaker_snap_name
    else:
        # Load the path from args
        aug_path = args.aug
    # Create the training environment
    aug_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                       splits=[aug_path], tokenizer=tok)
    train_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=['train@3333'], tokenizer=tok)
    print("The augmented data_size is : %d" % train_env.size())
    stats = train_env.get_statistics()
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen@133']
    }
    val_env, val_eval = val_envs['val_unseen@133']
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    def filter_result():
        listner.env = val_env
        val_env.reset_epoch()
        listner.test(use_dropout=False, feedback='argmax')
        result = listner.get_results()
        score_summary, _ = val_eval.score(result)
        for metric, val in score_summary.items():
            if metric in ['success_rate']:
                return val

    listner.load(args.load)
    base_accu = (filter_result())
    print("BASE ACCU %0.4f" % base_accu)
    success = 0
    for data_id, datum in enumerate(aug_env.data):
        # Reload the param of the listener
        listner.load(args.load)
        train_env.reset_epoch(shuffle=True)
        listner.env = train_env
        # Train for the datum
        # iters = train_env.size() // train_env.batch_size
        iters = 10
        for i in range(iters):
            listner.env = train_env
            # train_env.reset(batch=([datum] * (train_env.batch_size // 2)), inject=True)
            train_env.reset(batch=[datum] * train_env.batch_size, inject=True)
            # train_env.reset()
            listner.train(1, feedback='sample', reset=False)
            # print("Iter %d, result %0.4f" % (i, filter_result()))
        now_accu = filter_result()
        if now_accu > base_accu:
            success += 1
        # print("RESULT %0.4f" % filter_result())
        print('Accu now %0.4f, success / total: %d / %d = %0.4f'
              % (now_accu, success, data_id + 1, success / (data_id + 1)))
def train_val_augment():
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    # Load the env img features
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    # Load the augmentation data
    if args.upload:
        aug_path = get_sync_dir(os.path.join(args.upload_path, args.aug))
    else:
        aug_path = os.path.join(args.R2R_Aux_path, args.aug)
    # Create the training environment
    # Load object features
    obj_s_feat = None
    if args.sparseObj:
        obj_s_feat = utils.read_obj_sparse_features(sparse_obj_feat, args.objthr)
    obj_d_feat = None
    if args.denseObj:
        obj_d_feat = utils.read_obj_dense_features(dense_obj_feat1, dense_obj_feat2,
                                                   bbox, sparse_obj_feat, args.objthr)
    train_env = R2RBatch(feat_dict, obj_d_feat=obj_d_feat, obj_s_feat=obj_s_feat,
                         batch_size=args.batchSize, splits=['train'], tokenizer=tok)
    aug_env = R2RBatch(feat_dict, obj_d_feat=obj_d_feat, obj_s_feat=obj_s_feat,
                       batch_size=args.batchSize, splits=[aug_path], tokenizer=tok,
                       name='aug')
    # Printing out the statistics of the dataset
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }
    # Note: the OrderedDict below immediately supersedes the dict above, adding the object features
    val_envs = OrderedDict(
        ((split, (R2RBatch(feat_dict, obj_d_feat=obj_d_feat, obj_s_feat=obj_s_feat,
                           batch_size=args.batchSize, splits=[split], tokenizer=tok),
                  Evaluation([split], featurized_scans, tok)))
         for split in ['train', 'val_seen', 'val_unseen']))
    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
def main(opts):
    # set manual_seed and build vocab
    setup(opts, opts.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(
        opts.remove_punctuation == 1,
        opts.reversed == 1,
        vocab=vocab,
        encoding_length=opts.max_cap_length,
    )
    # create language instruction encoder
    encoder_kwargs = {
        "opts": opts,
        "vocab_size": len(vocab),
        "embedding_size": opts.word_embedding_size,
        "hidden_size": opts.rnn_hidden_size,
        "padding_idx": padding_idx,
        "dropout_ratio": opts.rnn_dropout,
        "bidirectional": opts.bidirectional == 1,
        "num_layers": opts.rnn_num_layers,
    }
    print("Using {} as encoder ...".format(opts.lang_embed))
    if "lstm" in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError("Unknown {} language embedding".format(opts.lang_embed))
    print(encoder)
    # create policy model
    policy_model_kwargs = {
        "opts": opts,
        "img_fc_dim": opts.img_fc_dim,
        "img_fc_use_batchnorm": opts.img_fc_use_batchnorm == 1,
        "img_dropout": opts.img_dropout,
        "img_feat_input_dim": opts.img_feat_input_dim,
        "rnn_hidden_size": opts.rnn_hidden_size,
        "rnn_dropout": opts.rnn_dropout,
        "max_len": opts.max_cap_length,
        "max_navigable": opts.max_navigable,
    }
    if opts.arch == "self-monitoring":
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == "speaker-baseline":
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError("Unknown {} model for seq2seq agent".format(opts.arch))
    print(model)
    encoder = encoder.to(device)
    model = model.to(device)
    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)
    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(
            opts, model, encoder, optimizer)
    # if a secondary exp name is specified, append it; this is useful when resuming from
    # a previously saved experiment and saving to another one, e.g., pre-trained on
    # synthetic data and fine-tuned on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary
    feature, img_spec = load_features(opts.img_feat_dir)
    if opts.test_submission:
        assert opts.resume, "The model was not resumed before running for submission."
        test_env = (
            "test",
            (
                R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                             splits=["test"], tokenizer=tok),
                Evaluation(["test"]),
            ),
        )
        agent_kwargs = {
            "opts": opts,
            "env": test_env[1][0],
            "results_path": "",
            "encoder": encoder,
            "model": model,
            "feedback": opts.feedback,
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return
    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 seed=opts.seed, splits=["train"], tokenizer=tok)
    else:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 seed=opts.seed, splits=["synthetic"], tokenizer=tok)
    val_envs = {
        split: (
            R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                         splits=[split], tokenizer=tok),
            Evaluation([split]),
        )
        for split in ["val_seen", "val_unseen"]
    }
    # create agent
    agent_kwargs = {
        "opts": opts,
        "env": train_env,
        "results_path": "",
        "encoder": encoder,
        "model": model,
        "feedback": opts.feedback,
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)
    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)
    if opts.eval_beam or opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(
                trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return
    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)
    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)
        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))
            success_rate_compare = success_rate[1]
            if is_experiment():
                # remember the best val_unseen success rate and save a checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(
                    best_success_rate))
                # save the model if it is the best so far
                save_checkpoint(
                    {
                        "opts": opts,
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "encoder_state_dict": encoder.state_dict(),
                        "best_success_rate": best_success_rate,
                        "optimizer": optimizer.state_dict(),
                        "max_episode_len": opts.max_episode_len,
                    },
                    is_best,
                    checkpoint_dir=opts.checkpoint_dir,
                    name=opts.exp_name,
                )
        if (opts.train_data_augmentation
                and epoch == opts.epochs_data_augmentation):
            train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     seed=opts.seed, splits=["train"], tokenizer=tok)
    print("--> Finished training")
    'prog_monitor': True,
    'dev_monitor': False,
    'attn_only_verb': False,
    'soft_align': False,
    'scorer': None,
    'load_follower': 'tasks/R2R/experiments/pretrain_cgPm_pertraj/snapshots/follower_cg_pm_sample2step_imagenet_mean_pooled_1heads_train_iter_1900_val_unseen-success_rate=0.478'
})
image_features_list = ImageFeatures.from_args(args)
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab)
env = R2RBatch(image_features_list, batch_size=256,
               splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)
env.batch = env.data

from eval import Evaluation
test_envs = {
    split: (R2RBatch(image_features_list, batch_size=64, splits=[split], tokenizer=tok),
            Evaluation([split]))
    for split in ['val_unseen']
}
agent = make_follower(args, vocab)


def average(_l):
    return float(sum(_l)) / len(_l)


def load_data(filenames):
    all_data = []
    for fn in filenames:
        with open(fn, 'r') as f:
            train_file = json.loads(f.read())
        train_instrs = list(train_file.keys())
        train_data = {}
        for instr_id in train_instrs: