def finetune():
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    print("The finetune data_size is : %d\n" % train_env.size())
    val_envs = {
        split: (R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=[split], tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }
    train(train_env, tok, args.iters, val_envs=val_envs)
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        batch_size=BATCH_SIZE):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=batch_size,
                         splits=train_splits, tokenizer=tok)
    enc_hidden_size = hidden_size // 2 if args.bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE
    encoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
        dropout_ratio, bidirectional=args.bidirectional, glove=glove))
    decoder = try_cuda(AttnDecoderLSTM(
        action_embedding_size, hidden_size, dropout_ratio,
        feature_size=feature_size))
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=batch_size,
                         splits=[split], tokenizer=tok),
                eval.Evaluation([split]))
        for split in test_splits}
    return train_env, test_envs, encoder, decoder
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        test_instruction_limit=None):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=batch_size,
                         splits=train_splits, tokenizer=tok)
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE
    encoder = try_cuda(SpeakerEncoderLSTM(
        action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
        bidirectional=bidirectional))
    decoder = try_cuda(SpeakerDecoderLSTM(
        len(vocab), word_embedding_size, hidden_size, dropout_ratio,
        glove=glove))
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=batch_size,
                         splits=[split], tokenizer=tok,
                         instruction_limit=test_instruction_limit),
                eval_speaker.SpeakerEvaluation(
                    [split], instructions_per_path=test_instruction_limit))
        for split in test_splits}
    return train_env, test_envs, encoder, decoder
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'],
                         tokenizer=tok)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(features, batch_size=batch_size, splits=[split],
                         tokenizer=tok),
                Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
def make_env_and_models(args, train_vocab_path, train_splits, test_splits):
    setup(args.seed)
    image_features_list = ImageFeatures.from_args(args)
    if args.job is None:  # create the vocab only during training (job is None)
        vocab = build_vocab(train_splits)
        write_vocab(vocab, TRAIN_VOCAB)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=train_splits,
                         tokenizer=tok) if len(train_splits) > 0 else None
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=[split], tokenizer=tok),
                Evaluation(split, args.instrType))
        for split in test_splits
    }
    agent = make_follower(args, vocab)
    agent.env = train_env
    if args.useObjLabelOrVis in ['label', 'both']:
        if train_env is not None:
            agent.pointer.wtoi = train_env.wtoi
        else:
            agent.pointer.wtoi = test_envs[test_splits[0]][0].wtoi
    return train_env, test_envs, agent
def test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
                    feedback_method, n_iters, model_prefix, blind):
    ''' Train on combined training and validation sets, and generate test
    submission. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok, path_type=path_type, history=history,
                         blind=blind)
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix)
    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'],
                        tokenizer=tok, path_type=path_type, history=history,
                        blind=blind)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix,
                                                   'test', 5000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback,
              n_iters, prefix, blind, debug, train_vocab, trainval_vocab,
              batch_size, action_embedding_size, target_embedding_size,
              bidirectional, dropout_ratio, weight_decay, feature_size,
              hidden_size, word_embedding_size, lr, result_dir, snapshot_dir,
              plot_dir, train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''
    setup(seed, train_vocab, trainval_vocab)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab if eval_type == 'val' else trainval_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size, splits=train_splits,
                         tokenizer=tok, seed=seed, blind=blind)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size, splits=[split], tokenizer=tok,
                         seed=seed, blind=blind),
                Evaluation([split], seed=seed))
        for split in test_splits
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio,
                              feature_size).cuda()
    train(eval_type, train_env, encoder, decoder, n_iters, seed, feedback,
          max_episode_len, max_input_length, prefix, blind, lr, weight_decay,
          result_dir, snapshot_dir, plot_dir, val_envs=val_envs, debug=debug)
def train_val_augment():
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    # Load the env img features
    feat_dict = read_img_features(features)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    # Load the augmentation data
    aug_path = args.aug
    # Create the training environment
    aug_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                       splits=[aug_path], tokenizer=tok, name='aug')
    # import sys
    # sys.exit()
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    # Print out the statistics of the dataset
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    print("The average action length of the dataset is %0.4f." % (stats['path']))
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                         tokenizer=tok),
                Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }
    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features, test_only=test_only)
    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    train_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict

    if args.submit:
        val_env_names.append('test')

    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                    splits=[split], tokenizer=tok),
           Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # val_env_names.append('train')
    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                    tokenizer=tok),
           Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'vae_agent':
        train_vae_agent(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(train_env, tok, val_envs)
    elif args.train == 'inferspeaker':
        unseen_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                              splits=['tasks/R2R/data/aug_paths_test.json'],
                              tokenizer=None)
        infer_speaker(unseen_env, tok)
    else:
        assert False
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
               feedback_method, n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok, path_type=path_type, history=history,
                         blind=blind)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(features, batch_size=batch_size, splits=[split],
                         tokenizer=tok, path_type=path_type, history=history,
                         blind=blind),
                Evaluation([split], path_type=path_type))
        for split in ['test']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          val_envs=val_envs)
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup(seed)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size, splits=['train'], tokenizer=tok,
                         seed=seed, history=history, blind=blind)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size, splits=[split], tokenizer=tok,
                         seed=seed, history=history, blind=blind),
                Evaluation([split], seed=seed))
        for split in ['val_seen']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio,
                              feature_size).cuda()
    train(eval_type, train_env, encoder, decoder, n_iters, seed, history,
          feedback_method, max_episode_len, max_input_length, model_prefix,
          val_envs=val_envs, debug=debug)
def eval_simple_agents():
    """ Run simple baselines on each split. """
    for split in ["train", "val_seen", "val_unseen"]:
        env = R2RBatch(
            Feature(None, False),
            False,
            False,
            6,
            False,
            "lstm",
            batch_size=1,
            splits=[split],
            tokenizer=None,
        )
        ev = Evaluation([split], encoder_type="lstm")  # subgoal=False
        for agent_type in ["Stop", "Shortest", "Random"]:
            outfile = "%s%s_%s_agent.json" % (RESULT_DIR, split,
                                              agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _ = ev.score(outfile)
            print("\n%s" % agent_type)
            pp.pprint(score_summary)
def make_more_train_env(args, train_vocab_path, train_splits):
    setup(args.seed)
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=train_splits, tokenizer=tok)
    return train_env
def make_env_and_models(args, train_vocab_path, train_splits, test_splits):
    setup(args.seed)
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=train_splits,
                         tokenizer=tok) if len(train_splits) > 0 else None
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=args.batch_size,
                         splits=[split], tokenizer=tok),
                eval.Evaluation([split]))
        for split in test_splits}
    agent = make_follower(args, vocab)
    agent.env = train_env
    return train_env, test_envs, agent
def hard_negative():
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    gt_train_env, gt_val_seen_env, gt_val_unseen_env = gt_envs = list(
        R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                 splits=[split], tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen'])
    neg_train_env, neg_val_seen_env, neg_val_unseen_env = neg_envs = list(
        R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                 splits=[split + "_instneg", split + "_pathneg"],
                 tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen'])
    arbiter_train_env, arbiter_val_seen_env, arbiter_val_unseen_env = (
        ArbiterBatch(gt_env, neg_env, args.batchSize // 2, args.batchSize // 2,
                     feat_dict, candidate_dict, batch_size=args.batchSize,
                     splits=[], tokenizer=tok)
        for gt_env, neg_env in zip(gt_envs, neg_envs))
    train_arbiter(arbiter_train_env, tok, args.iters,
                  val_envs={
                      'train': arbiter_train_env,
                      'val_seen': arbiter_val_seen_env,
                      'val_unseen': arbiter_val_unseen_env,
                  })
def train_val_augment(test_only=False):
    """ Train the listener with the augmented data """
    setup()
    # Create a batch training environment that will also preprocess text
    tok_bert = get_tokenizer(args)
    # Load the env img features
    feat_dict = read_img_features(features, test_only=test_only)
    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
    # Load the augmentation data
    aug_path = args.aug
    # Create the training environment
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok_bert)
    aug_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                       splits=[aug_path], tokenizer=tok_bert, name='aug')
    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                         tokenizer=tok_bert),
                Evaluation([split], featurized_scans, tok_bert))
        for split in val_env_names
    }
    # Start training
    train(train_env, tok_bert, args.iters, val_envs=val_envs, aug_env=aug_env)
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    train_env = R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict
    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, candidate_dict, batch_size=args.batchSize,
                    splits=[split], tokenizer=tok),
           Evaluation([split], featurized_scans, tok)))
         for split in ['val_seen', 'val_unseen', 'train']))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    tok = get_tokenizer(args)
    feat_dict = read_img_features(features, test_only=test_only)
    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict

    if args.submit:
        val_env_names.append('test')

    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                    tokenizer=tok),
           Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, val_envs=val_envs)
    else:
        assert False
def eval_simple_agents():
    ''' Run simple baselines on each split. '''
    for split in ['train', 'val_seen', 'val_unseen']:
        env = R2RBatch(None, batch_size=1, splits=[split])
        ev = Evaluation([split])
        for agent_type in ['Stop', 'Shortest', 'Random']:
            outfile = '%s%s_%s_agent.json' % (RESULT_DIR, split,
                                              agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _ = ev.score(outfile)
            print('\n%s' % agent_type)
            pp.pprint(score_summary)
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'],
                         tokenizer=tok)
    # Create validation environments
    val_envs = {
        split: (R2RBatch(features, batch_size=batch_size, splits=[split],
                         tokenizer=tok),
                Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }
    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    train(train_env, len(vocab), n_iters, val_envs=val_envs)
def train_vae():
    """Train the VAE for the sub-policy (z -> policy)."""
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
    # Create a batch training environment that will also preprocess text
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                         splits=['sub_train'], tokenizer=tok)
    writer = SummaryWriter(logdir=log_dir)
    obs_dim = train_env.feature_size + args.angle_feat_size
    # TODO: latent_dim ablation
    path_len = 2  # fix path_len = 2, total path_len = 6
    vae = BaseVAE(train_env, tok, obs_dim, args.vae_latent_dim).cuda()
    vae.train()
def go(self):
    self.envs = {}
    for key in self.env_args:
        # print('env', key)
        feature_store, data, scans, bs = self.env_args[key]
        env = R2RBatch(feature_store, bs, splits=None,
                       tokenizer=self.master_model.tok, name='sub_train',
                       record_scans=scans)
        env.data = data
        self.envs[key] = env
        k = key
    while True:
        _ = self.sync_Q.get()
        self.model = agent_v6.SSM(self.envs[k], self.master_model.results_path,
                                  self.master_model.tok,
                                  self.master_model.episode_len,
                                  self.master_model.max_node,
                                  self.master_model.args)
        self._sync_local_with_global()
        for model in self.model.models:
            model.eval()
        for name in self.envs:
            # print('doing', name)
            iters = None if name != 'train' else 20
            # iters = 1
            self.model.env = self.envs[name]
            self.model.test(use_dropout=False, feedback='argmax', iters=iters)
            res = self.model.get_results()
            self.res_Q.put((name, res))
        del self.model
        self.model = None
        torch.cuda.empty_cache()
def eval_simple_agents(args):
    ''' Run simple baselines on each split. '''
    img_features = ImageFeatures.from_args(args)
    for split in ['train', 'val_seen', 'val_unseen', 'test']:
        env = R2RBatch(img_features, batch_size=1, splits=[split],
                       prefix=args.prefix)
        ev = Evaluation([split])
        for agent_type in ['Stop', 'Shortest', 'Random']:
            outfile = '%s%s_%s_agent.json' % (train.RESULT_DIR, split,
                                              agent_type.lower())
            agent = BaseAgent.get_agent(agent_type)(env, outfile)
            agent.test()
            agent.write_results()
            score_summary, _, _ = ev.score_file(outfile)
            print('\n%s' % agent_type)
            pp.pprint(score_summary)
def test_submission():
    ''' Train on combined training and validation sets, and generate test
    submission. '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    # train_env = R2RBatch(features, batch_size=batch_size,
    #                      splits=['train', 'val_seen', 'val_unseen'],
    #                      tokenizer=tok)

    # Build models and load pretrained weights (training is commented out)
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    # train(train_env, encoder, decoder, n_iters)
    encoder.load_state_dict(torch.load('%s/seq2seq_enc.pt' % (SNAPSHOT_DIR)))
    decoder.load_state_dict(torch.load('%s/seq2seq_dec.pt' % (SNAPSHOT_DIR)))

    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test1'],
                        tokenizer=tok)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, 'seq2seq',
                                                   'test1', 20000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
def train():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())

    # Available GPUs; GPU 0 handles the gradient accumulation
    visible_gpu = "0,1,2,3"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.4
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'rms'
    args.lr = 1e-4
    args.iters = 80000
    args.maxAction = 15
    args.batchSize = 16
    args.aug = 'tasks/R2R/data/aug_paths.json'
    args.self_train = True
    args.featdropout = 0.4
    args.iters = 200000

    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: Using SGD")
        args.optimizer = torch.optim.SGD

    log_dir = 'snap/%s' % args.name
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'
    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'

    if args.features == 'imagenet':
        features = IMAGENET_FEATURES
    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext
    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    # setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)

    # Create the training environment
    train_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    aug_env = R2RBatch(feat_dict, batch_size=args.batchSize,
                       splits=[args.aug], tokenizer=tok)
    train_env = {'train': train_env, 'aug': aug_env}

    load_path = None
    torch.autograd.set_detect_anomaly(True)
    learner = Learner(train_env, "", tok, args.maxAction, process_num=4,
                      max_node=17, visible_gpu=visible_gpu)
    if load_path is not None:
        print('load checkpoint from:', load_path)
        learner.load(load_path)
    learner.train()
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open(
                '/egr/research-hlr/joslin/Matterdata/v1/scans/img_features/pano_object_class.pkl',
                'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    train_env = R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                         splits=['train'], tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # if you want to test "train", just uncomment this
        # val_env_names.append('train')
    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, pano_caffe, batch_size=args.batchSize,
                    splits=[split], tokenizer=tok),
           Evaluation([split], featurized_scans, tok)))
         for split in val_env_names))

    # import sys
    # sys.exit()

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
def test():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())

    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.3
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 80000
    args.maxAction = 35
    args.batchSize = 24
    args.target_batch_size = 24
    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'
    args.speaker = 'snap/speaker/state_dict/best_val_unseen_bleu'
    args.featdropout = 0.4
    args.iters = 200000

    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: Using SGD")
        args.optimizer = torch.optim.SGD

    log_dir = 'snap/%s' % args.name
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logdir = '%s/eval' % log_dir
    writer = SummaryWriter(logdir=logdir)

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'
    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'

    if args.features == 'imagenet':
        features = IMAGENET_FEATURES
    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext
    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    # setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    feat_dict = read_img_features(features)

    print('start extract keys...')
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')

    val_envs = {
        split: R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                        tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }
    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    learner = Learner(val_envs, "", tok, args.maxAction, process_num=2,
                      visible_gpu=visible_gpu)
    learner.eval_init()

    for i in range(0, 10000):
        ckpt = '%s/state_dict/Iter_%06d' % (log_dir, (i + 1) * 100)
        while not os.path.exists(ckpt):
            time.sleep(10)
        time.sleep(10)
        learner.load_eval(ckpt)
        results = learner.eval()
        loss_str = ''
        for key in results:
            evaluator = evaluators[key]
            result = results[key]
            score_summary, _ = evaluator.score(result)
            loss_str += ", %s \n" % key
            for metric, val in score_summary.items():
                loss_str += ', %s: %.3f' % (metric, val)
                writer.add_scalar('%s/%s' % (metric, key), val, (i + 1) * 100)
            loss_str += '\n'
        print(loss_str)
    'prog_monitor': True,
    'dev_monitor': False,
    'attn_only_verb': False,
    'soft_align': False,
    'scorer': None,
    'load_follower': 'tasks/R2R/experiments/pretrain_cgPm_pertraj/snapshots/follower_cg_pm_sample2step_imagenet_mean_pooled_1heads_train_iter_1900_val_unseen-success_rate=0.478',
    'language': 'en-OLD',
    'prefix': 'R2R',
})

image_features_list = ImageFeatures.from_args(args)
vocab = read_vocab(TRAIN_VOCAB, args.language)
tok = Tokenizer(vocab)
env = R2RBatch(image_features_list, batch_size=256,
               splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)
env.batch = env.data

from eval import Evaluation
test_envs = {
    split: (R2RBatch(image_features_list, batch_size=64, splits=[split],
                     tokenizer=tok),
            Evaluation([split]))
    for split in ['val_unseen']
}
agent = make_follower(args, vocab)
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        test_instruction_limit=None):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=batch_size,
                         splits=train_splits, tokenizer=tok)
    # extend train data and shuffle
    train_env.data.extend(hardNeg_train)
    random.shuffle(train_env.data)

    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE
    # =========================================================================
    # visEncoder = try_cuda(CompatVisEncoderLSTM(
    #     action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
    #     bidirectional=bidirectional))
    # =========================================================================
    visEncoder = try_cuda(
        SpeakerEncoderLSTM(action_embedding_size, feature_size,
                           enc_hidden_size, dropout_ratio,
                           bidirectional=bidirectional))
    # =========================================================================
    # lanEncoder = try_cuda(CompatLanEncoderLSTM(
    #     len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
    #     dropout_ratio, bidirectional=True, glove=glove))
    # =========================================================================
    lanEncoder = try_cuda(
        EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                    vocab_pad_idx, dropout_ratio, bidirectional=False,
                    glove=glove))
    dotSim = try_cuda(dotSimilarity(batch_size, enc_hidden_size))
    # visEncoder.load_state_dict(torch.load('tasks/R2R/snapshots/release/speaker_final_release_enc'))
    # lanEncoder.load_state_dict(torch.load('tasks/R2R/snapshots/release/follower_final_release_enc'))

    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=batch_size,
                         splits=[split], tokenizer=tok,
                         instruction_limit=test_instruction_limit),
                eval_speaker.SpeakerEvaluation(
                    [split], instructions_per_path=test_instruction_limit))
        for split in test_splits
    }
    # test_envs['val_seen'][0].data.extend(hardNeg_val_seen)
    test_envs['val_unseen'][0].data.extend(hardNeg_val_unseen)
    test_envs['val_unseen'][0].data = test_envs['val_unseen'][0].data[3000:4000]
    return train_env, test_envs, visEncoder, lanEncoder, dotSim