def test_submission():
    ''' Train on combined training and validation sets, and generate test submission.

    Uses module-level configuration (features, batch_size, hidden_size, n_iters, ...).
    '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)

    # Build models and train. Bidirectional encoders split the hidden size
    # between the two directions.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters)

    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'],
                        tokenizer=tok)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    # BUG FIX: the filename previously hard-coded iteration 20000; use the
    # actual number of training iterations so the name matches the model state.
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix,
                                                   'test', n_iters)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    # Text pipeline shared by every environment below.
    vocabulary = read_vocab(TRAIN_VOCAB)
    tokenizer = Tokenizer(vocab=vocabulary, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'],
                         tokenizer=tokenizer)

    # One (environment, evaluator) pair per held-out split.
    val_envs = {}
    for split in ['val_seen', 'val_unseen']:
        split_env = R2RBatch(features, batch_size=batch_size, splits=[split],
                             tokenizer=tokenizer)
        val_envs[split] = (split_env, Evaluation([split]))

    # Build the seq2seq model and run training. A bidirectional encoder gets
    # half the hidden size per direction.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocabulary), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback,
              n_iters, prefix, blind, debug, train_vocab, trainval_vocab,
              batch_size, action_embedding_size, target_embedding_size,
              bidirectional, dropout_ratio, weight_decay, feature_size,
              hidden_size, word_embedding_size, lr, result_dir, snapshot_dir,
              plot_dir, train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''
    setup(seed, train_vocab, trainval_vocab)

    # Vocabulary choice depends on the evaluation regime.
    chosen_vocab = train_vocab if eval_type == 'val' else trainval_vocab
    vocab = read_vocab(chosen_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)

    # Batch training environment that also tokenizes the instructions.
    train_env = R2RBatch(batch_size=batch_size, splits=train_splits,
                         tokenizer=tok, seed=seed, blind=blind)

    # One (environment, evaluator) pair per evaluation split.
    val_envs = {}
    for split in test_splits:
        split_env = R2RBatch(batch_size=batch_size, splits=[split],
                             tokenizer=tok, seed=seed, blind=blind)
        val_envs[split] = (split_env, Evaluation([split], seed=seed))

    # Build encoder/decoder and launch training.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio,
                              feature_size).cuda()
    train(eval_type, train_env, encoder, decoder, n_iters, seed, feedback,
          max_episode_len, max_input_length, prefix, blind, lr, weight_decay,
          result_dir, snapshot_dir, plot_dir, val_envs=val_envs, debug=debug)
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
               feedback_method, n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''
    setup()
    vocab = read_vocab(TRAINVAL_VOCAB)
    tokenizer = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    # Train on all annotated splits combined.
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tokenizer, path_type=path_type,
                         history=history, blind=blind)

    # The held-out test split is the only validation environment here.
    val_envs = {}
    for split in ['test']:
        split_env = R2RBatch(features, batch_size=batch_size, splits=[split],
                             tokenizer=tokenizer, path_type=path_type,
                             history=history, blind=blind)
        val_envs[split] = (split_env, Evaluation([split], path_type=path_type))

    # Build the model and train.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          val_envs=val_envs)
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup(seed)
    vocabulary = read_vocab(TRAIN_VOCAB)
    tokenizer = Tokenizer(vocab=vocabulary, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size, splits=['train'],
                         tokenizer=tokenizer, seed=seed, history=history,
                         blind=blind)

    # Validation currently runs on 'val_seen' only.
    val_envs = {}
    for split in ['val_seen']:
        split_env = R2RBatch(batch_size=batch_size, splits=[split],
                             tokenizer=tokenizer, seed=seed, history=history,
                             blind=blind)
        val_envs[split] = (split_env, Evaluation([split], seed=seed))

    # Build encoder/decoder and launch training.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocabulary), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size, dropout_ratio,
                              feature_size).cuda()
    train(eval_type, train_env, encoder, decoder, n_iters, seed, history,
          feedback_method, max_episode_len, max_input_length, model_prefix,
          val_envs=val_envs, debug=debug)
def valid_speaker(tok, val_envs):
    """Evaluate the best saved speaker on each validation split, with and without beam search."""
    import tqdm
    listner = Seq2SeqAgent(None, "", tok, args.maxAction)
    speaker = Speaker(None, listner, tok)
    speaker.load(os.path.join(log_dir, 'state_dict', 'best_val_seen_bleu'))

    for use_beam in [False, True]:
        args.beam = use_beam
        print("Using Beam Search %s" % args.beam)
        for env_name, (env, evaluator) in val_envs.items():
            if env_name == 'train':
                continue  # the training split is not validated
            print("............ Evaluating %s ............." % env_name)
            speaker.env = env
            path2inst, loss, word_accu, sent_accu = speaker.valid(
                beam=args.beam, wrapper=tqdm.tqdm)
            # Show one generated instruction next to its ground truth.
            sample_id = next(iter(path2inst.keys()))
            print("Inference: ", tok.decode_sentence(path2inst[sample_id]))
            print("GT: ", evaluator.gt[sample_id]['instructions'])
            bleu_score, precisions, _ = evaluator.bleu_score(path2inst)
            print(
                "Bleu, Loss, Word_Accu, Sent_Accu for %s is: %0.4f, %0.4f, %0.4f, %0.4f"
                % (env_name, bleu_score, loss, word_accu, sent_accu))
            print(
                "Bleu 1: %0.4f Bleu 2: %0.4f, Bleu 3 :%0.4f, Bleu 4: %0.4f"
                % tuple(precisions))
            print("Average Length %0.4f" % utils.average_length(path2inst))
def valid(train_env, tok, n_iters, log_every=100, val_envs={}):
    ''' Train on training set, validating on both seen and unseen. '''
    agent = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    print("Loaded the listener model at iter % d" % agent.load(args.load))

    for env_name, (env, evaluator) in val_envs.items():
        agent.logs = defaultdict(list)
        agent.env = env
        iters = None  # evaluate the whole split
        # Score under test conditions: greedy decoding, no dropout.
        agent.test(use_dropout=False, feedback='argmax', iters=iters)
        result = agent.get_results()
        score_summary, _ = evaluator.score(result)

        # Assemble one metrics line per environment.
        loss_str = "%s: " % env_name
        for metric, val in score_summary.items():
            loss_str += ', %s: %.3f' % (metric, val)
        print(loss_str)
def valid(train_env, tok, val_envs={}):
    """Evaluate a loaded listener on every validation environment.

    Prints one metrics line per split and, when ``args.submit`` is set, dumps
    the raw results to ``submit_<split>.json`` under ``log_dir``.
    """
    agent = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    print("Loaded the listener model at iter %d from %s" %
          (agent.load(args.load), args.load))

    for env_name, (env, evaluator) in val_envs.items():
        agent.logs = defaultdict(list)
        agent.env = env
        iters = None  # evaluate the whole split
        agent.test(use_dropout=False, feedback='argmax', iters=iters)
        result = agent.get_results()

        if env_name != '':
            score_summary, _ = evaluator.score(result)
            loss_str = "Env name: %s" % env_name
            for metric, val in score_summary.items():
                loss_str += ', %s: %.4f' % (metric, val)
            print(loss_str)

        if args.submit:
            # BUG FIX: the file handle from open() was never closed; use a
            # context manager so the submission file is flushed and closed.
            submit_path = os.path.join(log_dir, "submit_%s.json" % env_name)
            with open(submit_path, 'w') as f:
                json.dump(result, f, sort_keys=True, indent=4,
                          separators=(',', ': '))
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    # Vocabulary and tokenizer shared by all environments.
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'],
                         tokenizer=tok)

    # Validation environments paired with their evaluators.
    eval_splits = ['val_seen', 'val_unseen']
    val_envs = {
        name: (R2RBatch(features, batch_size=batch_size, splits=[name],
                        tokenizer=tok),
               Evaluation([name]))
        for name in eval_splits
    }

    # A bidirectional encoder gets half the hidden width per direction.
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
def test():
    """Build a listener agent and optionally restore it from ``args.load``."""
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    listner = Seq2SeqAgent(None, "", tok, args.maxAction)

    start_iter = 0
    if args.load is not None:
        # Resume from the checkpoint; load() returns the iteration it stored.
        print("LOAD THE DICT from %s" % args.load)
        start_iter = listner.load(os.path.join(args.load))
def train_speaker(train_env, tok, n_iters, log_every=500, val_envs={}):
    """Train the speaker, validating every ``log_every`` iterations and keeping the best-BLEU / best-loss checkpoints per split."""
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    speaker = Speaker(train_env, listner, tok)

    if args.fast_train:
        log_every = 40

    best_bleu = defaultdict(lambda: 0)
    best_loss = defaultdict(lambda: 1232)
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # One chunk of training iterations.
        speaker.env = train_env
        speaker.train(interval)

        print()
        print("Iter: %d" % idx)

        # Validate on every non-training environment.
        for env_name, (env, evaluator) in val_envs.items():
            if 'train' in env_name:
                continue  # skip the large training set for efficiency
            print("............ Evaluating %s ............." % env_name)
            speaker.env = env
            path2inst, loss, word_accu, sent_accu = speaker.valid()
            # Show one generated instruction next to its ground truth.
            path_id = next(iter(path2inst.keys()))
            print("Inference: ", tok.decode_sentence(path2inst[path_id]))
            print("GT: ", evaluator.gt[str(path_id)]['instructions'])
            bleu_score, precisions = evaluator.bleu_score(path2inst)

            # Tensorboard logging.
            writer.add_scalar("bleu/%s" % (env_name), bleu_score, idx)
            writer.add_scalar("loss/%s" % (env_name), loss, idx)
            writer.add_scalar("word_accu/%s" % (env_name), word_accu, idx)
            writer.add_scalar("sent_accu/%s" % (env_name), sent_accu, idx)
            writer.add_scalar("bleu4/%s" % (env_name), precisions[3], idx)

            # Checkpoint on new best BLEU.
            if bleu_score > best_bleu[env_name]:
                best_bleu[env_name] = bleu_score
                print('Save the model with %s BEST env bleu %0.4f' %
                      (env_name, bleu_score))
                speaker.save(idx, os.path.join(log_dir, 'state_dict',
                                               'best_%s_bleu' % env_name))
            # Checkpoint on new best (lowest) loss.
            if loss < best_loss[env_name]:
                best_loss[env_name] = loss
                print('Save the model with %s BEST env loss %0.4f' %
                      (env_name, loss))
                speaker.save(idx, os.path.join(log_dir, 'state_dict',
                                               'best_%s_loss' % env_name))

            print("Bleu 1: %0.4f Bleu 2: %0.4f, Bleu 3 :%0.4f, Bleu 4: %0.4f" %
                  tuple(precisions))
def prepare_r2r_data():
    ''' Prepare data from the training set, valseen and val_unseen splits. '''
    # Fixed seeds for reproducible shuffling/batching.
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    # NOTE(review): hard-coded absolute path to the feature store — verify it
    # exists on the target machine before running.
    feature_data_store = imgfeat_r2r(
        '/media/diskpart2/oscar_data/r2r_vln/train.yaml')
    # NOTE(review): featurized_scans is computed but never used below.
    featurized_scans = feature_data_store.get_feat_scans()
    # Only the 'train' split is actually loaded, despite the docstring
    # mentioning the validation splits.
    train_env = R2RBatch(feature_data_store, batch_size=args.batchSize,
                         splits=['train'])
    # NOTE(review): `tok` and `interval` are not defined in this function and
    # are not visible as module-level names in this chunk — presumably globals;
    # confirm, otherwise this raises NameError when called.
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    listner.env = train_env
    listner.train(interval, feedback=args.feedback)  # Train interval iters
def test_submission():
    ''' Train on combined training and validation sets, and generate test submission.

    Uses module-level configuration (features, batch_size, hidden_size, n_iters, ...).
    '''
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok)

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                          padding_idx, dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                              action_embedding_size, hidden_size,
                              dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters)

    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'],
                        tokenizer=tok)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    # BUG FIX: the filename previously hard-coded iteration 20000; use the
    # actual number of training iterations so the name matches the model state.
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix,
                                                   'test', n_iters)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
def infer_speaker(env, tok):
    """Generate speaker instructions for every path in ``env`` and write them back into the dataset JSON."""
    import tqdm
    from utils import load_datasets

    listner = Seq2SeqAgent(env, "", tok, args.maxAction)
    speaker = Speaker(env, listner, tok)
    speaker.load(args.load)

    # Map path_id -> index in the raw dataset so predictions can be written
    # back in place.
    dataset = load_datasets(env.splits)
    key_map = {item["path_id"]: i for i, item in enumerate(dataset)}

    path2inst = speaker.get_insts(wrapper=tqdm.tqdm)
    for path_id in path2inst.keys():
        speaker_pred = tok.decode_sentence(path2inst[path_id])
        dataset[key_map[path_id]]['instructions'] = [speaker_pred]

    with open("tasks/R2R/data/aug_paths_unseen_infer.json", "w") as f:
        json.dump(dataset, f, indent=4, sort_keys=True)
def valid_speaker(train_env, tok, val_envs):
    """Validate a pre-trained speaker on each validation environment.

    Prints one sampled inference/ground-truth pair plus the BLEU score and
    coverage counts per split.
    """
    import tqdm
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    speaker = Speaker(train_env, listner, tok)
    speaker.load(args.load)

    for env_name, (env, evaluator) in val_envs.items():
        if env_name == 'train':
            continue  # the training split is not validated
        print("............ Evaluating %s ............." % env_name)
        speaker.env = env
        path2inst, loss, word_accu, sent_accu = speaker.valid(wrapper=tqdm.tqdm)
        path_id = next(iter(path2inst.keys()))
        print("Inference: ", tok.decode_sentence(path2inst[path_id]))
        print("GT: ", evaluator.gt[str(path_id)]['instructions'])
        bleu_score, precisions = evaluator.bleu_score(path2inst)
        # Coverage check: env size vs number of generated instructions.
        print(len(env.data), len(path2inst.keys()))
        # BUG FIX: removed a leftover `import pdb; pdb.set_trace()` breakpoint
        # that halted every evaluation run at this point.
def filter_arbiter(valid_env, aug_env, tok):
    """Score augmented instructions with a trained arbiter and keep those with probability > 0.5."""
    import tqdm
    listner = Seq2SeqAgent(aug_env, "", tok, args.maxAction)
    arbiter = Arbiter(aug_env, listner, tok)
    arbiter.load(args.load)  # restore the trained arbiter weights

    os.makedirs(os.path.join(log_dir, 'arbiter_result'), exist_ok=True)

    # (The validation env could be scored here to calibrate the threshold;
    # currently only the augmentation data is scored.)
    arbiter.env = aug_env
    aug_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)

    # Work on shallow copies so the env's own records stay untouched.
    aug_data = [datum.copy() for datum in aug_env.data]
    for record in aug_data:
        record['instructions'] = [record['instructions']]
        record.pop('instr_encoding')  # drop the redundant encoding
        record['prob'] = aug_inst2prob[record['instr_id']]
    json.dump(
        aug_data,
        open(os.path.join(log_dir, 'arbiter_result', 'aug_prob.json'), 'w'))

    # Keep the confident subset. These dicts alias aug_data's entries, so the
    # instr_id removal below applies to the returned records too.
    data = [record for record in aug_data
            if aug_inst2prob[record['instr_id']] > 0.5]
    for record in aug_data:
        record.pop('instr_id')
    return data
def valid_speaker(tok, val_envs):
    """Evaluate a pre-trained speaker and report language metrics per validation split."""
    import tqdm
    listner = Seq2SeqAgent(None, "", tok, args.maxAction)
    speaker = Speaker(None, listner, tok)
    speaker.load(args.load)

    for env_name, (env, evaluator) in val_envs.items():
        if env_name == 'train':
            continue  # the training split is not validated
        print("............ Evaluating %s ............." % env_name)
        speaker.env = env
        path2inst, loss, word_accu, sent_accu = speaker.valid(wrapper=tqdm.tqdm)
        # Show one generated instruction next to its ground truth.
        path_id = next(iter(path2inst.keys()))
        print("Inference: ", tok.decode_sentence(path2inst[path_id]))
        print("GT: ", evaluator.gt[path_id]['instructions'])

        # Language metrics (METEOR excluded).
        pathXinst = list(path2inst.items())
        name2score = evaluator.lang_eval(pathXinst, no_metrics={'METEOR'})
        pieces = ["%s_%s: %0.4f " % (env_name, score_name, score)
                  for score_name, score in name2score.items()]
        score_string = " " + "".join(pieces)
        print("For env %s" % env_name)
        print(score_string)
        print("Average Length %0.4f" % utils.average_length(path2inst))
# Select the precomputed image features and their candidate features.
features = IMAGENET_FEATURES
CANDIDATE_FEATURES = IMAGENET_CANDIDATE_FEATURES

# load features and feature_candidates
feature_dict = read_img_features(features)
candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)

# load glove embeddings and the training vocabulary
glove_path = 'tasks/R2R/data/train_glove.npy'
glove = np.load(glove_path)
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)

# instantiate the listener and load a pre-trained checkpoint
listner = Seq2SeqAgent(None, "", tok, feat=feature_dict,
                       candidates=candidate_dict, episode_len=args.maxAction)
listner.load(
    'snap/long/ablation_cand_0208_accuGrad_envdrop_ty/state_dict/best_val_unseen'
)


# nav graph loader from env.py
# NOTE(review): this definition appears truncated in this chunk — only the
# nested distance() helper is visible; confirm the full body against env.py.
def load_nav_graphs(scans):
    ''' Load connectivity graph for each scan '''
    def distance(pose1, pose2):
        ''' Euclidean distance between two graph poses.

        Uses flat pose-matrix indices 3, 7 and 11 — presumably the translation
        entries of a flattened 4x4 pose matrix; confirm against the pose format.
        '''
        return ((pose1['pose'][3]-pose2['pose'][3])**2\
            + (pose1['pose'][7]-pose2['pose'][7])**2\
            + (pose1['pose'][11]-pose2['pose'][11])**2)**0.5
def train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          log_every=100, val_envs=None):
    ''' Train on training set, validating on both seen and unseen.

    Logs per-interval train/validation losses and metrics to a CSV and saves
    encoder/decoder snapshots every interval.
    '''
    if val_envs is None:
        val_envs = {}

    if agent_type == 'seq2seq':
        agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len)
    else:
        sys.exit("Unrecognized agent_type '%s'" % agent_type)
    # CONSISTENCY FIX: converted Python-2 `print` statements and
    # `dict.iteritems()` to the Python-3 forms used everywhere else in this file.
    print('Training a %s agent with %s feedback' % (agent_type, feedback_method))
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate,
                                   weight_decay=weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate,
                                   weight_decay=weight_decay)

    data_log = defaultdict(list)  # per-interval metrics, dumped to CSV below
    start = time.time()
    print('Start training')
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)  # final chunk may be shorter
        iter = idx + interval
        data_log['iteration'].append(iter)

        # Train for log_every interval
        agent.train(encoder_optimizer, decoder_optimizer, interval,
                    feedback=feedback_method)
        train_losses = np.array(agent.losses)
        assert len(train_losses) == interval
        train_loss_avg = np.average(train_losses)
        data_log['train loss'].append(train_loss_avg)
        loss_str = 'train loss: %.4f' % train_loss_avg

        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            agent.env = env
            agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR,
                                                           model_prefix,
                                                           env_name, iter)
            # Get validation loss under the same conditions as training
            agent.test(use_dropout=True, feedback=feedback_method,
                       allow_cheat=True)
            val_losses = np.array(agent.losses)
            val_loss_avg = np.average(val_losses)
            data_log['%s loss' % env_name].append(val_loss_avg)
            # Get validation distance from goal under test evaluation conditions
            agent.test(use_dropout=False, feedback='argmax')
            agent.write_results()
            score_summary, _ = evaluator.score(agent.results_path)
            # BUG FIX: this previously used `loss_str = ...`, discarding the
            # train-loss prefix and any earlier environments' metrics.
            loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            for metric, val in score_summary.items():
                data_log['%s %s' % (env_name, metric)].append(val)
                if metric in ['success_rate', 'oracle success_rate',
                              'oracle path_success_rate',
                              'dist_to_end_reduction']:
                    loss_str += ', %s: %.3f' % (metric, val)
        agent.env = train_env

        print('%s (%d %d%%) %s' % (timeSince(start, float(iter) / n_iters),
                                   iter, float(iter) / n_iters * 100, loss_str))

        df = pd.DataFrame(data_log)
        df.set_index('iteration')
        df_path = '%s%s-log.csv' % (PLOT_DIR, model_prefix)
        df.to_csv(df_path)

        split_string = "-".join(train_env.splits)
        enc_path = '%s%s_%s_enc_iter_%d' % (SNAPSHOT_DIR, model_prefix,
                                            split_string, iter)
        dec_path = '%s%s_%s_dec_iter_%d' % (SNAPSHOT_DIR, model_prefix,
                                            split_string, iter)
        agent.save(enc_path, dec_path)
    print('Finish training')
def train(eval_type, train_env, encoder, decoder, n_iters, seed, feedback,
          max_episode_len, max_input_length, prefix, blind, lr, weight_decay,
          result_dir, snapshot_dir, plot_dir, log_every=50, val_envs=None,
          debug=False):
    ''' Train on training set, validating on both seen and unseen. '''
    if debug:
        print("Training in debug mode")
        log_every = 1  # validate every iteration while debugging

    if val_envs is None:
        val_envs = {}

    print('Training with %s feedback' % (feedback))
    agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len,
                         blind=blind)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr,
                                   weight_decay=weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr,
                                   weight_decay=weight_decay)

    # Accumulates per-interval metrics; dumped to CSV and wandb every interval.
    data_log = defaultdict(list)
    start = time.time()

    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)  # final chunk may be shorter
        iter = idx + interval
        data_log['iteration'].append(iter)

        # Train for log_every interval
        agent.train(encoder_optimizer, decoder_optimizer, interval,
                    feedback=feedback)
        train_losses = np.array(agent.losses)
        assert len(train_losses) == interval
        train_loss_avg = np.average(train_losses)
        data_log['train loss'].append(train_loss_avg)
        loss_str = 'train loss: %.4f' % train_loss_avg

        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            agent.env = env
            agent.results_path = '%s%s_%s_iter_%d.json' % (result_dir, prefix,
                                                           env_name, iter)
            # Get validation loss under the same conditions as training
            agent.test(use_dropout=True, feedback=feedback, allow_cheat=True)
            val_losses = np.array(agent.losses)
            val_loss_avg = np.average(val_losses)
            data_log['%s loss' % env_name].append(val_loss_avg)
            # Get validation distance from goal under test evaluation conditions
            agent.test(use_dropout=False, feedback='argmax')
            agent.write_results()
            # NOTE(review): other train() variants in this file unpack
            # `score_summary, _ = evaluator.score(...)`; confirm this
            # Evaluation.score returns a plain mapping here.
            score_summary = evaluator.score(agent.results_path)
            loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            for metric, val in score_summary.items():
                data_log['%s %s' % (env_name, metric)].append(val)
                loss_str += ', %s: %.3f' % (metric, val)

        agent.env = train_env

        print(('%s (%d %d%%) %s' % (timeSince(start, float(iter) / n_iters),
                                    iter, float(iter) / n_iters * 100,
                                    loss_str)))

        df = pd.DataFrame(data_log)
        # NOTE(review): set_index is not in-place by default and the result is
        # discarded — presumably a harmless no-op; confirm.
        df.set_index('iteration')
        df_path = '%s%s-log.csv' % (plot_dir, prefix)
        df.to_csv(df_path)

        split_string = "-".join(train_env.splits)
        enc_path = '%s%s_%s_enc_iter_%d' % (snapshot_dir, prefix, split_string,
                                            iter)
        dec_path = '%s%s_%s_dec_iter_%d' % (snapshot_dir, prefix, split_string,
                                            iter)
        agent.save(enc_path, dec_path)

        # Log data to wandb for visualization
        wandb.log(last_entry(data_log, eval_type), step=idx)
# Tokenizer over the pre-built vocabulary; instructions padded/truncated to 80.
tok = Tokenizer(vocab=vocab, encoding_length=80)
feat_dict = read_img_features(features)
train_env = R2RBatch(feat_dict, batch_size=64, splits=['train'], tokenizer=tok)
# Path to the best-BLEU speaker checkpoint.
log_dir = "snap/speaker/state_dict/best_val_seen_bleu"
val_env_names = ['val_unseen', 'val_seen']
# Scan ids are the prefix of each feature key ("<scan>_<viewpoint>" —
# presumably; confirm against the feature file's key format).
featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])
val_envs = OrderedDict(
    ((split,
      (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                tokenizer=tok),
       Evaluation([split], featurized_scans, tok)))
     for split in val_env_names))

listner = Seq2SeqAgent(train_env, "", tok, 35)
# NOTE(review): this rebinds the name `speaker` (presumably the imported
# module) to a Speaker instance — confirm nothing later needs the module.
speaker = speaker.Speaker(train_env, listner, tok)
speaker.load(log_dir)
speaker.env = train_env

results = {}
for env_name, (env, evaluator) in val_envs.items():
    print("............ Evaluating %s ............." % env_name)
    speaker.env = env
    path2inst, loss, word_accu, sent_accu = speaker.valid()
    r = defaultdict(dict)
    # NOTE(review): the body of this loop appears truncated in this chunk —
    # only commented-out debugging lines are visible below.
    for path_id in path2inst.keys():
        # internal_bleu = evaluator.compute_internal_bleu_score(path_id)
        # if internal_bleu == 1.0:
        #     import pdb; pdb.set_trace()
def beam_valid(train_env, tok, val_envs={}):
    """Beam-search evaluation: rerank candidate paths by a weighted mix of
    speaker and listener scores, optionally grid-searching the mixing weight."""
    listener = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    speaker = Speaker(train_env, listener, tok)
    if args.speaker is not None:
        print("Load the speaker from %s." % args.speaker)
        speaker.load(args.speaker)

    print("Loaded the listener model at iter % d" % listener.load(args.load))

    final_log = ""
    for env_name, (env, evaluator) in val_envs.items():
        listener.logs = defaultdict(list)
        listener.env = env

        listener.beam_search_test(speaker)
        results = listener.results

        def cal_score(x, alpha, avg_speaker, avg_listener):
            # Weighted combination of the speaker and listener scores of one
            # candidate path; alpha is the speaker weight.
            speaker_score = sum(x["speaker_scores"]) * alpha
            if avg_speaker:
                speaker_score /= len(x["speaker_scores"])
            # normalizer = sum(math.log(k) for k in x['listener_actions'])
            normalizer = 0.
            listener_score = (sum(x["listener_scores"]) + normalizer) * (1 - alpha)
            if avg_listener:
                listener_score /= len(x["listener_scores"])
            return speaker_score + listener_score

        if args.param_search:
            # Search for the best speaker / listener ratio
            interval = 0.01
            logs = []
            for avg_speaker in [False, True]:
                for avg_listener in [False, True]:
                    for alpha in np.arange(0, 1 + interval, interval):
                        # Pick, per instruction, the candidate path with the
                        # highest combined score under this configuration.
                        result_for_eval = []
                        for key in results:
                            result_for_eval.append({
                                "instr_id": key,
                                "trajectory": max(
                                    results[key]['paths'],
                                    key=lambda x: cal_score(
                                        x, alpha, avg_speaker, avg_listener))
                                ['trajectory']
                            })
                        score_summary, _ = evaluator.score(result_for_eval)
                        for metric, val in score_summary.items():
                            if metric in ['success_rate']:
                                print(
                                    "Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f"
                                    % (avg_speaker, avg_listener, alpha, val))
                                logs.append(
                                    (avg_speaker, avg_listener, alpha, val))
            # Report the configuration with the best success rate.
            tmp_result = "Env Name %s\n" % (env_name) + \
                "Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f\n" % max(logs, key=lambda x: x[3])
            print(tmp_result)
            final_log += tmp_result
            print()
        else:
            # Fixed configuration: averaged scores with alpha from the CLI.
            avg_speaker = True
            avg_listener = True
            alpha = args.alpha

            result_for_eval = []
            for key in results:
                # Prepend the Dijkstra approach path to the best-scoring
                # candidate trajectory.
                result_for_eval.append({
                    "instr_id": key,
                    "trajectory": [(vp, 0, 0) for vp in results[key]['dijk_path']] + \
                        max(results[key]['paths'],
                            key=lambda x: cal_score(x, alpha, avg_speaker, avg_listener)
                            )['trajectory']
                })
            # result_for_eval = utils.add_exploration(result_for_eval)
            score_summary, _ = evaluator.score(result_for_eval)

            if env_name != 'test':
                loss_str = "Env Name: %s" % env_name
                for metric, val in score_summary.items():
                    if metric in ['success_rate']:
                        print(
                            "Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f"
                            % (avg_speaker, avg_listener, alpha, val))
                        loss_str += ",%s: %0.4f " % (metric, val)
                print(loss_str)
            print()

            if args.submit:
                json.dump(
                    result_for_eval,
                    open(os.path.join(log_dir, "submit_%s.json" % env_name),
                         'w'),
                    sort_keys=True, indent=4, separators=(',', ': '))
    print(final_log)
def train(train_env, tok, n_iters, log_every=100, val_envs={}, aug_env=None):
    """Listener training loop with optional back-translation augmentation,
    auxiliary-loss tensorboard logging, and best-checkpoint tracking."""
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    speaker = None
    if args.self_train:
        speaker = Speaker(train_env, listner, tok)
        if args.speaker is not None:
            if args.upload:
                print("Load the speaker from %s." % args.speaker)
                speaker.load(
                    get_sync_dir(os.path.join(args.upload_path, args.speaker)))
            else:
                print("Load the speaker from %s." % args.speaker)
                speaker.load(os.path.join(args.R2R_Aux_path, args.speaker))

    # Optionally resume the listener from a checkpoint; load() returns the
    # iteration the checkpoint was saved at.
    start_iter = 0
    if args.load is not None:
        if args.upload:
            refs_paths = get_outputs_refs_paths()['experiments'][0]
            print(refs_paths)
            load_model = os.path.join(refs_paths, args.load)
            print(load_model)
            print("LOAD THE listener from %s" % load_model)
            start_iter = listner.load(load_model)
        else:
            print("LOAD THE listener from %s" % args.load)
            start_iter = listner.load(
                os.path.join(args.R2R_Aux_path, args.load))

    start = time.time()

    # Best-so-far validation accuracy per split, used to decide checkpointing.
    best_val = {
        'val_seen': {
            "accu": 0.,
            "state": "",
            'update': False
        },
        'val_unseen': {
            "accu": 0.,
            "state": "",
            'update': False
        }
    }
    if args.fast_train:
        log_every = 40
    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, start_iter + n_iters - idx)
        iter = idx + interval

        # Train for log_every interval
        if aug_env is None:  # The default training process
            listner.env = train_env
            listner.train(interval,
                          feedback=feedback_method)  # Train interval iters
        else:
            if args.accumulate_grad:
                # Alternate GT and back-translated batches, accumulating
                # gradients before a single optimizer step.
                for _ in range(interval // 2):
                    listner.zero_grad()
                    listner.env = train_env

                    # Train with GT data
                    args.ml_weight = 0.2
                    listner.accumulate_gradient(feedback_method)
                    listner.env = aug_env

                    # Train with Back Translation
                    args.ml_weight = 0.6  # Sem-Configuration
                    listner.accumulate_gradient(feedback_method,
                                                speaker=speaker)
                    listner.optim_step()
            else:
                for _ in range(interval // 2):
                    # Train with GT data
                    listner.env = train_env
                    args.ml_weight = 0.2
                    listner.train(1, feedback=feedback_method)

                    # Train with Back Translation
                    listner.env = aug_env
                    args.ml_weight = 0.6
                    listner.train(1, feedback=feedback_method, speaker=speaker)

        # Log the training stats to tensorboard
        total = max(sum(listner.logs['total']), 1)
        # import pdb; pdb.set_trace()
        # length_rl == length_ml ? entropy length
        assert (max(len(listner.logs['rl_loss']), 1) ==
                max(len(listner.logs['ml_loss']), 1))
        max_rl_length = max(len(listner.logs['critic_loss']), 1)
        log_length = max(len(listner.logs['rl_loss']), 1)
        rl_loss = sum(listner.logs['rl_loss']) / log_length
        ml_loss = sum(listner.logs['ml_loss']) / log_length
        critic_loss = sum(listner.logs['critic_loss']
                          ) / log_length  #/ length / args.batchSize
        spe_loss = sum(listner.logs['spe_loss']) / log_length
        pro_loss = sum(listner.logs['pro_loss']) / log_length
        mat_loss = sum(listner.logs['mat_loss']) / log_length
        fea_loss = sum(listner.logs['fea_loss']) / log_length
        ang_loss = sum(listner.logs['ang_loss']) / log_length
        entropy = sum(
            listner.logs['entropy']) / log_length  #/ length / args.batchSize
        predict_loss = sum(listner.logs['us_loss']) / log_length
        writer.add_scalar("loss/rl_loss", rl_loss, idx)
        writer.add_scalar("loss/ml_loss", ml_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/spe_loss", spe_loss, idx)
        writer.add_scalar("loss/pro_loss", pro_loss, idx)
        writer.add_scalar("loss/mat_loss", mat_loss, idx)
        writer.add_scalar("loss/fea_loss", fea_loss, idx)
        writer.add_scalar("loss/ang_loss", ang_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_rl_length", max_rl_length, idx)
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("loss/unsupervised", predict_loss, idx)
        print("total_actions", total)
        print("max_rl_length", max_rl_length)

        # Run validation
        loss_str = ""
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation loss under the same conditions as training
            iters = None if args.fast_train or env_name != 'train' else 20  # 20 * 64 = 1280

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=iters)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            loss_str += "%s " % env_name
            for metric, val in score_summary.items():
                if metric in ['success_rate']:
                    loss_str += ', %s: %.4f' % (metric, val)
                    writer.add_scalar("%s/accuracy" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['accu']:
                            best_val[env_name]['accu'] = val
                            best_val[env_name]['update'] = True
                if metric in ['spl']:
                    writer.add_scalar("%s/spl" % env_name, val, idx)
                    loss_str += ', %s: %.4f' % (metric, val)
            loss_str += '\n'
        loss_str += '\n'

        # Save a checkpoint whenever a split hit a new best accuracy.
        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d \n%s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                file_dir = os.path.join(output_dir, "snap", args.name,
                                        "state_dict", "best_%s" % (env_name))
                listner.save(idx, file_dir)

        print(('%s (%d %d%%) \n%s' % (timeSince(start, float(iter) / n_iters),
                                      iter, float(iter) / n_iters * 100,
                                      loss_str)))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])

        if iter % args.save_iter == 0:
            file_dir = os.path.join(output_dir, "snap", args.name,
                                    "state_dict", "Iter_%06d" % (iter))
            listner.save(idx, file_dir)
def train(train_env, tok, n_iters, log_every=100, val_envs={}, aug_env=None):
    """Train the listener agent, optionally alternating ground-truth batches
    with back-translated (speaker-relabelled) batches, and checkpoint on the
    best validation success rate.

    Args:
        train_env: batch environment with ground-truth training data.
        tok: tokenizer shared by listener and speaker.
        n_iters: total number of training iterations.
        log_every: iterations between logging/validation rounds
            (forced to 40 when args.fast_train is set).
        val_envs: dict mapping split name -> (env, evaluator).
        aug_env: optional environment with back-translation data; when given,
            training alternates GT and augmented batches 1:1.
    """
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    speaker = None
    if args.self_train:
        speaker = Speaker(train_env, listner, tok)
        if args.speaker is not None:
            print("Load the speaker from %s." % args.speaker)
            speaker.load(args.speaker)

    # Resume from a listener checkpoint if one was given.
    start_iter = 0
    if args.load is not None:
        print("LOAD THE listener from %s" % args.load)
        start_iter = listner.load(os.path.join(args.load))

    start = time.time()

    # Best-so-far validation accuracy per split; 'update' marks a fresh best
    # that still needs to be checkpointed below.
    best_val = {
        'val_seen': {"accu": 0., "state": "", 'update': False},
        'val_unseen': {"accu": 0., "state": "", 'update': False}
    }
    if args.fast_train:
        log_every = 40
    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, n_iters - idx)
        iter = idx + interval  # NOTE: shadows the builtin `iter`

        # Train for log_every interval
        if aug_env is None:
            # The default training process
            listner.env = train_env
            listner.train(interval, feedback=feedback_method)  # Train interval iters
        else:
            if args.accumulate_grad:
                # Accumulate gradients over one GT batch and one
                # back-translation batch, then step the optimizer once.
                for _ in range(interval // 2):
                    listner.zero_grad()
                    listner.env = train_env

                    # Train with GT data
                    args.ml_weight = 0.2
                    listner.accumulate_gradient(feedback_method)
                    listner.env = aug_env

                    # Train with Back Translation
                    args.ml_weight = 0.6  # Sem-Configuration
                    listner.accumulate_gradient(feedback_method, speaker=speaker)
                    listner.optim_step()
            else:
                for _ in range(interval // 2):
                    # Train with GT data
                    listner.env = train_env
                    args.ml_weight = 0.2
                    listner.train(1, feedback=feedback_method)

                    # Train with Back Translation
                    listner.env = aug_env
                    args.ml_weight = 0.6
                    listner.train(1, feedback=feedback_method, speaker=speaker)

        # Log the training stats to tensorboard.
        # `total`/denominators are clamped to 1 to avoid division by zero
        # when a log list is empty.
        total = max(sum(listner.logs['total']), 1)
        length = max(len(listner.logs['critic_loss']), 1)
        critic_loss = sum(
            listner.logs['critic_loss']) / total  #/ length / args.batchSize
        entropy = sum(
            listner.logs['entropy']) / total  #/ length / args.batchSize
        predict_loss = sum(listner.logs['us_loss']) / max(
            len(listner.logs['us_loss']), 1)
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/unsupervised", predict_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_length", length, idx)
        print("total_actions", total)
        print("max_length", length)

        # Run validation
        loss_str = ""
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation loss under the same conditions as training
            iters = None if args.fast_train or env_name != 'train' else 20  # 20 * 64 = 1280

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=iters)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            loss_str += ", %s " % env_name
            for metric, val in score_summary.items():
                if metric in ['success_rate']:
                    writer.add_scalar("accuracy/%s" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['accu']:
                            best_val[env_name]['accu'] = val
                            best_val[env_name]['update'] = True
                loss_str += ', %s: %.3f' % (metric, val)

        # Checkpoint any split that just achieved a new best accuracy.
        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "best_%s" % (env_name)))

        print(('%s (%d %d%%) %s' %
               (timeSince(start, float(iter) / n_iters), iter,
                float(iter) / n_iters * 100, loss_str)))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])
        if iter % 50000 == 0:
            listner.save(
                idx,
                os.path.join("snap", args.name, "state_dict",
                             "Iter_%06d" % (iter)))

    # Always save the final state after the loop finishes.
    listner.save(
        idx,
        os.path.join("snap", args.name, "state_dict", "LAST_iter%d" % (idx)))
def train(train_env, tok, n_iters, log_every=100, val_envs={}):
    ''' Train on training set, validating on both seen and unseen. '''
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    # Resume from a checkpoint when one was given on the command line.
    start_iter = 0
    if args.load is not None:
        print("LOAD THE DICT from %s" % args.load)
        start_iter = listner.load(os.path.join(args.load))

    start = time.time()
    # agent.train(encoder_optimizer, decoder_optimizer, 1000, feedback='teacher')

    # Best-so-far validation accuracy per split; 'update' marks a fresh best.
    best_val = {
        'val_seen': {"accu": 0., "state": "", 'update': False},
        'val_unseen': {"accu": 0., "state": "", 'update': False}
    }
    if args.fast_train:
        log_every = 40
    else:
        # Allow a clean shutdown on SIGINT/SIGTERM between logging rounds.
        killer = utils.GracefulKiller()
    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, n_iters - idx)
        iter = idx + interval  # NOTE: shadows the builtin `iter`

        # Train for log_every interval
        listner.env = train_env
        listner.train(interval, feedback=feedback_method)  # Train interval iters
        # listner.timer.show()

        # Log the tensorboard (denominators clamped to 1 for empty logs).
        total = max(sum(listner.logs['total']), 1)
        length = max(len(listner.logs['critic_loss']), 1)
        # critic_loss = sum(listner.logs['critic_loss']) / length / args.batchSize
        # entropy = sum(listner.logs['entropy']) / length / args.batchSize
        critic_loss = sum(
            listner.logs['critic_loss']) / total  #/ length / args.batchSize
        entropy = sum(
            listner.logs['entropy']) / total  #/ length / args.batchSize
        predict_loss = sum(listner.logs['us_loss']) / max(
            len(listner.logs['us_loss']), 1)
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/unsupervised", predict_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_length", length, idx)

        loss_str = ""
        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation loss under the same conditions as training
            iters = None if args.fast_train or env_name != 'train' else 20  # 20 * 64 = 1280
            listner.test(use_dropout=True,
                         feedback='sample',
                         allow_cheat=True,
                         iters=iters)
            # val_losses = np.array(listner.losses)
            # val_loss_avg = np.average(val_losses)

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=iters)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            # loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            loss_str += ", %s" % env_name
            for metric, val in score_summary.items():
                if metric in ['success_rate']:
                    loss_str += ', %s: %.3f' % (metric, val)
                    writer.add_scalar("accuracy/%s" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['accu']:
                            best_val[env_name]['accu'] = val
                            best_val[env_name]['update'] = True

        # Checkpoint any split that just achieved a new best accuracy.
        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "best_%s" % (env_name)))
        listner.env = train_env

        print(('%s (%d %d%%) %s' %
               (timeSince(start, float(iter) / n_iters), iter,
                float(iter) / n_iters * 100, loss_str)))
        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])
        if iter % 20000 == 0:
            import shutil
            listner.save(
                idx,
                os.path.join("snap", args.name, "state_dict",
                             "Iter_%06d" % (iter)))
            # NOTE(review): assumes "best_val_unseen" was already written in an
            # earlier round; shutil.copy raises FileNotFoundError otherwise.
            shutil.copy(
                os.path.join("snap", args.name, "state_dict",
                             "best_val_unseen"),
                os.path.join("snap", args.name, "state_dict",
                             "best_val_unseen_%06d" % (iter)))
        if not args.fast_train:
            # Honour a termination signal between rounds.
            if killer.kill_now:
                break

    # Save the final state when training ends or is interrupted.
    listner.save(
        idx,
        os.path.join("snap", args.name, "state_dict", "LAST_iter%d" % (idx)))
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
              feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits.

    Builds the tokenizer (BERT-based or vocab-based), the train/val
    environments, the requested encoder variant and the panoramic decoder,
    then hands everything to train().
    '''
    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)

    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic,
                         args.action_space, batch_size=args.batch_size,
                         splits=['train'], tokenizer=tok, path_type=path_type,
                         history=history, blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(feature_store, nav_graphs, args.panoramic,
                         args.action_space, batch_size=args.batch_size,
                         splits=[split], tokenizer=tok, path_type=path_type,
                         history=history, blind=blind),
                Evaluation([split], path_type=path_type))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" % (args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers, args.la_layers,
                                 args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers, args.la_layers,
                                 args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size
    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" % (args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        # Plain LSTM encoder.  NOTE(review): this branch uses module-level
        # hidden_size / bidirectional / word_embedding_size / padding_idx /
        # dropout_ratio rather than args.* — confirm those globals exist.
        enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size,
                              padding_idx, dropout_ratio,
                              bidirectional=bidirectional).cuda()

    # Decoder context width: doubled for a bidirectional encoder; raw BERT
    # hidden size (768) when BERT output is consumed without the top LSTM.
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768
    # Fix: the decoder was previously constructed twice with identical
    # arguments, discarding (and leaving on the GPU until gc) the first
    # instance.  Build it exactly once.
    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                                 Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()

    train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          val_envs=val_envs, args=args)
def train(train_env, encoder, decoder, n_iters, path_type, history,
          feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          log_every=100, val_envs=None, args=None):
    ''' Train on training set, validating on both seen and unseen.

    Tracks the best unseen success rate (best_model) and the best unseen
    distance reduction (best_dr_model), checkpointing each as it improves,
    and appends per-round metrics to a CSV log.
    '''
    if val_envs is None:
        val_envs = {}

    if agent_type == 'seq2seq':
        agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len,
                             path_type=args.path_type, args=args)
    else:
        sys.exit("Unrecognized agent_type '%s'" % agent_type)

    print('Training a %s agent with %s feedback' % (agent_type, feedback_method))
    if args.optm == 'Adam':
        optim_func = optim.Adam
    elif args.optm == 'Adamax':
        optim_func = optim.Adamax
    encoder_optimizer = optim_func(encoder.parameters(),
                                   lr=args.learning_rate,
                                   weight_decay=args.weight_decay)
    decoder_optimizer = optim_func(decoder.parameters(),
                                   lr=args.learning_rate,
                                   weight_decay=args.weight_decay)
    data_log = defaultdict(list)
    start = time.time()

    # Deep-copied snapshots of the best weights so later training does not
    # mutate them through shared tensors.
    best_model = {
        'iter': -1,
        'encoder': copy.deepcopy(agent.encoder.state_dict()),
        'decoder': copy.deepcopy(agent.decoder.state_dict()),
    }
    best_dr_model = {
        'iter': -1,
        'encoder': copy.deepcopy(agent.encoder.state_dict()),
        'decoder': copy.deepcopy(agent.decoder.state_dict()),
    }
    best_dr = 0
    best_spl = 0
    best_iter = 0
    best_dr_iter = 0
    best_sr = 0
    myidx = 0
    split_string = "-".join(train_env.splits)

    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)
        iter = idx + interval  # NOTE: shadows the builtin `iter`
        data_log['iteration'].append(iter)
        myidx += interval
        print("PROGRESS: {}%".format(round((myidx) * 100 / n_iters, 4)))

        # Train for log_every interval
        agent.train(encoder_optimizer, decoder_optimizer, interval,
                    feedback=feedback_method)
        train_losses = np.array(agent.losses)
        assert len(train_losses) == interval
        train_loss_avg = np.average(train_losses)
        data_log['train loss'].append(train_loss_avg)
        loss_str = 'train loss: %.4f' % train_loss_avg

        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            agent.env = env
            agent.results_path = '%s%s_%s_iter_%d.json' % (
                RESULT_DIR, model_prefix, env_name, iter)

            # Get validation loss under the same conditions as training
            agent.test(use_dropout=True, feedback=feedback_method,
                       allow_cheat=True)
            val_losses = np.array(agent.losses)
            val_loss_avg = np.average(val_losses)
            data_log['%s loss' % env_name].append(val_loss_avg)

            # Get validation distance from goal under test evaluation conditions
            agent.test(use_dropout=False, feedback='argmax')
            agent.write_results()
            score_summary, _ = evaluator.score(agent.results_path)
            loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            for metric, val in score_summary.items():
                data_log['%s %s' % (env_name, metric)].append(val)
                if metric in ['success_rate', 'oracle success_rate',
                              'oracle path_success_rate',
                              'dist_to_end_reduction', 'sc_dr']:
                    loss_str += ', %s: %.3f' % (metric, val)

        # Checkpoint on each of the tracked unseen metrics.
        eval_spl = current_best(data_log, -1, 'spl_unseen')
        eval_dr = current_best(data_log, -1, 'dr_unseen')
        eval_sr = current_best(data_log, -1, 'sr_unseen')
        if eval_sr > best_sr:
            best_sr = eval_sr
            best_iter = iter
            best_model['iter'] = iter
            best_model['encoder'] = copy.deepcopy(agent.encoder.state_dict())
            best_model['decoder'] = copy.deepcopy(agent.decoder.state_dict())
            save_best_model(best_model, SNAPSHOT_DIR, model_prefix,
                            split_string, -1)
        if eval_spl > best_spl:
            best_spl = eval_spl
            loss_str += ' bestSPL'
        if eval_dr > best_dr:
            best_dr = eval_dr
            loss_str += ' bestDR'
            best_dr_iter = iter
            best_dr_model['iter'] = iter
            best_dr_model['encoder'] = copy.deepcopy(agent.encoder.state_dict())
            best_dr_model['decoder'] = copy.deepcopy(agent.decoder.state_dict())
            save_best_model(best_dr_model, SNAPSHOT_DIR, model_prefix,
                            split_string + "bestdr", -1)

        agent.env = train_env
        print('%s (%d %d%%) %s' % (timeSince(start, float(iter) / n_iters),
                                   iter, float(iter) / n_iters * 100,
                                   loss_str))
        print("EVALERR: {}%".format(best_dr))

        df = pd.DataFrame(data_log)
        df.set_index('iteration')
        df_path = '%s%s-log.csv' % (PLOT_DIR, model_prefix)
        # Retry a few times in case the CSV is locked by a reader.
        # Fix: the retry loop previously used a bare `except:` which swallowed
        # every exception (bugs, KeyboardInterrupt); only retry on OS-level
        # file errors.
        write_num = 0
        while write_num < 10:
            try:
                df.to_csv(df_path)
                break
            except OSError:
                write_num += 1

    # Persist the final best snapshots with their true iteration numbers.
    split_string = "-".join(train_env.splits)
    save_best_model(best_model, SNAPSHOT_DIR, model_prefix, split_string,
                    best_iter)
    save_best_model(best_dr_model, SNAPSHOT_DIR, model_prefix,
                    split_string + "bestdr", best_dr_iter)
def btest_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
                     feedback_method, n_iters, model_prefix, blind, args):
    ''' Load a trained encoder/decoder from disk and generate the test-split
    submission file (no training happens here). '''
    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    feature_store = Feature(features, args.panoramic)

    # Build the instruction encoder.  NOTE(review): only 'vlbert' and 'bert'
    # are handled; any other encoder_type leaves `encoder` undefined and
    # fails with NameError below — confirm callers never pass another type.
    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers, args.la_layers,
                                 args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers, args.la_layers,
                                 args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            #encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            #encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))

    # Decoder context width: doubled for a bidirectional encoder.
    # NOTE(review): unlike train_val, there is no `use_bert and not
    # args.top_lstm` → 768 special case here — confirm that is intended.
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                                 Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()

    # Restore trained weights (if provided) and switch to inference mode.
    if args.encoder_path != "":
        encoder.load_state_dict(torch.load(args.encoder_path))
        decoder.load_state_dict(torch.load(args.decoder_path))
    encoder.eval()
    decoder.eval()

    # Generate test submission
    test_env = R2RBatch(feature_store,
                        nav_graphs,
                        args.panoramic,
                        args.action_space,
                        batch_size=args.batch_size,
                        splits=['test'],
                        tokenizer=tok,
                        path_type=path_type,
                        history=history,
                        blind=blind)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len,
                         path_type=args.path_type, args=args)
    agent.results_path = '%s%s_%s.json' % (RESULT_DIR, "Submit", 'test')
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
def create_augment_data():
    '''Use a trained speaker snapshot to generate synthetic instructions for
    sampled paths, writing one JSON file per (snapshot, environment) pair
    under <log_dir>/aug_data/.
    '''
    setup()

    # Fix: these imports previously lived inside the snapshot/env loops and
    # were re-executed on every iteration; hoist them to the function top.
    import json
    import tqdm

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    # Load features
    feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)

    # The datasets to be augmented
    print("Start to augment the data")
    aug_envs = []
    aug_envs.append(
        SemiBatch(False,
                  'tasks/R2R/data/all_paths_46_removevalunseen.json',
                  "unseen",
                  feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=['val_unseen'],
                  tokenizer=tok))
    aug_envs.append(
        SemiBatch(False,
                  'tasks/R2R/data/all_paths_46_removetest.json',
                  "test",
                  feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=['test'],
                  tokenizer=tok))

    for snapshot in os.listdir(os.path.join(log_dir, 'state_dict')):
        # Select a particular snapshot to process. (O/w, it will make for every snapshot)
        if snapshot != "best_val_unseen_bleu":
            continue

        # Create Speaker
        listner = Seq2SeqAgent(aug_envs[0], "", tok, args.maxAction)
        speaker = Speaker(aug_envs[0], listner, tok)

        # Load Weight
        load_iter = speaker.load(os.path.join(log_dir, 'state_dict', snapshot))
        print("Load from iter %d" % (load_iter))

        # Augment the env from aug_envs
        for aug_env in aug_envs:
            speaker.env = aug_env

            # Create the aug data: path_id -> generated instruction tokens.
            path2inst = speaker.get_insts(beam=args.beam, wrapper=tqdm.tqdm)
            data = []
            for datum in aug_env.fake_data:
                datum = datum.copy()  # do not mutate the env's own records
                path_id = datum['path_id']
                if path_id in path2inst:
                    datum['instructions'] = [
                        tok.decode_sentence(path2inst[path_id])
                    ]
                    datum.pop('instr_encoding')  # Remove Redundant keys
                    datum.pop('instr_id')
                    data.append(datum)

            print("Totally, %d data has been generated for snapshot %s." %
                  (len(data), snapshot))
            print("Average Length %0.4f" % utils.average_length(path2inst))
            print(datum)  # Print a Sample

            # Save the data.
            # Fix: the file handle from open() was never closed; use a
            # context manager so the JSON is flushed deterministically.
            os.makedirs(os.path.join(log_dir, 'aug_data'), exist_ok=True)
            beam_tag = "_beam" if args.beam else ""
            out_path = os.path.join(
                log_dir, 'aug_data',
                '%s_%s%s.json' % (snapshot, aug_env.name, beam_tag))
            with open(out_path, 'w') as f:
                json.dump(data, f, sort_keys=True, indent=4,
                          separators=(',', ': '))
def train_arbiter(arbiter_env, tok, n_iters, log_every=500, val_envs={}):
    """Train the arbiter (a real-vs-generated instruction discriminator),
    evaluating each round on val_envs and checkpointing the weights with the
    best val_seen F1 and the best val_seen accuracy."""
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(arbiter_env, "", tok, args.maxAction)
    arbiter = Arbiter(arbiter_env, listner, tok)
    best_f1 = 0.
    best_accu = 0.
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # Train for log_every interval
        arbiter.env = arbiter_env
        arbiter.train(interval)  # Train interval iters

        print()
        print("Iter: %d" % idx)

        # Evaluation
        for env_name, env in val_envs.items():
            print("............ Evaluating %s ............." % env_name)
            arbiter.env = env
            if env_name == 'train' or env_name == 'val_unseen':
                # Subsample the big splits for speed.
                path2prob = arbiter.valid(total=500)
            else:  # val_seen need accurate accuracy to evaluate the model performance (for early stopping)
                path2prob = arbiter.valid()
            print("len path2prob", len(path2prob))
            path2answer = env.get_answer()
            print("len path2ans", len(path2answer))
            # Probabilities the arbiter assigned to paths labelled False.
            false_probs = list([
                path2prob[path] for path in path2prob if not path2answer[path]
            ])
            # NOTE(review): taking prob >= 0.5 as "predicted positive" and
            # path2answer[path] as "actually positive", the two middle buckets
            # appear swapped relative to standard terminology:
            # 'false_positive' counts predicted-negative/actually-positive
            # items (i.e. false NEGATIVES) and 'false_negative' the reverse.
            # true_f1 is unaffected (precision and recall merely swap inside a
            # symmetric formula), but the printed "tp fp fn tn" line and the
            # true_accu/true_recall names are misleading — confirm intent
            # before relying on them individually.
            true_positive = len([
                1 for path in path2prob
                if (path2prob[path] >= 0.5 and path2answer[path])
            ])
            false_positive = len([
                1 for path in path2prob
                if (path2prob[path] < 0.5 and path2answer[path])
            ])
            false_negative = len([
                1 for path in path2prob
                if (path2prob[path] >= 0.5 and not path2answer[path])
            ])
            true_negative = len([
                1 for path in path2prob
                if (path2prob[path] < 0.5 and not path2answer[path])
            ])
            # NOTE(review): unlike the max(..., 1)-guarded divisions below,
            # these two raise ZeroDivisionError on an empty bucket.
            true_accu = true_positive / (true_positive + false_positive)
            true_recall = true_positive / max(
                (true_positive + false_negative), 1)
            true_f1 = 2 * (true_accu * true_recall) / max(
                (true_accu + true_recall), 1)
            false_accu = true_negative / (true_negative + false_negative)
            print(
                "tp %d, fp %d, fn %d, tn %d" %
                (true_positive, false_positive, false_negative, true_negative))
            print("All negative", true_negative + false_negative)
            print("All positive", true_positive + false_positive)
            writer.add_scalar("true_accu", true_accu, idx)
            writer.add_scalar("true_recall", true_recall, idx)
            writer.add_scalar("true_f1", true_f1, idx)
            writer.add_scalar("false_accu", false_accu, idx)

            # Early-stopping checkpoints are driven by val_seen only.
            if env_name == 'val_seen':
                if true_f1 > best_f1:
                    best_f1 = true_f1
                    print('Save the model with %s f1 score %0.4f' %
                          (env_name, best_f1))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_f1' % env_name))
                if true_accu > best_accu:
                    best_accu = true_accu
                    print("Save the model with %s true accu %0.4f" %
                          (env_name, best_accu))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_accu' % env_name))
            print("True Accu %0.4f, False Accu %0.4f" %
                  (true_accu, false_accu))
            # NOTE(review): divides by len(false_probs) unguarded — raises
            # ZeroDivisionError if the split has no negative paths.
            print("Avg False probs %0.4f" %
                  (sum(false_probs) / len(false_probs)))
            sys.stdout.flush()
def train(train_env, tok, n_iters, log_every=2000, val_envs=None, aug_env=None):
    '''Train the listener (optionally interleaving augmented data 1:1),
    logging to tensorboard and a text record file, and checkpointing the best
    val_unseen SPL (ties broken by success rate).
    '''
    if val_envs is None:  # avoid a mutable default argument
        val_envs = {}
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    record_file = open('./logs/' + args.name + '.txt', 'a')
    record_file.write(str(args) + '\n\n')
    record_file.close()

    start_iter = 0
    if args.load is not None:
        if args.aug is None:
            start_iter = listner.load(os.path.join(args.load))
            # Fix: both format strings were missing their second placeholder,
            # so the iteration number was silently never printed.
            print("\nLOAD the model from {}, iteration {}".format(
                args.load, start_iter))
        else:
            load_iter = listner.load(os.path.join(args.load))
            print("\nLOAD the model from {}, iteration {}".format(
                args.load, load_iter))

    start = time.time()
    print('\nListener training starts, start iteration: %s' % str(start_iter))

    # Best val_unseen SPL so far; 'sr' stores the matching success rate used
    # as the tie-breaker, 'update' marks a fresh best awaiting checkpoint.
    best_val = {
        'val_unseen': {"spl": 0., "sr": 0., "state": "", 'update': False}
    }

    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, n_iters - idx)
        iter = idx + interval

        # Train for log_every interval
        if aug_env is None:
            listner.env = train_env
            listner.train(interval, feedback=feedback_method)  # Train interval iters
        else:
            jdx_length = len(range(interval // 2))
            for jdx in range(interval // 2):
                # Train with GT data
                listner.env = train_env
                args.ml_weight = 0.2
                listner.train(1, feedback=feedback_method)

                # Train with Augmented data
                listner.env = aug_env
                args.ml_weight = 0.2
                listner.train(1, feedback=feedback_method)

                print_progress(jdx, jdx_length, prefix='Progress:',
                               suffix='Complete', bar_length=50)

        # Log the training stats to tensorboard (denominators clamped to 1).
        total = max(sum(listner.logs['total']), 1)
        length = max(len(listner.logs['critic_loss']), 1)
        critic_loss = sum(listner.logs['critic_loss']) / total
        RL_loss = sum(listner.logs['RL_loss']) / max(
            len(listner.logs['RL_loss']), 1)
        IL_loss = sum(listner.logs['IL_loss']) / max(
            len(listner.logs['IL_loss']), 1)
        entropy = sum(listner.logs['entropy']) / total
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/RL_loss", RL_loss, idx)
        writer.add_scalar("loss/IL_loss", IL_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_length", length, idx)
        # print("total_actions", total, ", max_length", length)

        # Run validation
        loss_str = "iter {}".format(iter)
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=None)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            loss_str += ", %s " % env_name
            for metric, val in score_summary.items():
                if metric in ['spl']:
                    writer.add_scalar("spl/%s" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['spl']:
                            best_val[env_name]['spl'] = val
                            # Fix: record the accompanying success rate;
                            # previously 'sr' was never updated, so the
                            # tie-break below always compared against 0.
                            best_val[env_name]['sr'] = score_summary['success_rate']
                            best_val[env_name]['update'] = True
                        elif (val == best_val[env_name]['spl']) and (
                                score_summary['success_rate'] >
                                best_val[env_name]['sr']):
                            best_val[env_name]['spl'] = val
                            best_val[env_name]['sr'] = score_summary['success_rate']
                            best_val[env_name]['update'] = True
                loss_str += ', %s: %.4f' % (metric, val)

        record_file = open('./logs/' + args.name + '.txt', 'a')
        record_file.write(loss_str + '\n')
        record_file.close()

        # Checkpoint: best model on a fresh best, otherwise the latest state.
        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "best_%s" % (env_name)))
            else:
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "latest_dict"))

        print(('%s (%d %d%%) %s' %
               (timeSince(start, float(iter) / n_iters), iter,
                float(iter) / n_iters * 100, loss_str)))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])
                record_file = open('./logs/' + args.name + '.txt', 'a')
                record_file.write('BEST RESULT TILL NOW: ' + env_name +
                                  ' | ' + best_val[env_name]['state'] + '\n')
                record_file.close()

    # Always save the final state.
    listner.save(
        idx,
        os.path.join("snap", args.name, "state_dict", "LAST_iter%d" % (idx)))
def meta_filter():
    """ Train the listener with the augmented data.

    For each augmented datum: reload the listener from args.load, fine-tune
    on that single datum for a few iterations, then check whether val_unseen
    success rate improved over the pre-fine-tuning baseline — a per-datum
    quality filter for the augmentation set.
    """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    # Scan ids present in the feature file (keys look like "<scan>_<view>").
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    # Load the augmentation data
    if args.aug is None:  # If aug is specified, load the "aug"
        speaker_snap_name = "adam_drop6_correctsave"
        print("Loading from %s" % speaker_snap_name)
        aug_path = "snap/speaker/long/%s/aug_data/best_val_unseen_loss.json" % speaker_snap_name
    else:  # Load the path from args
        aug_path = args.aug

    # Create the training environment
    aug_env = R2RBatch(feat_dict,
                       candidate_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok)
    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train@3333'],
                         tokenizer=tok)
    print("The augmented data_size is : %d" % train_env.size())
    stats = train_env.get_statistics()
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=[split],
                         tokenizer=tok), Evaluation([split], featurized_scans,
                                                    tok))
        for split in ['train', 'val_seen', 'val_unseen@133']
    }
    val_env, val_eval = val_envs['val_unseen@133']

    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    def filter_result():
        # Success rate of the current listener weights on the sub-sampled
        # val_unseen split (returns None if 'success_rate' is absent).
        listner.env = val_env
        val_env.reset_epoch()
        listner.test(use_dropout=False, feedback='argmax')
        result = listner.get_results()
        score_summary, _ = val_eval.score(result)
        for metric, val in score_summary.items():
            if metric in ['success_rate']:
                return val

    # Baseline accuracy before any per-datum fine-tuning.
    listner.load(args.load)
    base_accu = (filter_result())
    print("BASE ACCU %0.4f" % base_accu)

    success = 0
    for data_id, datum in enumerate(aug_env.data):
        # Reload the param of the listener
        listner.load(args.load)

        train_env.reset_epoch(shuffle=True)
        listner.env = train_env

        # Train for the datum
        # iters = train_env.size() // train_env.batch_size
        iters = 10
        for i in range(iters):
            listner.env = train_env
            # Fill the whole batch with copies of this one datum.
            # train_env.reset(batch=([datum] * (train_env.batch_size // 2)), inject=True)
            train_env.reset(batch=[datum] * train_env.batch_size, inject=True)
            # train_env.reset()
            listner.train(1, feedback='sample', reset=False)
        # print("Iter %d, result %0.4f" % (i, filter_result()))
        now_accu = filter_result()
        if now_accu > base_accu:
            success += 1
        # print("RESULT %0.4f" % filter_result())

        print('Accu now %0.4f, success / total: %d / %d = %0.4f' %
              (now_accu, success, data_id + 1, success / (data_id + 1)))