Example #1
def test_submission():
    ''' Train on combined training and validation sets, and generate test submission. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok)

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters)

    # Generate test submission
    test_env = R2RBatch(features,
                        batch_size=batch_size,
                        splits=['test'],
                        tokenizer=tok)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix,
                                                   'test', 20000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
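
A note on the recurring "enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size" idiom in these examples: a bidirectional LSTM concatenates its forward and backward states, so halving the per-direction width keeps the encoder's context vectors at hidden_size in both configurations. A minimal sketch in plain PyTorch (independent of the R2R code) that checks the shape:

import torch
import torch.nn as nn

hidden_size = 512
for bidirectional in (False, True):
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    lstm = nn.LSTM(input_size=256, hidden_size=enc_hidden_size,
                   batch_first=True, bidirectional=bidirectional)
    out, _ = lstm(torch.randn(4, 10, 256))  # (batch, seq_len, input_size)
    assert out.shape[-1] == hidden_size     # num_directions * enc_hidden_size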
Example #2
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok), Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
Example #3
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback,
              n_iters, prefix, blind, debug, train_vocab, trainval_vocab,
              batch_size, action_embedding_size, target_embedding_size,
              bidirectional, dropout_ratio, weight_decay, feature_size,
              hidden_size, word_embedding_size, lr, result_dir, snapshot_dir,
              plot_dir, train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''

    setup(seed, train_vocab, trainval_vocab)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab if eval_type == 'val' else trainval_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=train_splits,
                         tokenizer=tok,
                         seed=seed,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         blind=blind), Evaluation([split], seed=seed))
        for split in test_splits
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()

    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          feedback,
          max_episode_len,
          max_input_length,
          prefix,
          blind,
          lr,
          weight_decay,
          result_dir,
          snapshot_dir,
          plot_dir,
          val_envs=val_envs,
          debug=debug)
Example #4
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
               feedback_method, n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind), Evaluation([split],
                                                  path_type=path_type))
        for split in ['test']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env,
          encoder,
          decoder,
          n_iters,
          path_type,
          history,
          feedback_method,
          max_episode_len,
          MAX_INPUT_LENGTH,
          model_prefix,
          val_envs=val_envs)
Example #5
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup(seed)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind), Evaluation([split], seed=seed))
        for split in ['val_seen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()
    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          history,
          feedback_method,
          max_episode_len,
          max_input_length,
          model_prefix,
          val_envs=val_envs,
          debug=debug)
Example #6
def valid_speaker(tok, val_envs):
    import tqdm
    listner = Seq2SeqAgent(None, "", tok, args.maxAction)
    speaker = Speaker(None, listner, tok)
    speaker.load(os.path.join(log_dir, 'state_dict', 'best_val_seen_bleu'))
    # speaker.load(os.path.join(log_dir, 'state_dict', 'best_val_unseen_loss'))

    for args.beam in [False, True]:
        print("Using Beam Search %s" % args.beam)
        for env_name, (env, evaluator) in val_envs.items():
            if env_name == 'train':
                continue
            print("............ Evaluating %s ............." % env_name)
            speaker.env = env
            path2inst, loss, word_accu, sent_accu = speaker.valid(
                beam=args.beam, wrapper=tqdm.tqdm)
            path_id = next(iter(path2inst.keys()))
            print("Inference: ", tok.decode_sentence(path2inst[path_id]))
            print("GT: ", evaluator.gt[path_id]['instructions'])
            bleu_score, precisions, _ = evaluator.bleu_score(path2inst)
            print(
                "Bleu, Loss, Word_Accu, Sent_Accu for %s is: %0.4f, %0.4f, %0.4f, %0.4f"
                % (env_name, bleu_score, loss, word_accu, sent_accu))
            print(
                "Bleu 1: %0.4f, Bleu 2: %0.4f, Bleu 3: %0.4f, Bleu 4: %0.4f" %
                tuple(precisions))
            print("Average Length %0.4f" % utils.average_length(path2inst))
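
None of these snippets define evaluator.bleu_score. As a rough sketch of what such a method computes (assuming NLTK, and simplified to return only the corpus-level score rather than the (score, precisions, ...) tuple unpacked above):

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

def bleu_score_sketch(path2inst, gt, tok):
    # Compare each generated instruction against the reference
    # instructions stored for its path id (names mirror the snippet above).
    refs, hyps = [], []
    for path_id, inst in path2inst.items():
        hyps.append(tok.decode_sentence(inst).split())
        refs.append([r.split() for r in gt[path_id]['instructions']])
    smooth = SmoothingFunction().method1
    return corpus_bleu(refs, hyps, smoothing_function=smooth)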
Example #7
def valid(train_env, tok, n_iters, log_every=100, val_envs={}):
    ''' Train on training set, validating on both seen and unseen. '''
    agent = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    print("Loaded the listener model at iter %d" % agent.load(args.load))

    for env_name, (env, evaluator) in val_envs.items():
        agent.logs = defaultdict(list)
        agent.env = env
        # Get validation loss under the same conditions as training
        # iters = None if args.fast_train or env_name != 'train' else 20     # 20 * 64 = 1280
        iters = None
        # agent.test(use_dropout=True, feedback=feedback_method, allow_cheat=True, iters=iters)
        # val_losses = np.array(agent.losses)
        # val_loss_avg = np.average(val_losses)
        # loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
        # Get validation distance from goal under test evaluation conditions
        # agent.logs['circle'] = 0
        agent.test(use_dropout=False, feedback='argmax', iters=iters)
        # print("In env %s, the circle cases are %d(%0.4f)" % (env_name, agent.logs['circle'],
        #                                                      agent.logs['circle']*1./env.size()))
        result = agent.get_results()
        score_summary, _ = evaluator.score(result)
        loss_str = "%s: " % env_name
        for metric, val in score_summary.items():
            loss_str += ', %s: %.3f' % (metric, val)
        print(loss_str)
Example #8
def valid(train_env, tok, val_envs={}):
    agent = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    print("Loaded the listener model at iter %d from %s" %
          (agent.load(args.load), args.load))

    for env_name, (env, evaluator) in val_envs.items():
        agent.logs = defaultdict(list)
        agent.env = env

        iters = None
        agent.test(use_dropout=False, feedback='argmax', iters=iters)
        result = agent.get_results()

        if env_name != '':
            score_summary, _ = evaluator.score(result)
            loss_str = "Env name: %s" % env_name
            for metric, val in score_summary.items():
                loss_str += ', %s: %.4f' % (metric, val)
            print(loss_str)

        if args.submit:
            json.dump(result,
                      open(os.path.join(log_dir, "submit_%s.json" % env_name),
                           'w'),
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
Example #9
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
  
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok)

    # Create validation environments
    val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split], 
                tokenizer=tok), Evaluation([split])) for split in ['val_seen', 'val_unseen']}

    # Build models and train
    enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx, 
                  dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                  action_embedding_size, hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
Example #10
def test():
    setup()

    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)
    listner = Seq2SeqAgent(None, "", tok, args.maxAction)

    start_iter = 0
    if args.load is not None:
        print("LOAD THE DICT from %s" % args.load)
        start_iter = listner.load(os.path.join(args.load))
Example #11
def train_speaker(train_env, tok, n_iters, log_every=500, val_envs={}):
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    speaker = Speaker(train_env, listner, tok)

    if args.fast_train:
        log_every = 40

    best_bleu = defaultdict(lambda: 0)
    best_loss = defaultdict(lambda: 1232)  # arbitrarily large initial loss
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # Train for log_every interval
        speaker.env = train_env
        speaker.train(interval)   # Train interval iters

        print()
        print("Iter: %d" % idx)

        # Evaluation
        for env_name, (env, evaluator) in val_envs.items():
            if 'train' in env_name:  # Skip the large training set for efficiency
                continue

            print("............ Evaluating %s ............." % env_name)
            speaker.env = env
            path2inst, loss, word_accu, sent_accu = speaker.valid()
            path_id = next(iter(path2inst.keys()))
            print("Inference: ", tok.decode_sentence(path2inst[path_id]))
            print("GT: ", evaluator.gt[str(path_id)]['instructions'])
            bleu_score, precisions = evaluator.bleu_score(path2inst)

            # Tensorboard log
            writer.add_scalar("bleu/%s" % (env_name), bleu_score, idx)
            writer.add_scalar("loss/%s" % (env_name), loss, idx)
            writer.add_scalar("word_accu/%s" % (env_name), word_accu, idx)
            writer.add_scalar("sent_accu/%s" % (env_name), sent_accu, idx)
            writer.add_scalar("bleu4/%s" % (env_name), precisions[3], idx)

            # Save the model according to the bleu score
            if bleu_score > best_bleu[env_name]:
                best_bleu[env_name] = bleu_score
                print('Save the model with %s BEST env bleu %0.4f' % (env_name, bleu_score))
                speaker.save(idx, os.path.join(log_dir, 'state_dict', 'best_%s_bleu' % env_name))

            if loss < best_loss[env_name]:
                best_loss[env_name] = loss
                print('Save the model with %s BEST env loss %0.4f' % (env_name, loss))
                speaker.save(idx, os.path.join(log_dir, 'state_dict', 'best_%s_loss' % env_name))

            # Screen print out
            print("Bleu 1: %0.4f, Bleu 2: %0.4f, Bleu 3: %0.4f, Bleu 4: %0.4f" % tuple(precisions))
Example #12
def prepare_r2r_data():
    ''' Prepare data from the training set, and the val_seen and val_unseen splits. '''
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    feature_data_store = imgfeat_r2r(
        '/media/diskpart2/oscar_data/r2r_vln/train.yaml')
    featurized_scans = feature_data_store.get_feat_scans()
    train_env = R2RBatch(feature_data_store,
                         batch_size=args.batchSize,
                         splits=['train'])
    # NOTE: tok and interval are assumed to be defined elsewhere at module
    # level (a Tokenizer and the per-interval iteration count); the snippet
    # is partial as published.
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    listner.env = train_env
    listner.train(interval, feedback=args.feedback)  # Train interval iters
Example #13
def test_submission():
    ''' Train on combined training and validation sets, and generate test submission. '''
  
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)
    
    # Build models and train
    enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx, 
                  dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                  action_embedding_size, hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters)

    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'], tokenizer=tok)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, 'test', 20000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
Example #14
def infer_speaker(env, tok):
    import tqdm
    from utils import load_datasets
    listner = Seq2SeqAgent(env, "", tok, args.maxAction)
    speaker = Speaker(env, listner, tok)
    speaker.load(args.load)

    dataset = load_datasets(env.splits)
    key_map = {}
    for i, item in enumerate(dataset):
        key_map[item["path_id"]] = i
    path2inst = speaker.get_insts(wrapper=tqdm.tqdm)
    for path_id in path2inst.keys():
        speaker_pred = tok.decode_sentence(path2inst[path_id])
        dataset[key_map[path_id]]['instructions'] = [speaker_pred]

    with open("tasks/R2R/data/aug_paths_unseen_infer.json", "w") as f:
        json.dump(dataset, f, indent=4, sort_keys=True)
Example #15
def valid_speaker(train_env, tok, val_envs):
    import tqdm
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    speaker = Speaker(train_env, listner, tok)
    speaker.load(args.load)

    for env_name, (env, evaluator) in val_envs.items():
        if env_name == 'train':
            continue
        print("............ Evaluating %s ............." % env_name)
        speaker.env = env
        path2inst, loss, word_accu, sent_accu = speaker.valid(wrapper=tqdm.tqdm)
        path_id = next(iter(path2inst.keys()))
        print("Inference: ", tok.decode_sentence(path2inst[path_id]))
        print("GT: ", evaluator.gt[str(path_id)]['instructions'])
        bleu_score, precisions = evaluator.bleu_score(path2inst)

        print(len(env.data), len(path2inst.keys()))
        import pdb; pdb.set_trace()
Example #16
def filter_arbiter(valid_env, aug_env, tok):
    import tqdm
    listner = Seq2SeqAgent(aug_env, "", tok, args.maxAction)
    arbiter = Arbiter(aug_env, listner, tok)

    # Load the model
    arbiter.load(args.load)

    # Create Dir
    os.makedirs(os.path.join(log_dir, 'arbiter_result'), exist_ok=True)

    # Get the prob for the validation env (may be used for determining the threshold)
    # arbiter.env = valid_env
    # valid_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)
    # json.dump(valid_inst2prob, open(os.path.join(log_dir, 'arbiter_result', 'valid_prob.json'), 'w'))

    # Get the prob of the augmentation data
    arbiter.env = aug_env
    aug_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)
    aug_data = [datum.copy() for datum in aug_env.data]
    for datum in aug_data:
        datum['instructions'] = [datum['instructions']]
        datum.pop('instr_encoding')  # Remove the redundant components in the dataset

    for datum in aug_data:
        datum['prob'] = aug_inst2prob[datum['instr_id']]
    json.dump(
        aug_data,
        open(os.path.join(log_dir, 'arbiter_result', 'aug_prob.json'), 'w'))

    # Create the Dataset
    data = [
        datum for datum in aug_data if aug_inst2prob[datum['instr_id']] > 0.5
    ]

    for datum in aug_data:
        datum.pop('instr_id')
    return data
Example #17
def valid_speaker(tok, val_envs):
    import tqdm
    listner = Seq2SeqAgent(None, "", tok, args.maxAction)
    speaker = Speaker(None, listner, tok)
    speaker.load(args.load)

    for env_name, (env, evaluator) in val_envs.items():
        if env_name == 'train':
            continue
        print("............ Evaluating %s ............." % env_name)
        speaker.env = env
        path2inst, loss, word_accu, sent_accu = speaker.valid(wrapper=tqdm.tqdm)
        path_id = next(iter(path2inst.keys()))
        print("Inference: ", tok.decode_sentence(path2inst[path_id]))
        print("GT: ", evaluator.gt[path_id]['instructions'])
        pathXinst = list(path2inst.items())
        name2score = evaluator.lang_eval(pathXinst, no_metrics={'METEOR'})
        score_string = " "
        for score_name, score in name2score.items():
            score_string += "%s_%s: %0.4f " % (env_name, score_name, score)
        print("For env %s" % env_name)
        print(score_string)
        print("Average Length %0.4f" % utils.average_length(path2inst))
Example #18
features = IMAGENET_FEATURES
CANDIDATE_FEATURES = IMAGENET_CANDIDATE_FEATURES

# Load image features and candidate features
feature_dict = read_img_features(features)
candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
# Load GloVe embeddings and the vocabulary
glove_path = 'tasks/R2R/data/train_glove.npy'
glove = np.load(glove_path)
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)

# Instantiate the listener and load the pre-trained model
listner = Seq2SeqAgent(None,
                       "",
                       tok,
                       feat=feature_dict,
                       candidates=candidate_dict,
                       episode_len=args.maxAction)
listner.load(
    'snap/long/ablation_cand_0208_accuGrad_envdrop_ty/state_dict/best_val_unseen'
)


# nav graph loader from env.py
def load_nav_graphs(scans):
    ''' Load connectivity graph for each scan '''
    def distance(pose1, pose2):
        ''' Euclidean distance between two graph poses '''
        return ((pose1['pose'][3]-pose2['pose'][3])**2\
          + (pose1['pose'][7]-pose2['pose'][7])**2\
          + (pose1['pose'][11]-pose2['pose'][11])**2)**0.5
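
The snippet above cuts off after the inner distance helper. In the R2R codebase this loader goes on to build one networkx graph per scan from the Matterport connectivity JSON; a sketch of that continuation, assuming the standard connectivity/<scan>_connectivity.json layout:

import json
import networkx as nx
import numpy as np

def load_nav_graphs_sketch(scans):
    ''' Sketch: build one undirected graph per scan from the Matterport
        connectivity files, mirroring the loader the snippet truncates. '''
    def distance(p1, p2):
        # Same Euclidean distance as the helper above.
        return ((p1['pose'][3] - p2['pose'][3]) ** 2
                + (p1['pose'][7] - p2['pose'][7]) ** 2
                + (p1['pose'][11] - p2['pose'][11]) ** 2) ** 0.5

    graphs = {}
    for scan in scans:
        with open('connectivity/%s_connectivity.json' % scan) as f:
            data = json.load(f)
        G = nx.Graph()
        positions = {}
        for item in data:
            if not item['included']:
                continue
            positions[item['image_id']] = np.array(
                [item['pose'][3], item['pose'][7], item['pose'][11]])
            for j, conn in enumerate(item['unobstructed']):
                if conn and data[j]['included']:
                    G.add_edge(item['image_id'], data[j]['image_id'],
                               weight=distance(item, data[j]))
        nx.set_node_attributes(G, values=positions, name='position')
        graphs[scan] = G
    return graphs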
Example #19
def train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
    log_every=100, val_envs=None):
    ''' Train on training set, validating on both seen and unseen. '''
    if val_envs is None:
        val_envs = {}

    if agent_type == 'seq2seq':
        agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len)
    else:
        sys.exit("Unrecognized agent_type '%s'" % agent_type)
    print('Training a %s agent with %s feedback' % (agent_type, feedback_method))
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, weight_decay=weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, weight_decay=weight_decay) 

    data_log = defaultdict(list)
    start = time.time()
    print('Start training')
    for idx in range(0, n_iters, log_every):

        interval = min(log_every,n_iters-idx)
        iter = idx + interval
        data_log['iteration'].append(iter)

        # Train for log_every interval
        agent.train(encoder_optimizer, decoder_optimizer, interval, feedback=feedback_method)
        train_losses = np.array(agent.losses)
        assert len(train_losses) == interval
        train_loss_avg = np.average(train_losses)
        data_log['train loss'].append(train_loss_avg)
        loss_str = 'train loss: %.4f' % train_loss_avg

        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            agent.env = env
            agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, env_name, iter)
            # Get validation loss under the same conditions as training
            agent.test(use_dropout=True, feedback=feedback_method, allow_cheat=True)
            val_losses = np.array(agent.losses)
            val_loss_avg = np.average(val_losses)
            data_log['%s loss' % env_name].append(val_loss_avg)
            # Get validation distance from goal under test evaluation conditions
            agent.test(use_dropout=False, feedback='argmax')
            agent.write_results()
            score_summary, _ = evaluator.score(agent.results_path)
            loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            for metric, val in score_summary.items():
                data_log['%s %s' % (env_name, metric)].append(val)
                if metric in ['success_rate', 'oracle success_rate', 'oracle path_success_rate', 'dist_to_end_reduction']:
                    loss_str += ', %s: %.3f' % (metric, val)

        agent.env = train_env

        print('%s (%d %d%%) %s' % (timeSince(start, float(iter)/n_iters),
                                             iter, float(iter)/n_iters*100, loss_str))
        df = pd.DataFrame(data_log)
        df.set_index('iteration', inplace=True)
        df_path = '%s%s-log.csv' % (PLOT_DIR, model_prefix)
        df.to_csv(df_path)
        
        split_string = "-".join(train_env.splits)
        enc_path = '%s%s_%s_enc_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
        dec_path = '%s%s_%s_dec_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
        agent.save(enc_path, dec_path)

    print('Finish training')
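
timeSince is defined elsewhere in this codebase; it follows the helper from the PyTorch seq2seq tutorial. A sketch of the usual implementation, for reference:

import math
import time

def as_minutes(s):
    m = math.floor(s / 60)
    return '%dm %ds' % (m, s - m * 60)

def timeSince(since, percent):
    # Elapsed wall-clock time plus a naive estimate of time remaining,
    # given the fraction of iterations completed so far.
    now = time.time()
    s = now - since
    rs = s / percent - s
    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))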
Example #20
def train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          feedback,
          max_episode_len,
          max_input_length,
          prefix,
          blind,
          lr,
          weight_decay,
          result_dir,
          snapshot_dir,
          plot_dir,
          log_every=50,
          val_envs=None,
          debug=False):
    ''' Train on training set, validating on both seen and unseen. '''

    if debug:
        print("Training in debug mode")
        log_every = 1

    if val_envs is None:
        val_envs = {}

    print('Training with %s feedback' % (feedback))
    agent = Seq2SeqAgent(train_env,
                         "",
                         encoder,
                         decoder,
                         max_episode_len,
                         blind=blind)
    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=lr,
                                   weight_decay=weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=lr,
                                   weight_decay=weight_decay)

    data_log = defaultdict(list)
    start = time.time()

    for idx in range(0, n_iters, log_every):

        interval = min(log_every, n_iters - idx)
        iter = idx + interval
        data_log['iteration'].append(iter)

        # Train for log_every interval
        agent.train(encoder_optimizer,
                    decoder_optimizer,
                    interval,
                    feedback=feedback)
        train_losses = np.array(agent.losses)
        assert len(train_losses) == interval
        train_loss_avg = np.average(train_losses)
        data_log['train loss'].append(train_loss_avg)
        loss_str = 'train loss: %.4f' % train_loss_avg

        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            agent.env = env
            agent.results_path = '%s%s_%s_iter_%d.json' % (result_dir, prefix,
                                                           env_name, iter)
            # Get validation loss under the same conditions as training
            agent.test(use_dropout=True, feedback=feedback, allow_cheat=True)
            val_losses = np.array(agent.losses)
            val_loss_avg = np.average(val_losses)
            data_log['%s loss' % env_name].append(val_loss_avg)
            # Get validation distance from goal under test evaluation conditions
            agent.test(use_dropout=False, feedback='argmax')
            agent.write_results()
            score_summary = evaluator.score(agent.results_path)
            loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            for metric, val in score_summary.items():
                data_log['%s %s' % (env_name, metric)].append(val)
                loss_str += ', %s: %.3f' % (metric, val)

        agent.env = train_env

        print(('%s (%d %d%%) %s' % (timeSince(start,
                                              float(iter) / n_iters), iter,
                                    float(iter) / n_iters * 100, loss_str)))
        df = pd.DataFrame(data_log)
        df.set_index('iteration', inplace=True)
        df_path = '%s%s-log.csv' % (plot_dir, prefix)
        df.to_csv(df_path)

        split_string = "-".join(train_env.splits)
        enc_path = '%s%s_%s_enc_iter_%d' % (snapshot_dir, prefix, split_string,
                                            iter)
        dec_path = '%s%s_%s_dec_iter_%d' % (snapshot_dir, prefix, split_string,
                                            iter)
        agent.save(enc_path, dec_path)

        # Log data to wandb for visualization
        wandb.log(last_entry(data_log, eval_type), step=idx)
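
last_entry is not shown in any of these examples; a hypothetical helper consistent with the call above would flatten the most recent value of each logged metric into a dict for wandb.log:

def last_entry(data_log, eval_type):
    # Hypothetical: take the latest appended value per metric and
    # namespace the keys by evaluation type for the wandb dashboard.
    return {'%s/%s' % (eval_type, key): values[-1]
            for key, values in data_log.items() if values}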
Example #21
tok = Tokenizer(vocab=vocab, encoding_length=80)
feat_dict = read_img_features(features)

train_env = R2RBatch(feat_dict, batch_size=64, splits=['train'], tokenizer=tok)
log_dir = "snap/speaker/state_dict/best_val_seen_bleu"
val_env_names = ['val_unseen', 'val_seen']
featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])

val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                          batch_size=args.batchSize,
                                          splits=[split],
                                          tokenizer=tok),
                                 Evaluation([split], featurized_scans, tok)))
                        for split in val_env_names))

listner = Seq2SeqAgent(train_env, "", tok, 35)
speaker = speaker.Speaker(train_env, listner, tok)
speaker.load(log_dir)
speaker.env = train_env
results = {}
for env_name, (env, evaluator) in val_envs.items():
    print("............ Evaluating %s ............." % env_name)
    speaker.env = env
    path2inst, loss, word_accu, sent_accu = speaker.valid()

    r = defaultdict(dict)
    for path_id in path2inst.keys():
        # internal_bleu = evaluator.compute_internal_bleu_score(path_id)
        # if internal_bleu == 1.0:
        #     import pdb;
        #     pdb.set_trace()
Example #22
def beam_valid(train_env, tok, val_envs={}):
    listener = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    speaker = Speaker(train_env, listener, tok)
    if args.speaker is not None:
        print("Load the speaker from %s." % args.speaker)
        speaker.load(args.speaker)

    print("Loaded the listener model at iter %d" % listener.load(args.load))

    final_log = ""
    for env_name, (env, evaluator) in val_envs.items():
        listener.logs = defaultdict(list)
        listener.env = env

        listener.beam_search_test(speaker)
        results = listener.results

        def cal_score(x, alpha, avg_speaker, avg_listener):
            speaker_score = sum(x["speaker_scores"]) * alpha
            if avg_speaker:
                speaker_score /= len(x["speaker_scores"])
            # normalizer = sum(math.log(k) for k in x['listener_actions'])
            normalizer = 0.
            listener_score = (sum(x["listener_scores"]) + normalizer) * (1 -
                                                                         alpha)
            if avg_listener:
                listener_score /= len(x["listener_scores"])
            return speaker_score + listener_score

        if args.param_search:
            # Search for the best speaker / listener ratio
            interval = 0.01
            logs = []
            for avg_speaker in [False, True]:
                for avg_listener in [False, True]:
                    for alpha in np.arange(0, 1 + interval, interval):
                        result_for_eval = []
                        for key in results:
                            result_for_eval.append({
                                "instr_id":
                                key,
                                "trajectory":
                                max(results[key]['paths'],
                                    key=lambda x: cal_score(
                                        x, alpha, avg_speaker, avg_listener))
                                ['trajectory']
                            })
                        score_summary, _ = evaluator.score(result_for_eval)
                        for metric, val in score_summary.items():
                            if metric in ['success_rate']:
                                print(
                                    "Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f"
                                    % (avg_speaker, avg_listener, alpha, val))
                                logs.append(
                                    (avg_speaker, avg_listener, alpha, val))
            tmp_result = "Env Name %s\n" % (env_name) + \
                    "Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f\n" % max(logs, key=lambda x: x[3])
            print(tmp_result)
            # print("Env Name %s" % (env_name))
            # print("Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f" %
            #       max(logs, key=lambda x: x[3]))
            final_log += tmp_result
            print()
        else:
            avg_speaker = True
            avg_listener = True
            alpha = args.alpha

            result_for_eval = []
            for key in results:
                result_for_eval.append({
                    "instr_id": key,
                    "trajectory": [(vp, 0, 0) for vp in results[key]['dijk_path']] + \
                                  max(results[key]['paths'],
                                   key=lambda x: cal_score(x, alpha, avg_speaker, avg_listener)
                                  )['trajectory']
                })
            # result_for_eval = utils.add_exploration(result_for_eval)
            score_summary, _ = evaluator.score(result_for_eval)

            if env_name != 'test':
                loss_str = "Env Name: %s" % env_name
                for metric, val in score_summary.items():
                    if metric in ['success_rate']:
                        print(
                            "Avg speaker %s, Avg listener %s, For the speaker weight %0.4f, the result is %0.4f"
                            % (avg_speaker, avg_listener, alpha, val))
                    loss_str += ", %s: %0.4f" % (metric, val)
                print(loss_str)
            print()

            if args.submit:
                json.dump(result_for_eval,
                          open(
                              os.path.join(log_dir,
                                           "submit_%s.json" % env_name), 'w'),
                          sort_keys=True,
                          indent=4,
                          separators=(',', ': '))
    print(final_log)
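
cal_score linearly interpolates the summed (or averaged) speaker and listener log-scores with weight alpha. A toy usage sketch with made-up scores, assuming the closure is lifted to module scope:

candidates = [
    {'speaker_scores': [-1.2, -0.8], 'listener_scores': [-0.5, -0.4],
     'trajectory': 'path_A'},
    {'speaker_scores': [-0.3, -0.2], 'listener_scores': [-2.0, -1.5],
     'trajectory': 'path_B'},
]
# alpha = 1.0 trusts the speaker only; alpha = 0.0 trusts the listener only.
best = max(candidates,
           key=lambda x: cal_score(x, 0.5, avg_speaker=True, avg_listener=True))
print(best['trajectory'])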
Example #23
def train(train_env, tok, n_iters, log_every=100, val_envs={}, aug_env=None):
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    speaker = None
    if args.self_train:
        speaker = Speaker(train_env, listner, tok)
        if args.speaker is not None:
            if args.upload:
                print("Load the speaker from %s." % args.speaker)
                speaker.load(
                    get_sync_dir(os.path.join(args.upload_path, args.speaker)))
            else:
                print("Load the speaker from %s." % args.speaker)
                speaker.load(os.path.join(args.R2R_Aux_path, args.speaker))

    start_iter = 0
    if args.load is not None:
        if args.upload:
            refs_paths = get_outputs_refs_paths()['experiments'][0]
            print(refs_paths)
            load_model = os.path.join(refs_paths, args.load)
            print(load_model)
            print("LOAD THE listener from %s" % load_model)
            start_iter = listner.load(load_model)
        else:
            print("LOAD THE listener from %s" % args.load)
            start_iter = listner.load(
                os.path.join(args.R2R_Aux_path, args.load))

    start = time.time()

    best_val = {
        'val_seen': {
            "accu": 0.,
            "state": "",
            'update': False
        },
        'val_unseen': {
            "accu": 0.,
            "state": "",
            'update': False
        }
    }
    if args.fast_train:
        log_every = 40
    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, start_iter + n_iters - idx)
        iter = idx + interval

        # Train for log_every interval
        if aug_env is None:  # The default training process
            listner.env = train_env
            listner.train(interval,
                          feedback=feedback_method)  # Train interval iters
        else:
            if args.accumulate_grad:
                for _ in range(interval // 2):
                    listner.zero_grad()
                    listner.env = train_env

                    # Train with GT data
                    args.ml_weight = 0.2
                    listner.accumulate_gradient(feedback_method)
                    listner.env = aug_env

                    # Train with Back Translation
                    args.ml_weight = 0.6  # Sem-Configuration
                    listner.accumulate_gradient(feedback_method,
                                                speaker=speaker)
                    listner.optim_step()
            else:
                for _ in range(interval // 2):
                    # Train with GT data
                    listner.env = train_env
                    args.ml_weight = 0.2
                    listner.train(1, feedback=feedback_method)

                    # Train with Back Translation
                    listner.env = aug_env
                    args.ml_weight = 0.6
                    listner.train(1, feedback=feedback_method, speaker=speaker)

        # Log the training stats to tensorboard
        total = max(sum(listner.logs['total']), 1)
        # import pdb; pdb.set_trace() # length_rl == length_ml ? entropy length
        assert (max(len(listner.logs['rl_loss']),
                    1) == max(len(listner.logs['ml_loss']), 1))
        max_rl_length = max(len(listner.logs['critic_loss']), 1)
        log_length = max(len(listner.logs['rl_loss']), 1)
        rl_loss = sum(listner.logs['rl_loss']) / log_length
        ml_loss = sum(listner.logs['ml_loss']) / log_length
        critic_loss = sum(listner.logs['critic_loss']
                          ) / log_length  #/ length / args.batchSize
        spe_loss = sum(listner.logs['spe_loss']) / log_length
        pro_loss = sum(listner.logs['pro_loss']) / log_length
        mat_loss = sum(listner.logs['mat_loss']) / log_length
        fea_loss = sum(listner.logs['fea_loss']) / log_length
        ang_loss = sum(listner.logs['ang_loss']) / log_length
        entropy = sum(
            listner.logs['entropy']) / log_length  #/ length / args.batchSize
        predict_loss = sum(listner.logs['us_loss']) / log_length
        writer.add_scalar("loss/rl_loss", rl_loss, idx)
        writer.add_scalar("loss/ml_loss", ml_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/spe_loss", spe_loss, idx)
        writer.add_scalar("loss/pro_loss", pro_loss, idx)
        writer.add_scalar("loss/mat_loss", mat_loss, idx)
        writer.add_scalar("loss/fea_loss", fea_loss, idx)
        writer.add_scalar("loss/ang_loss", ang_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_rl_length", max_rl_length, idx)
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("loss/unsupervised", predict_loss, idx)
        print("total_actions", total)
        print("max_rl_length", max_rl_length)

        # Run validation
        loss_str = ""
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation loss under the same conditions as training
            iters = None if args.fast_train or env_name != 'train' else 20  # 20 * 64 = 1280

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=iters)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            loss_str += "%s " % env_name
            for metric, val in score_summary.items():
                if metric in ['success_rate']:
                    loss_str += ', %s: %.4f' % (metric, val)
                    writer.add_scalar("%s/accuracy" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['accu']:
                            best_val[env_name]['accu'] = val
                            best_val[env_name]['update'] = True
                if metric in ['spl']:
                    writer.add_scalar("%s/spl" % env_name, val, idx)
                    loss_str += ', %s: %.4f' % (metric, val)
            loss_str += '\n'
        loss_str += '\n'

        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d \n%s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                file_dir = os.path.join(output_dir, "snap", args.name,
                                        "state_dict", "best_%s" % (env_name))
                listner.save(idx, file_dir)
        print(('%s (%d %d%%) \n%s' % (timeSince(start,
                                                float(iter) / n_iters), iter,
                                      float(iter) / n_iters * 100, loss_str)))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])

        if iter % args.save_iter == 0:
            file_dir = os.path.join(output_dir, "snap", args.name,
                                    "state_dict", "Iter_%06d" % (iter))
            listner.save(idx, file_dir)
Example #24
def train(train_env, tok, n_iters, log_every=100, val_envs={}, aug_env=None):
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    speaker = None
    if args.self_train:
        speaker = Speaker(train_env, listner, tok)
        if args.speaker is not None:
            print("Load the speaker from %s." % args.speaker)
            speaker.load(args.speaker)

    start_iter = 0
    if args.load is not None:
        print("LOAD THE listener from %s" % args.load)
        start_iter = listner.load(os.path.join(args.load))

    start = time.time()

    best_val = {
        'val_seen': {
            "accu": 0.,
            "state": "",
            'update': False
        },
        'val_unseen': {
            "accu": 0.,
            "state": "",
            'update': False
        }
    }
    if args.fast_train:
        log_every = 40
    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, start_iter + n_iters - idx)
        iter = idx + interval

        # Train for log_every interval
        if aug_env is None:  # The default training process
            listner.env = train_env
            listner.train(interval,
                          feedback=feedback_method)  # Train interval iters
        else:
            if args.accumulate_grad:
                for _ in range(interval // 2):
                    listner.zero_grad()
                    listner.env = train_env

                    # Train with GT data
                    args.ml_weight = 0.2
                    listner.accumulate_gradient(feedback_method)
                    listner.env = aug_env

                    # Train with Back Translation
                    args.ml_weight = 0.6  # Sem-Configuration
                    listner.accumulate_gradient(feedback_method,
                                                speaker=speaker)
                    listner.optim_step()
            else:
                for _ in range(interval // 2):
                    # Train with GT data
                    listner.env = train_env
                    args.ml_weight = 0.2
                    listner.train(1, feedback=feedback_method)

                    # Train with Back Translation
                    listner.env = aug_env
                    args.ml_weight = 0.6
                    listner.train(1, feedback=feedback_method, speaker=speaker)

        # Log the training stats to tensorboard
        total = max(sum(listner.logs['total']), 1)
        length = max(len(listner.logs['critic_loss']), 1)
        critic_loss = sum(
            listner.logs['critic_loss']) / total  #/ length / args.batchSize
        entropy = sum(
            listner.logs['entropy']) / total  #/ length / args.batchSize
        predict_loss = sum(listner.logs['us_loss']) / max(
            len(listner.logs['us_loss']), 1)
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/unsupervised", predict_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_length", length, idx)
        print("total_actions", total)
        print("max_length", length)

        # Run validation
        loss_str = ""
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation loss under the same conditions as training
            iters = None if args.fast_train or env_name != 'train' else 20  # 20 * 64 = 1280

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=iters)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            loss_str += ", %s " % env_name
            for metric, val in score_summary.items():
                if metric in ['success_rate']:
                    writer.add_scalar("accuracy/%s" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['accu']:
                            best_val[env_name]['accu'] = val
                            best_val[env_name]['update'] = True
                loss_str += ', %s: %.3f' % (metric, val)

        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "best_%s" % (env_name)))

        print(('%s (%d %d%%) %s' % (timeSince(start,
                                              float(iter) / n_iters), iter,
                                    float(iter) / n_iters * 100, loss_str)))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])

        if iter % 50000 == 0:
            listner.save(
                idx,
                os.path.join("snap", args.name, "state_dict",
                             "Iter_%06d" % (iter)))

    listner.save(
        idx,
        os.path.join("snap", args.name, "state_dict", "LAST_iter%d" % (idx)))
Example #25
def train(train_env, tok, n_iters, log_every=100, val_envs={}):
    ''' Train on training set, validating on both seen and unseen. '''
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    start_iter = 0
    if args.load is not None:
        print("LOAD THE DICT from %s" % args.load)
        start_iter = listner.load(os.path.join(args.load))

    start = time.time()

    # agent.train(encoder_optimizer, decoder_optimizer, 1000, feedback='teacher')
    best_val = {
        'val_seen': {
            "accu": 0.,
            "state": "",
            'update': False
        },
        'val_unseen': {
            "accu": 0.,
            "state": "",
            'update': False
        }
    }
    if args.fast_train:
        log_every = 40
    else:
        killer = utils.GracefulKiller()
    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, start_iter + n_iters - idx)
        iter = idx + interval

        # Train for log_every interval
        listner.env = train_env
        listner.train(interval,
                      feedback=feedback_method)  # Train interval iters
        # listner.timer.show()

        # Log the tensorboard
        total = max(sum(listner.logs['total']), 1)
        length = max(len(listner.logs['critic_loss']), 1)
        # critic_loss = sum(listner.logs['critic_loss']) / length / args.batchSize
        # entropy = sum(listner.logs['entropy']) / length / args.batchSize
        critic_loss = sum(
            listner.logs['critic_loss']) / total  #/ length / args.batchSize
        entropy = sum(
            listner.logs['entropy']) / total  #/ length / args.batchSize
        predict_loss = sum(listner.logs['us_loss']) / max(
            len(listner.logs['us_loss']), 1)
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/unsupervised", predict_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_length", length, idx)

        loss_str = ""
        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation loss under the same conditions as training
            iters = None if args.fast_train or env_name != 'train' else 20  # 20 * 64 = 1280
            # listner.test(use_dropout=True, feedback='sample', allow_cheat=True, iters=iters)
            # val_losses = np.array(listner.losses)
            # val_loss_avg = np.average(val_losses)

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=iters)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            # loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            loss_str += ", %s" % env_name
            for metric, val in score_summary.items():
                if metric in ['success_rate']:
                    loss_str += ', %s: %.3f' % (metric, val)
                    writer.add_scalar("accuracy/%s" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['accu']:
                            best_val[env_name]['accu'] = val
                            best_val[env_name]['update'] = True

        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "best_%s" % (env_name)))

        listner.env = train_env

        print(('%s (%d %d%%) %s' % (timeSince(start,
                                              float(iter) / n_iters), iter,
                                    float(iter) / n_iters * 100, loss_str)))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])

        if iter % 20000 == 0:
            import shutil
            listner.save(
                idx,
                os.path.join("snap", args.name, "state_dict",
                             "Iter_%06d" % (iter)))
            shutil.copy(
                os.path.join("snap", args.name, "state_dict",
                             "best_val_unseen"),
                os.path.join("snap", args.name, "state_dict",
                             "best_val_unseen_%06d" % (iter)))

        if not args.fast_train:
            if killer.kill_now:
                break

    listner.save(
        idx,
        os.path.join("snap", args.name, "state_dict", "LAST_iter%d" % (idx)))
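
utils.GracefulKiller is referenced but not shown. The usual implementation of such a latch traps SIGINT/SIGTERM so the loop above can break at an interval boundary instead of dying mid-checkpoint; a sketch under that assumption:

import signal

class GracefulKiller:
    def __init__(self):
        self.kill_now = False
        signal.signal(signal.SIGINT, self._request_exit)
        signal.signal(signal.SIGTERM, self._request_exit)

    def _request_exit(self, signum, frame):
        # Record the request; the training loop polls kill_now and
        # breaks after finishing the current logging interval.
        self.kill_now = True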
Example #26
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    #train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
    #                     path_type=path_type, history=history, blind=blind)

    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic, args.action_space,
                         batch_size=args.batch_size, splits=['train'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)

    # Create validation environments
    #val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
    #            tokenizer=tok, path_type=path_type, history=history, blind=blind),
    #            Evaluation([split], path_type=path_type)) for split in ['val_seen', 'val_unseen']}

    val_envs = {split: (R2RBatch(feature_store, nav_graphs, args.panoramic, args.action_space,
                                 batch_size=args.batch_size, splits=[split], tokenizer=tok,
                                 path_type=path_type, history=history, blind=blind),
                        Evaluation([split], path_type=path_type))
                for split in ['val_seen', 'val_unseen']}

    # Build models and train
    #enc_hidden_size = hidden_size//2 if bidirectional else hidden_size

    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size, args.hidden_size,
                                 args.dropout_ratio, args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input, args.top_lstm,
                                 args.vl_layers, args.la_layers, args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()

        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size, args.hidden_size,
                                 args.dropout_ratio, args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input, args.top_lstm,
                                 args.vl_layers, args.la_layers, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size

    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio,
                                  args.bidirectional, args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm, args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            #encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            #encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio,
                                  args.bidirectional, args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
                            dropout_ratio, bidirectional=bidirectional).cuda()


    #decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
    #              action_embedding_size, args.hidden_size, args.dropout_ratio).cuda()
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768

    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size, args.hidden_size,
                                 args.dropout_ratio, FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()


    train(train_env, encoder, decoder, n_iters,
          path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs, args=args)
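Note how the decoder's attention context size is derived above: twice enc_hidden_size for a bidirectional encoder LSTM, but a fixed 768 (bert-base's hidden size) when a BERT encoder feeds the decoder directly without a top LSTM. A small self-contained sketch of that sizing rule, with illustrative values:

def context_size(enc_hidden_size, bidirectional, use_bert, top_lstm):
    # Sketch of the sizing rule used above; 768 is the hidden size of bert-base.
    if use_bert and not top_lstm:
        return 768
    return enc_hidden_size * (2 if bidirectional else 1)

assert context_size(512, True, False, False) == 1024   # bidirectional LSTM
assert context_size(512, False, True, True) == 512     # BERT with a top LSTM
assert context_size(512, False, True, False) == 768    # raw BERT hidden states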
Example #27
def train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method,
          max_episode_len, MAX_INPUT_LENGTH, model_prefix,
          log_every=100, val_envs=None, args=None):
    ''' Train on training set, validating on both seen and unseen. '''
    if val_envs is None:
        val_envs = {}

    if agent_type == 'seq2seq':
        agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len,
                             path_type=args.path_type, args=args)
    else:
        sys.exit("Unrecognized agent_type '%s'" % agent_type)
    print('Training a %s agent with %s feedback' % (agent_type, feedback_method))
    if args.optm == 'Adam':
        optim_func = optim.Adam
    elif args.optm == 'Adamax':
        optim_func = optim.Adamax
    else:  # guard against optim_func never being assigned
        sys.exit("Unrecognized optimizer '%s'" % args.optm)

    encoder_optimizer = optim_func(encoder.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    decoder_optimizer = optim_func(decoder.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

    data_log = defaultdict(list)
    start = time.time()

    best_model = {
        'iter': -1,
        'encoder': copy.deepcopy(agent.encoder.state_dict()),
        'decoder': copy.deepcopy(agent.decoder.state_dict()),
    }
    best_dr_model = {
        'iter': -1,
        'encoder': copy.deepcopy(agent.encoder.state_dict()),
        'decoder': copy.deepcopy(agent.decoder.state_dict()),
    }
    best_dr = 0
    best_spl = 0
    best_iter = 0
    best_dr_iter = 0
    best_sr = 0
    myidx = 0
    split_string = "-".join(train_env.splits)
    for idx in range(0, n_iters, log_every):

        interval = min(log_every, n_iters - idx)
        iter = idx + interval
        data_log['iteration'].append(iter)

        myidx += interval
        print("PROGRESS: {}%".format(round((myidx) * 100 / n_iters, 4)))

        # Train for log_every interval
        agent.train(encoder_optimizer, decoder_optimizer, interval, feedback=feedback_method)
        train_losses = np.array(agent.losses)
        assert len(train_losses) == interval
        train_loss_avg = np.average(train_losses)
        data_log['train loss'].append(train_loss_avg)
        loss_str = 'train loss: %.4f' % train_loss_avg

        # Run validation
        for env_name, (env, evaluator) in val_envs.items():
            agent.env = env
            agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, env_name, iter)
            # Get validation loss under the same conditions as training
            agent.test(use_dropout=True, feedback=feedback_method, allow_cheat=True)
            val_losses = np.array(agent.losses)
            val_loss_avg = np.average(val_losses)
            data_log['%s loss' % env_name].append(val_loss_avg)
            # Get validation distance from goal under test evaluation conditions
            agent.test(use_dropout=False, feedback='argmax')
            agent.write_results()
            score_summary, _ = evaluator.score(agent.results_path)
            loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
            for metric, val in score_summary.items():
                data_log['%s %s' % (env_name, metric)].append(val)
                if metric in ['success_rate', 'oracle success_rate', 'oracle path_success_rate', 'dist_to_end_reduction','sc_dr']:
                    loss_str += ', %s: %.3f' % (metric, val)


        eval_spl = current_best(data_log, -1, 'spl_unseen')
        eval_dr = current_best(data_log, -1, 'dr_unseen')
        eval_sr = current_best(data_log, -1, 'sr_unseen')
        if eval_sr > best_sr:
            best_sr = eval_sr
            best_iter = iter
            best_model['iter'] = iter
            best_model['encoder'] = copy.deepcopy(agent.encoder.state_dict())
            best_model['decoder'] = copy.deepcopy(agent.decoder.state_dict())
            save_best_model(best_model, SNAPSHOT_DIR, model_prefix, split_string, -1)
        if eval_spl > best_spl:
            best_spl = eval_spl
            loss_str += ' bestSPL'
        if eval_dr > best_dr:
            best_dr = eval_dr
            loss_str += ' bestDR'
            best_dr_iter = iter
            best_dr_model['iter'] = iter
            best_dr_model['encoder'] = copy.deepcopy(agent.encoder.state_dict())
            best_dr_model['decoder'] = copy.deepcopy(agent.decoder.state_dict())
            save_best_model(best_dr_model, SNAPSHOT_DIR, model_prefix, split_string + "bestdr", -1)
        agent.env = train_env

        print('%s (%d %d%%) %s' % (timeSince(start, float(iter)/n_iters),
                                             iter, float(iter)/n_iters*100, loss_str))

        print("EVALERR: {}%".format(best_dr))


        df = pd.DataFrame(data_log)
        df.set_index('iteration', inplace=True)  # set_index returns a copy unless inplace=True
        df_path = '%s%s-log.csv' % (PLOT_DIR, model_prefix)
        write_num = 0
        while write_num < 10:
            try:
                df.to_csv(df_path)
                break
            except IOError:  # retry a few times if the file is locked by another process
                write_num += 1

        #split_string = "-".join(train_env.splits)
        #enc_path = '%s%s_%s_enc_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
        #dec_path = '%s%s_%s_dec_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
        #agent.save(enc_path, dec_path)
    split_string = "-".join(train_env.splits)
    save_best_model(best_model, SNAPSHOT_DIR,model_prefix, split_string, best_iter)
    save_best_model(best_dr_model, SNAPSHOT_DIR,model_prefix, split_string+"bestdr", best_dr_iter)
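current_best, used in the loop above, is not defined in this snippet. From its call sites it evidently pulls a recent value of an unseen-split metric out of data_log; a plausible reconstruction (the key-matching rule is an assumption, since the real logging keys are not shown here):

def current_best(data_log, index, metric_key):
    # Hypothetical helper: return the entry at position `index` (e.g. -1 for
    # the latest) of the first logged column whose name matches `metric_key`
    # (such as 'spl_unseen'), or 0 if that metric has not been logged yet.
    for key, values in data_log.items():
        if metric_key in key and values:
            return values[index]
    return 0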
Example #28
def btest_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
                     feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on combined training and validation sets, and generate test submission. '''

    nav_graphs = setup(args.action_space, args.navigable_locs_path)

    # Create a batch training environment that will also preprocess text

    use_bert = (args.encoder_type in ['bert','vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)

    feature_store = Feature(features, args.panoramic)

    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size, args.hidden_size,
                                 args.dropout_ratio, args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input, args.top_lstm,
                                 args.vl_layers, args.la_layers, args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size, args.hidden_size,
                                 args.dropout_ratio, args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input, args.top_lstm,
                                 args.vl_layers, args.la_layers, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))  # remember to resize tok embedding size

    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio,
                                  args.bidirectional, args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm, args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            #encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            #encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio,
                                  args.bidirectional, args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:  # the original left encoder undefined for other encoder types
        sys.exit("btest_submission expects encoder_type 'bert' or 'vlbert'; got '%s'" % args.encoder_type)


    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768  # raw BERT hidden size, mirroring the sizing rule in train_val above
    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size, args.hidden_size,
                                 args.dropout_ratio, FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()


    if args.encoder_path != "":
        encoder.load_state_dict(torch.load(args.encoder_path))
        decoder.load_state_dict(torch.load(args.decoder_path))

    encoder.eval()
    decoder.eval()
    # Generate test submission
    test_env = R2RBatch(feature_store, nav_graphs, args.panoramic, args.action_space,
                        batch_size=args.batch_size, splits=['test'], tokenizer=tok,
                        path_type=path_type, history=history, blind=blind)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len,
                         path_type=args.path_type, args=args)
    agent.results_path = '%s%s_%s.json' % (RESULT_DIR, "Submit", 'test')
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
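A note on the snapshot loading in btest_submission above: torch.load restores tensors onto the devices they were saved from, which fails on a machine without the same GPU layout. Passing map_location (a real torch.load parameter) avoids this; a short usage sketch:

import torch

# Restore weights onto whatever device is actually available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
encoder.load_state_dict(torch.load(args.encoder_path, map_location=device))
decoder.load_state_dict(torch.load(args.decoder_path, map_location=device))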
Example #29
def create_augment_data():
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    # Load features
    feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)

    # The datasets to be augmented
    print("Start to augment the data")
    aug_envs = []
    # aug_envs.append(
    #     R2RBatch(
    #         feat_dict, candidate_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok
    #     )
    # )
    # aug_envs.append(
    #     SemiBatch(False, 'tasks/R2R/data/all_paths_46_removetrain.json',
    #         feat_dict, candidate_dict, batch_size=args.batchSize, splits=['train', 'val_seen'], tokenizer=tok)
    # )
    aug_envs.append(
        SemiBatch(False,
                  'tasks/R2R/data/all_paths_46_removevalunseen.json',
                  "unseen",
                  feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=['val_unseen'],
                  tokenizer=tok))
    aug_envs.append(
        SemiBatch(False,
                  'tasks/R2R/data/all_paths_46_removetest.json',
                  "test",
                  feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=['test'],
                  tokenizer=tok))
    # aug_envs.append(
    #     R2RBatch(
    #         feat_dict, candidate_dict, batch_size=args.batchSize, splits=['val_seen'], tokenizer=tok
    #     )
    # )
    # aug_envs.append(
    #     R2RBatch(
    #         feat_dict, candidate_dict, batch_size=args.batchSize, splits=['val_unseen'], tokenizer=tok
    #     )
    # )

    for snapshot in os.listdir(os.path.join(log_dir, 'state_dict')):
        # Only process this particular snapshot; otherwise augmentation runs for every snapshot.
        if snapshot != "best_val_unseen_bleu":
            continue

        # Create Speaker
        listner = Seq2SeqAgent(aug_envs[0], "", tok, args.maxAction)
        speaker = Speaker(aug_envs[0], listner, tok)

        # Load Weight
        load_iter = speaker.load(os.path.join(log_dir, 'state_dict', snapshot))
        print("Load from iter %d" % (load_iter))

        # Augment the env from aug_envs
        for aug_env in aug_envs:
            speaker.env = aug_env

            # Create the aug data
            import tqdm
            path2inst = speaker.get_insts(beam=args.beam, wrapper=tqdm.tqdm)
            data = []
            for datum in aug_env.fake_data:
                datum = datum.copy()
                path_id = datum['path_id']
                if path_id in path2inst:
                    datum['instructions'] = [
                        tok.decode_sentence(path2inst[path_id])
                    ]
                    datum.pop('instr_encoding')  # Remove Redundant keys
                    datum.pop('instr_id')
                    data.append(datum)

            print("Totally, %d data has been generated for snapshot %s." %
                  (len(data), snapshot))
            print("Average Length %0.4f" % utils.average_length(path2inst))
            print(datum)  # Print a Sample

            # Save the data
            import json
            os.makedirs(os.path.join(log_dir, 'aug_data'), exist_ok=True)
            beam_tag = "_beam" if args.beam else ""
            json.dump(data,
                      open(
                          os.path.join(
                              log_dir, 'aug_data', '%s_%s%s.json' %
                              (snapshot, aug_env.name, beam_tag)), 'w'),
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
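utils.average_length is referenced above but not shown. Given that path2inst maps a path id to a generated token sequence, a minimal sketch of what it presumably computes (an assumption, not the repo's actual helper):

def average_length(path2inst):
    # Assumed helper: mean generated-instruction length, in tokens.
    if not path2inst:
        return 0.0
    return sum(len(inst) for inst in path2inst.values()) / len(path2inst)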
Example #30
def train_arbiter(arbiter_env, tok, n_iters, log_every=500, val_envs=None):
    if val_envs is None:  # avoid a mutable default argument
        val_envs = {}
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(arbiter_env, "", tok, args.maxAction)
    arbiter = Arbiter(arbiter_env, listner, tok)
    best_f1 = 0.
    best_accu = 0.
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # Train for log_every interval
        arbiter.env = arbiter_env
        arbiter.train(interval)  # Train interval iters

        print()
        print("Iter: %d" % idx)

        # Evaluation
        for env_name, env in val_envs.items():
            print("............ Evaluating %s ............." % env_name)
            arbiter.env = env
            if env_name == 'train' or env_name == 'val_unseen':
                path2prob = arbiter.valid(total=500)
            else:  # val_seen needs an exact estimate for early stopping, so run the full split
                path2prob = arbiter.valid()
            print("len path2prob", len(path2prob))
            path2answer = env.get_answer()
            print("len path2ans", len(path2answer))
            false_probs = [path2prob[path] for path in path2prob
                           if not path2answer[path]]
            # Confusion-matrix counts at a 0.5 threshold. (The original code
            # swapped the false_positive / false_negative definitions; they are
            # corrected here, along with the dependent totals below.)
            true_positive = len([1 for path in path2prob
                                 if path2prob[path] >= 0.5 and path2answer[path]])
            false_positive = len([1 for path in path2prob
                                  if path2prob[path] >= 0.5 and not path2answer[path]])
            false_negative = len([1 for path in path2prob
                                  if path2prob[path] < 0.5 and path2answer[path]])
            true_negative = len([1 for path in path2prob
                                 if path2prob[path] < 0.5 and not path2answer[path]])
            # Precision / recall / F1 on the positive class, with zero-safe denominators.
            true_accu = true_positive / max(true_positive + false_positive, 1)
            true_recall = true_positive / max(true_positive + false_negative, 1)
            true_f1 = 2 * (true_accu * true_recall) / max(true_accu + true_recall, 1e-8)
            false_accu = true_negative / max(true_negative + false_positive, 1)
            print(
                "tp %d, fp %d, fn %d, tn %d" %
                (true_positive, false_positive, false_negative, true_negative))
            print("All negative", true_negative + false_positive)
            print("All positive", true_positive + false_negative)
            writer.add_scalar("true_accu", true_accu, idx)
            writer.add_scalar("true_recall", true_recall, idx)
            writer.add_scalar("true_f1", true_f1, idx)
            writer.add_scalar("false_accu", false_accu, idx)

            if env_name == 'val_seen':
                if true_f1 > best_f1:
                    best_f1 = true_f1
                    print('Save the model with %s f1 score %0.4f' %
                          (env_name, best_f1))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_f1' % env_name))

                if true_accu > best_accu:
                    best_accu = true_accu
                    print("Save the model with %s true accu %0.4f" %
                          (env_name, best_accu))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_accu' % env_name))

            print("True Accu %0.4f, False Accu %0.4f" %
                  (true_accu, false_accu))
            print("Avg False probs %0.4f" %
                  (sum(false_probs) / len(false_probs)))
            sys.stdout.flush()
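The inline confusion-matrix bookkeeping above is easy to get wrong (the original even had the fp/fn names swapped). A compact, self-contained version of the same computation, factored into a helper; the function name is ours, not the repo's:

def binary_metrics(path2prob, path2answer, threshold=0.5):
    # Precision / recall / F1 for thresholded path probabilities.
    tp = fp = fn = tn = 0
    for path, prob in path2prob.items():
        pred = prob >= threshold
        truth = path2answer[path]
        if pred and truth:
            tp += 1
        elif pred and not truth:
            fp += 1
        elif truth:
            fn += 1
        else:
            tn += 1
    precision = tp / max(tp + fp, 1)
    recall = tp / max(tp + fn, 1)
    f1 = 2 * precision * recall / max(precision + recall, 1e-8)
    return precision, recall, f1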
Example #31
def train(train_env, tok, n_iters, log_every=2000, val_envs=None, aug_env=None):
    if val_envs is None:  # avoid a mutable default argument
        val_envs = {}
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    with open('./logs/' + args.name + '.txt', 'a') as record_file:
        record_file.write(str(args) + '\n\n')

    start_iter = 0
    if args.load is not None:
        if args.aug is None:
            start_iter = listner.load(os.path.join(args.load))
            print("\nLOAD the model from {}, iteration {}".format(
                args.load, start_iter))
        else:
            load_iter = listner.load(os.path.join(args.load))
            print("\nLOAD the model from {}, iteration {}".format(
                args.load, load_iter))

    start = time.time()
    print('\nListener training starts, start iteration: %s' % str(start_iter))

    best_val = {
        'val_unseen': {
            "spl": 0.,
            "sr": 0.,
            "state": "",
            'update': False
        }
    }

    for idx in range(start_iter, start_iter + n_iters, log_every):
        listner.logs = defaultdict(list)
        interval = min(log_every, n_iters - idx)
        iter = idx + interval

        # Train for log_every interval
        if aug_env is None:
            listner.env = train_env
            listner.train(interval,
                          feedback=feedback_method)  # Train interval iters
        else:
            jdx_length = interval // 2
            for jdx in range(interval // 2):
                # Train with GT data
                listner.env = train_env
                args.ml_weight = 0.2
                listner.train(1, feedback=feedback_method)

                # Train with Augmented data
                listner.env = aug_env
                args.ml_weight = 0.2
                listner.train(1, feedback=feedback_method)

                print_progress(jdx,
                               jdx_length,
                               prefix='Progress:',
                               suffix='Complete',
                               bar_length=50)

        # Log the training stats to tensorboard
        total = max(sum(listner.logs['total']), 1)
        length = max(len(listner.logs['critic_loss']), 1)
        critic_loss = sum(listner.logs['critic_loss']) / total
        RL_loss = sum(listner.logs['RL_loss']) / max(
            len(listner.logs['RL_loss']), 1)
        IL_loss = sum(listner.logs['IL_loss']) / max(
            len(listner.logs['IL_loss']), 1)
        entropy = sum(listner.logs['entropy']) / total
        writer.add_scalar("loss/critic", critic_loss, idx)
        writer.add_scalar("policy_entropy", entropy, idx)
        writer.add_scalar("loss/RL_loss", RL_loss, idx)
        writer.add_scalar("loss/IL_loss", IL_loss, idx)
        writer.add_scalar("total_actions", total, idx)
        writer.add_scalar("max_length", length, idx)
        # print("total_actions", total, ", max_length", length)

        # Run validation
        loss_str = "iter {}".format(iter)
        for env_name, (env, evaluator) in val_envs.items():
            listner.env = env

            # Get validation distance from goal under test evaluation conditions
            listner.test(use_dropout=False, feedback='argmax', iters=None)
            result = listner.get_results()
            score_summary, _ = evaluator.score(result)
            loss_str += ", %s " % env_name
            for metric, val in score_summary.items():
                if metric in ['spl']:
                    writer.add_scalar("spl/%s" % env_name, val, idx)
                    if env_name in best_val:
                        if val > best_val[env_name]['spl']:
                            best_val[env_name]['spl'] = val
                            best_val[env_name]['sr'] = score_summary['success_rate']
                            best_val[env_name]['update'] = True
                        elif (val == best_val[env_name]['spl']) and (
                                score_summary['success_rate'] >
                                best_val[env_name]['sr']):
                            # Tie on SPL: prefer the higher success rate.
                            best_val[env_name]['sr'] = score_summary['success_rate']
                            best_val[env_name]['update'] = True
                loss_str += ', %s: %.4f' % (metric, val)

        with open('./logs/' + args.name + '.txt', 'a') as record_file:
            record_file.write(loss_str + '\n')

        for env_name in best_val:
            if best_val[env_name]['update']:
                best_val[env_name]['state'] = 'Iter %d %s' % (iter, loss_str)
                best_val[env_name]['update'] = False
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "best_%s" % (env_name)))
            else:
                listner.save(
                    idx,
                    os.path.join("snap", args.name, "state_dict",
                                 "latest_dict"))

        print('%s (%d %d%%) %s' % (timeSince(start, float(iter) / n_iters),
                                   iter, float(iter) / n_iters * 100, loss_str))

        if iter % 1000 == 0:
            print("BEST RESULT TILL NOW")
            for env_name in best_val:
                print(env_name, best_val[env_name]['state'])

                with open('./logs/' + args.name + '.txt', 'a') as record_file:
                    record_file.write('BEST RESULT TILL NOW: ' + env_name + ' | ' +
                                      best_val[env_name]['state'] + '\n')

    listner.save(
        idx,
        os.path.join("snap", args.name, "state_dict", "LAST_iter%d" % (idx)))
Example #32
def meta_filter():
    """
    Train the listener with the augmented data
    """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = {key.split("_")[0] for key in feat_dict}

    # Load the augmentation data
    if args.aug is None:  # No aug path given, so fall back to a default speaker snapshot
        speaker_snap_name = "adam_drop6_correctsave"
        print("Loading from %s" % speaker_snap_name)
        aug_path = "snap/speaker/long/%s/aug_data/best_val_unseen_loss.json" % speaker_snap_name
    else:  # Load the path from args
        aug_path = args.aug

    # Create the training environment
    aug_env = R2RBatch(feat_dict,
                       candidate_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok)
    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train@3333'],
                         tokenizer=tok)
    print("The augmented data_size is : %d" % train_env.size())
    stats = train_env.get_statistics()
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    # Setup the validation data
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen@133']
    }

    val_env, val_eval = val_envs['val_unseen@133']

    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

    def filter_result():
        listner.env = val_env
        val_env.reset_epoch()
        listner.test(use_dropout=False, feedback='argmax')
        result = listner.get_results()
        score_summary, _ = val_eval.score(result)
        for metric, val in score_summary.items():
            if metric in ['success_rate']:
                return val

    listner.load(args.load)
    base_accu = filter_result()
    print("BASE ACCU %0.4f" % base_accu)

    success = 0

    for data_id, datum in enumerate(aug_env.data):
        # Reload the param of the listener
        listner.load(args.load)
        train_env.reset_epoch(shuffle=True)

        listner.env = train_env

        # Train for the datum
        # iters = train_env.size() // train_env.batch_size
        iters = 10
        for i in range(iters):
            listner.env = train_env
            # train_env.reset(batch=([datum] * (train_env.batch_size // 2)), inject=True)
            train_env.reset(batch=[datum] * train_env.batch_size, inject=True)
            # train_env.reset()
            # train_env.reset()
            listner.train(1, feedback='sample', reset=False)
        # print("Iter %d, result %0.4f" % (i, filter_result()))
        now_accu = filter_result()
        if now_accu > base_accu:
            success += 1
        # print("RESULT %0.4f" % filter_result())
        print('Accu now %0.4f, success / total: %d / %d = %0.4f' %
              (now_accu, success, data_id + 1, success / (data_id + 1)))