Example no. 1
 def _setup(self, config):
     inject_tuned_hyperparameters(config, config)
     os.chdir(os.path.dirname(os.path.realpath(__file__)))
     print('Trainable got the following config after injection', config)
     self.config = config
     self.device = self.config['device']
     self.exp, self.model, self.train_dataloader, self.eval_dataloader = setup_training(
         self.config)
     self.exp.set_name(config['experiment_name'] + self._experiment_id)
     self.exp_name = config['experiment_name'] + self._experiment_id
     self.exp.send_notification(title='Experiment ' +
                                str(self._experiment_id) + ' ended')
     self.train_data_iter = iter(self.train_dataloader)
     self.model = self.model.to(self.device)
     self.model.train()
     n_params = sum(p.numel() for p in self.model.parameters()
                    if p.requires_grad)
     log_dict = flatten_dict(config)
     log_dict.update({'trainable_params': n_params})
     self.exp.log_parameters(log_dict)
     self.optimizers = get_optimizers(self.model, self.config)
     self.evaluator = Evaluation(self.eval_dataloader, self.config)
     self.num_examples = 0
     self.batch_idx = 0
     self.epoch = 1
     self.ewma = EWMA(beta=0.75)
     self.last_accu = -1.0
     self.max_accu = -1.0
     self.back_prop_every_n_batches = config['training'][
         'back_prop_every_n_batches']
     self.checkpoint_best = config['training']['checkpoint_best']
Example no. 2
    def __init__(self, env, vocab_size, results_path, batch_size, episode_len=20):
        super(ActorCriticAgent, self).__init__(env, results_path)

        #For evaluation
        self.ev = Evaluation(['train'])

        #For navigation
        self.episode_len = episode_len
        self.losses = []

        ''' Define instruction encoder '''
        word_embedding_size = 256
        hidden_size = 512
        bidirectional = False
        dropout_ratio = 0.5

        enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
        self.encoder = EncoderLSTM(vocab_size, word_embedding_size, enc_hidden_size, padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()

        context_size = 1024
        self.hist_encoder = EncoderHistory(len(self.model_actions), 32, 2048, context_size).cuda()
        self.a2c_agent = A2CAgent(enc_hidden_size, context_size, len(self.model_actions) - 2).cuda()
        self.saved_actions = []

        params = list(self.encoder.parameters()) + list(self.hist_encoder.parameters()) + list(self.a2c_agent.parameters())
        self.optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-5)
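
The single Adam optimizer over the concatenated parameter lists and the saved_actions buffer suggest the usual actor-critic update at episode end. Below is a minimal sketch of that update following the classic PyTorch actor-critic recipe; the (log_prob, value) layout of saved_actions and the rewards list are assumptions, and the real A2CAgent here may compute its losses differently.

import torch
import torch.nn.functional as F

def a2c_update(saved_actions, rewards, optimizer, gamma=0.99):
    # saved_actions: list of (log_prob, value) pairs collected during the episode (assumed layout)
    returns, R = [], 0.0
    for r in reversed(rewards):                  # discounted returns, computed backwards
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    policy_losses, value_losses = [], []
    for (log_prob, value), ret in zip(saved_actions, returns):
        advantage = ret - value.item()
        policy_losses.append(-log_prob * advantage)                   # policy-gradient term
        value_losses.append(F.smooth_l1_loss(value.squeeze(), ret))   # critic regression term
    optimizer.zero_grad()
    loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
    loss.backward()
    optimizer.step()
    return loss.item()
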
Example no. 3
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok), Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
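
Each val_envs entry pairs an R2RBatch with an Evaluation for the same split; inside train() these are typically used to score the agent's trajectories at regular intervals. Below is a minimal sketch of that scoring step, assuming the agent's rollouts for each split have been written to a JSON trajectory file; the Evaluation.score() call and its (score_summary, scores) return value follow the usage shown in the later examples.

from eval import Evaluation

def score_results(results_files):
    ''' results_files maps a split name to a JSON trajectory file produced by the agent. '''
    for split, results_file in results_files.items():
        evaluator = Evaluation([split])
        score_summary, _ = evaluator.score(results_file)
        for metric, val in score_summary.items():
            print('%s %s: %.3f' % (split, metric, val))
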
Example no. 4
def normal_training(config):
    device = torch.device(config['device'])
    print('Using device', device)
    exp, model, train_dataloader, eval_dataloader, loss_func = setup_training(
        config)
    exp.set_name(config['experiment_name'])
    model.train()
    model = model.to(device)
    optimizers = get_optimizers(model, config)
    evaluator = Evaluation(eval_dataloader, config)

    num_examples = 0
    for epoch in range(config['training']['training_epochs']):
        for idx, batch in enumerate(train_dataloader):
            batch = (batch[0].to(device), batch[1].to(device))
            num_examples += len(batch[0])
            loss, train_accuracy = training_step(batch, model, optimizers,
                                                 loss_func)
            if idx % config['training']['log_every_n_batches'] == 0:
                print(epoch, num_examples, loss.detach().cpu().numpy())
                exp.log_metric('train_loss',
                               loss.detach().cpu().numpy(),
                               step=num_examples,
                               epoch=epoch)

            if idx % config['training']['eval_every_n_batches'] == 0:
                results = evaluator.eval_model(model, loss_func)
                for metric in results:
                    print(metric, results[metric])
                    exp.log_metric(metric,
                                   results[metric],
                                   step=num_examples,
                                   epoch=epoch)
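
The Evaluation used here (and in Example no. 1) is a different, dataloader-based wrapper: eval_model(model, loss_func) is expected to return a dict mapping metric names to values, which the loop above logs. Below is a minimal sketch of that contract, with a hypothetical SimpleEvaluation class standing in for the project's own implementation and assuming a classification-style model.

import torch

class SimpleEvaluation:
    ''' Illustrative stand-in for the project's Evaluation(eval_dataloader, config). '''

    def __init__(self, eval_dataloader, config):
        self.eval_dataloader = eval_dataloader
        self.device = torch.device(config['device'])

    def eval_model(self, model, loss_func):
        # Run the model over the held-out loader and return a metric dict.
        model.eval()
        total_loss, correct, total = 0.0, 0, 0
        with torch.no_grad():
            for inputs, targets in self.eval_dataloader:
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                logits = model(inputs)
                total_loss += loss_func(logits, targets).item() * len(targets)
                correct += (logits.argmax(dim=-1) == targets).sum().item()
                total += len(targets)
        model.train()
        return {'eval_loss': total_loss / total, 'eval_accuracy': correct / total}
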
Example no. 5
def finetune():
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)

    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    print("The finetune data_size is : %d\n" % train_env.size())
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }

    train(train_env, tok, args.iters, val_envs=val_envs)
Example no. 6
def plot_final_scores():
    ''' Plot the scores '''
    font = {'size': 12}
    mpl.rc('font', **font)
    fig, ax = plt.subplots(nrows=1, ncols=1,
                           figsize=(7, 4))  # create figure & 1 axis
    outfiles = [
        RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
        RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json',
        RESULT_DIR + '%s_stop_agent.json', RESULT_DIR + '%s_random_agent.json'
    ]
    for split in ['val_seen']:
        ev = Evaluation([split])
        for i, outfile in enumerate(outfiles):
            score_summary, scores = ev.score(outfile % split)
            if i == 0:
                method = 'Teacher-forcing'
                ax.hist(scores['nav_errors'],
                        bins=range(0, 30, 3),
                        label=method,
                        normed=True,
                        histtype='step',
                        linewidth=2.5,
                        color='C1')
            elif i == 1:
                method = 'Student-forcing'
                ax.hist(scores['nav_errors'],
                        bins=range(0, 30, 3),
                        label=method,
                        alpha=0.7,
                        normed=True,
                        color='C0')
            elif i == 2:
                method = 'Start locations'
                ax.hist(scores['nav_errors'],
                        bins=range(0, 30, 3),
                        label=method,
                        normed=True,
                        histtype='step',
                        linewidth=2.5,
                        color='C3')
            elif i == 3:
                method = 'Random agent'
                ax.hist(scores['nav_errors'],
                        bins=range(0, 30, 3),
                        label=method,
                        normed=True,
                        histtype='step',
                        linewidth=2.5,
                        color='C2')
    ax.set_title('Val Seen Navigation Error')
    ax.set_xlabel('Error (m)')
    ax.set_ylabel('Frequency')
    ax.set_ylim([0, 0.14])
    ax.set_xlim([0, 30])
    plt.axvline(x=3, color='black', linestyle='--')
    legend = ax.legend(loc='upper right')
    plt.tight_layout()
    plt.savefig('%s/val_seen_error.png' % (PLOT_DIR))
    plt.close(fig)
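
The dashed line at 3 m marks the usual navigation-error success threshold; given the same scores dict returned by ev.score(), the success rate can be computed directly. A short follow-up sketch (the file name is the second outfile above formatted for val_seen):

ev = Evaluation(['val_seen'])
_, scores = ev.score(RESULT_DIR + 'seq2seq_sample_imagenet_val_seen_iter_20000.json')
success_rate = sum(e < 3.0 for e in scores['nav_errors']) / float(len(scores['nav_errors']))
print('val_seen success rate (error < 3 m): %.3f' % success_rate)
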
Example no. 7
def train_val_augment():
    """
    Train the listener with the augmented data
    """
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    # Load the env img features
    feat_dict = read_img_features(features)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    # Load the augmentation data
    aug_path = args.aug

    # Create the training environment
    aug_env = R2RBatch(feat_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok,
                       name='aug')

    # import sys
    # sys.exit()
    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)

    # Printing out the statistics of the dataset
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    # Setup the validation data
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }

    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
Example no. 8
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback,
              n_iters, prefix, blind, debug, train_vocab, trainval_vocab,
              batch_size, action_embedding_size, target_embedding_size,
              bidirectional, dropout_ratio, weight_decay, feature_size,
              hidden_size, word_embedding_size, lr, result_dir, snapshot_dir,
              plot_dir, train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''

    setup(seed, train_vocab, trainval_vocab)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab if eval_type == 'val' else trainval_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=train_splits,
                         tokenizer=tok,
                         seed=seed,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         blind=blind), Evaluation([split], seed=seed))
        for split in test_splits
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()

    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          feedback,
          max_episode_len,
          max_input_length,
          prefix,
          blind,
          lr,
          weight_decay,
          result_dir,
          snapshot_dir,
          plot_dir,
          val_envs=val_envs,
          debug=debug)
Example no. 9
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    train_env = R2RBatch(feat_dict,
                         pano_caffe,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    if args.submit:
        val_env_names.append('test')

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              pano_caffe,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example no. 10
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['train'], tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        #val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(
        ((split,
          (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split], tokenizer=tok),
           Evaluation([split], featurized_scans, tok))
          )
         for split in val_env_names
         )
    )

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'vae_agent':
        train_vae_agent(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(train_env, tok, val_envs)
    elif args.train == 'inferspeaker':
        unseen_env = R2RBatch(feat_dict, batch_size=args.batchSize, splits=['tasks/R2R/data/aug_paths_test.json'], tokenizer=None)
        infer_speaker(unseen_env, tok)
    else:
        assert False
Example no. 11
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
               feedback_method, n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind), Evaluation([split],
                                                  path_type=path_type))
        for split in ['test']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env,
          encoder,
          decoder,
          n_iters,
          path_type,
          history,
          feedback_method,
          max_episode_len,
          MAX_INPUT_LENGTH,
          model_prefix,
          val_envs=val_envs)
Example no. 12
def get_scores(output_file, split):
    output_ids = []

    evaluator = Evaluation([split], 'lstm')  # renamed from `eval` to avoid shadowing the built-in
    evaluator.scores = defaultdict(list)
    instr_ids = set(evaluator.instr_ids)
    with open(output_file) as f:
        for item in json.load(f):
            if item['instr_id'] in instr_ids:
                output_ids.append(item['instr_id'])
                instr_ids.remove(item['instr_id'])
                evaluator._score_item(item['instr_id'], item['trajectory'])

    return output_ids, evaluator.scores
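
A short usage sketch for get_scores(): the returned scores object is a defaultdict mapping each metric to a per-instruction list, so split-level numbers are just the means. The results file name below is hypothetical.

import numpy as np

output_ids, scores = get_scores('results/val_seen_lstm.json', 'val_seen')
for metric, values in scores.items():
    print('%s: %.3f over %d instructions' % (metric, float(np.mean(values)), len(values)))
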
Example no. 13
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup(seed)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind), Evaluation([split], seed=seed))
        for split in ['val_seen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()
    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          history,
          feedback_method,
          max_episode_len,
          max_input_length,
          model_prefix,
          val_envs=val_envs,
          debug=debug)
Example no. 14
def train_val_augment(test_only=False):
    """
    Train the listener with the augmented data
    """
    setup()

    # Create a batch training environment that will also preprocess text
    tok_bert = get_tokenizer(args)

    # Load the env img features
    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    # Load the augmentation data
    aug_path = args.aug
    # Create the training environment
    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok_bert)
    aug_env = R2RBatch(feat_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok_bert,
                       name='aug')

    # Setup the validation data
    val_envs = {
        split: (R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=[split],
                         tokenizer=tok_bert),
                Evaluation([split], featurized_scans, tok_bert))
        for split in val_env_names
    }

    # Start training
    train(train_env, tok_bert, args.iters, val_envs=val_envs, aug_env=aug_env)
Example no. 15
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)

    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict
    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              candidate_dict,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in ['val_seen', 'val_unseen', 'train']))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example no. 16
def train_val(test_only=False):
    ''' Train on the training set, and validate on seen and unseen splits. '''
    setup()
    tok = get_tokenizer(args)

    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    train_env = R2RBatch(feat_dict,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    if args.submit:
        val_env_names.append('test')
    else:
        pass

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        valid(train_env, tok, val_envs=val_envs)
    else:
        assert False
Example no. 17
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    # load object feature
    obj_s_feat = None
    if args.sparseObj:
        obj_s_feat = utils.read_obj_sparse_features(sparse_obj_feat,
                                                    args.objthr)

    obj_d_feat = None
    if args.denseObj:
        obj_d_feat = utils.read_obj_dense_features(dense_obj_feat1,
                                                   dense_obj_feat2, bbox,
                                                   sparse_obj_feat,
                                                   args.objthr)

    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         obj_d_feat=obj_d_feat,
                         obj_s_feat=obj_s_feat,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        #val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              obj_d_feat=obj_d_feat,
                                              obj_s_feat=obj_s_feat,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example no. 18
from eval import Evaluation
# import nltk
# emb_path='D:\\IOM\\word2vec\\GoogleNews-vectors-negative300.bin'
# import jieba
emb_path = 'D:\\IOM\\word2vec\\merge_sgns_bigram_char300.bin'
from gensim.models import KeyedVectors
wv_from_bin = KeyedVectors.load_word2vec_format(emb_path, binary=True)
eval_class = Evaluation('', wv_from_bin)

sep2 = '*#*'
sep1 = '|||'


def cut_triples(line):
    global notriple
    line = line.strip()
    triples = []
    for triple_str in line.split(sep2):
        triple_es = triple_str.split(sep1)
        # # correction for cases with no triples
        # if len(triple_es)>3:
        #     return []
        triples.append(triple_es)
    return triples


# pres=['Twitter.100w.test.att','Twitter.100w.test.attbeam.num','Twitter.100w.test.nmt']
# key='Twitter.100w.test.key'
# key_path='D:\\ieee\\code\\idef\\ex\\2\\new\\'

pres = [
Example no. 19
def main(opts):
    # set manual_seed and build vocab
    print(opts, flush=True)

    setup(opts, opts.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"Usando {device} :)")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1, vocab=vocab, encoding_length=opts.max_cap_length)

    # create language instruction encoder
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        'opts':opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }

    if opts.arch == 'regretful':
        model = Regretful(**policy_model_kwargs)
    elif opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)

    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(opts, model, encoder, optimizer)

    # if a secondary exp name is specified, this is useful when resuming from a previous saved
    # experiment and save to another experiment, e.g., pre-trained on synthetic data and fine-tune on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir, opts.blind)

    if opts.test_submission:
        assert opts.resume, 'The model was not resumed before running for submission.'
        test_env = ('test', (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 splits=['test'], tokenizer=tok), Evaluation(['test'], opts)))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['train'], tokenizer=tok)
    else:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['synthetic'], tokenizer=tok)

    val_craft_splits = ['craft_seen', 'craft_unseen']
    val_splits = ['val_seen', 'val_unseen']
    if opts.craft_eval:
        val_splits += val_craft_splits
    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok), Evaluation([split], opts))
                for split in val_splits}
    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)

    if opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)
    sys.stdout.flush()
    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))

            success_rate_compare = success_rate[1]  # index 1 corresponds to val_unseen

            if is_experiment():
                # remember best val_unseen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(best_success_rate))
                sys.stdout.flush()

                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir, name=opts.exp_name)

        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                     splits=['train'], tokenizer=tok)

    print("--> Finished training")
Example no. 20
def test(cfg, dataLoader, model, models_info=None, models_vtx=None):
    model.eval()
    if cfg.pytorch.exp_mode == 'val':
        from eval import Evaluation
        Eval = Evaluation(cfg.pytorch, models_info, models_vtx)
    elif cfg.pytorch.exp_mode == 'test':
        csv_file = open(cfg.pytorch.save_csv_path, 'w')
        fieldnames = ['scene_id', 'im_id', 'obj_id', 'score', 'R', 't', 'time']
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        rst_collect = []

    preds = {}
    nIters = len(dataLoader)
    bar = Bar('{}_{}'.format(cfg.pytorch.dataset, cfg.pytorch.object),
              max=nIters)
    wall_time = 0
    for i, (input, pose, bbox, center, size, clsIdx, imgPath, scene_id,
            image_id, score) in enumerate(dataLoader):
        input_var = input.cuda(cfg.pytorch.gpu,
                               non_blocking=True).float().cuda(cfg.pytorch.gpu)  # `async=` was renamed to `non_blocking=`; `async` is a keyword in Python 3.7+
        batch_size = len(input)
        # time begin
        T_begin = time.time()
        output_conf, output_coor_x, output_coor_y, output_coor_z = model(
            input_var)
        output_coor_x = output_coor_x.data.cpu().numpy().copy()
        output_coor_y = output_coor_y.data.cpu().numpy().copy()
        output_coor_z = output_coor_z.data.cpu().numpy().copy()
        outConf = output_conf.data.cpu().numpy().copy()
        output_trans = np.zeros(batch_size)
        collector = list(
            zip(clsIdx.numpy(), output_coor_x, output_coor_y,
                output_coor_z, outConf, pose.numpy(), bbox.numpy(),
                center.numpy(), size.numpy(), input.numpy(), scene_id.numpy(),
                image_id.numpy(), score.numpy()))
        colLen = len(collector)
        for idx in range(colLen):
            clsIdx_, output_coor_x_, output_coor_y_, output_coor_z_, output_conf_, pose_gt, bbox_, center_, size_, input_, scene_id_, image_id_, score_ = collector[
                idx]
            if cfg.pytorch.dataset.lower() == 'lmo':
                cls = ref.lmo_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tless':
                cls = ref.tless_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'ycbv':
                cls = ref.ycbv_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tudl':
                cls = ref.tudl_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'hb':
                cls = ref.hb_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'icbin':
                cls = ref.icbin_id2obj[clsIdx_]
            elif cfg.pytorch.dataset.lower() == 'itodd':
                cls = ref.itodd_id2obj[int(clsIdx_)]

            select_pts_2d = []
            select_pts_3d = []
            center_h = center_[0]
            center_w = center_[1]
            size_ = int(size_)
            output_coor_x_ = output_coor_x_.squeeze()
            output_coor_y_ = output_coor_y_.squeeze()
            output_coor_z_ = output_coor_z_.squeeze()
            output_coor_ = np.stack([
                np.argmax(output_coor_x_, axis=0),
                np.argmax(output_coor_y_, axis=0),
                np.argmax(output_coor_z_, axis=0)
            ],
                                    axis=2)
            output_coor_[output_coor_ == cfg.network.coor_bin] = 0
            output_coor_ = 2.0 * output_coor_ / float(cfg.network.coor_bin -
                                                      1) - 1.0
            output_coor_[:, :, 0] = output_coor_[:, :, 0] * abs(
                models_info[clsIdx_]['min_x'])
            output_coor_[:, :, 1] = output_coor_[:, :, 1] * abs(
                models_info[clsIdx_]['min_y'])
            output_coor_[:, :, 2] = output_coor_[:, :, 2] * abs(
                models_info[clsIdx_]['min_z'])
            output_conf_ = np.argmax(output_conf_, axis=0)
            output_conf_ = (output_conf_ - output_conf_.min()) / (
                output_conf_.max() - output_conf_.min())
            min_x = 0.001 * abs(models_info[clsIdx_]['min_x'])
            min_y = 0.001 * abs(models_info[clsIdx_]['min_y'])
            min_z = 0.001 * abs(models_info[clsIdx_]['min_z'])
            w_begin = center_w - size_ / 2.
            h_begin = center_h - size_ / 2.
            w_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            h_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            output_conf_ = output_conf_.tolist()
            output_coor_ = output_coor_.tolist()
            for x in range(cfg.dataiter.rot_output_res):
                for y in range(cfg.dataiter.rot_output_res):
                    if output_conf_[x][y] < cfg.test.mask_threshold:
                        continue
                    if abs(output_coor_[x][y][0]) < min_x  and abs(output_coor_[x][y][1]) < min_y  and \
                        abs(output_coor_[x][y][2]) < min_z:
                        continue
                    select_pts_2d.append(
                        [w_begin + y * w_unit, h_begin + x * h_unit])
                    select_pts_3d.append(output_coor_[x][y])
            model_points = np.asarray(select_pts_3d, dtype=np.float32)
            image_points = np.asarray(select_pts_2d, dtype=np.float32)
            try:
                _, R_vector, T_vector, inliers = cv2.solvePnPRansac(
                    model_points,
                    image_points,
                    cfg.pytorch.camera_matrix,
                    np.zeros((4, 1)),
                    flags=cv2.SOLVEPNP_EPNP)
                cur_wall_time = time.time() - T_begin
                wall_time += cur_wall_time
                R_matrix = cv2.Rodrigues(R_vector, jacobian=0)[0]
                if R_matrix[0, 0] == 1.0:
                    continue
                if cfg.pytorch.exp_mode == 'val':
                    pose_est = np.concatenate(
                        (R_matrix, np.asarray(T_vector).reshape(3, 1)), axis=1)
                    Eval.pose_est_all[cls].append(pose_est)
                    Eval.pose_gt_all[cls].append(pose_gt)
                    Eval.num[cls] += 1
                    Eval.numAll += 1
                elif cfg.pytorch.exp_mode == 'test':
                    rst = {
                        'scene_id': int(scene_id_),
                        'im_id': int(image_id_),
                        'R': R_matrix.reshape(-1).tolist(),
                        't': T_vector.reshape(-1).tolist(),
                        'score': float(score_),
                        'obj_id': int(clsIdx_),  # use the per-item class id (clsIdx is the whole batch tensor)
                        'time': cur_wall_time
                    }
                    rst_collect.append(rst)
            except:
                if cfg.pytorch.exp_mode == 'val':
                    Eval.num[cls] += 1
                    Eval.numAll += 1
        Bar.suffix = '{0} [{1}/{2}]| Total: {total:} | ETA: {eta:}'.format(
            cfg.pytorch.exp_mode,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td)
        bar.next()
    if cfg.pytorch.exp_mode == 'val':
        Eval.evaluate_pose()
    elif cfg.pytorch.exp_mode == 'test':
        for item in rst_collect:
            csv_writer.writerow(item)
        csv_file.close()
    print("Wall time of object {}: total {} seconds for {} samples".format(
        cfg.pytorch.object, wall_time, nIters))
    bar.finish()
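
The core of the loop above is the 2D-3D correspondence step: predicted object coordinates are paired with their pixel locations and passed to cv2.solvePnPRansac. Below is a self-contained sketch of just that step with synthetic correspondences; the intrinsics and point cloud are made up for illustration.

import numpy as np
import cv2

camera_matrix = np.array([[572.4, 0.0, 325.3],
                          [0.0, 573.6, 242.0],
                          [0.0, 0.0, 1.0]])                         # example intrinsics (assumption)
model_points = (np.random.rand(50, 3) * 0.1).astype(np.float32)     # 3D object points in metres
R_gt, _ = cv2.Rodrigues(np.array([[0.1], [-0.2], [0.05]]))          # a known ground-truth pose
t_gt = np.array([[0.0], [0.0], [0.5]])
proj = (camera_matrix @ (R_gt @ model_points.T + t_gt)).T            # project into the image
image_points = (proj[:, :2] / proj[:, 2:]).astype(np.float32)

_, R_vector, T_vector, inliers = cv2.solvePnPRansac(
    model_points, image_points, camera_matrix, np.zeros((4, 1)), flags=cv2.SOLVEPNP_EPNP)
R_matrix, _ = cv2.Rodrigues(R_vector)
print('recovered rotation close to ground truth:', np.allclose(R_matrix, R_gt, atol=1e-2))
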
Example no. 21
def test():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())

    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.3
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 80000
    args.maxAction = 35
    args.batchSize = 24
    args.target_batch_size = 24

    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'

    args.speaker = 'snap/speaker/state_dict/best_val_unseen_bleu'

    args.featdropout = 0.4
    args.iters = 200000

    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD

    log_dir = 'snap/%s' % args.name
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logdir = '%s/eval' % log_dir
    writer = SummaryWriter(logdir=logdir)

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'

    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'

    if args.features == 'imagenet':
        features = IMAGENET_FEATURES

    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext

    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    setup()

    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    print('start extract keys...')
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')

    val_envs = {
        split: R2RBatch(feat_dict,
                        batch_size=args.batchSize,
                        splits=[split],
                        tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    learner = Learner(val_envs,
                      "",
                      tok,
                      args.maxAction,
                      process_num=2,
                      visible_gpu=visible_gpu)
    learner.eval_init()

    for i in range(0, 10000):
        ckpt = '%s/state_dict/Iter_%06d' % (log_dir, (i + 1) * 100)
        while not os.path.exists(ckpt):
            time.sleep(10)

        time.sleep(10)

        learner.load_eval(ckpt)

        results = learner.eval()
        loss_str = ''
        for key in results:
            evaluator = evaluators[key]
            result = results[key]

            score_summary, _ = evaluator.score(result)

            loss_str += ", %s \n" % key

            for metric, val in score_summary.items():
                loss_str += ', %s: %.3f' % (metric, val)
                writer.add_scalar('%s/%s' % (metric, key), val, (i + 1) * 100)

            loss_str += '\n'

        print(loss_str)
Example no. 22
def train_val(seed=None):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Resume from latest checkpoint (if any)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if hasattr(args, 'load_path') and hasattr(args, 'eval_only') and args.eval_only:
            sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters

    # Setup seed and read vocab
    setup(seed=seed)

    train_vocab_path = os.path.join(hparams.data_path, 'train_vocab.txt')
    if hasattr(hparams, 'external_main_vocab') and hparams.external_main_vocab:
        train_vocab_path = hparams.external_main_vocab

    if 'verbal' in hparams.advisor:
        subgoal_vocab_path = os.path.join(hparams.data_path, hparams.subgoal_vocab)
        vocab = read_vocab([train_vocab_path, subgoal_vocab_path])
    else:
        vocab = read_vocab([train_vocab_path])
    tok = Tokenizer(vocab=vocab, encoding_length=hparams.max_input_length)

    # Create a training environment
    train_env = VNLABatch(hparams, split='train', tokenizer=tok)

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if '_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        if '_seen' in hparams.load_path:
            val_splits = ['test_seen']
        end_iter = start_iter + hparams.log_every

    val_envs = { split: (VNLABatch(hparams, split=split, tokenizer=tok,
        from_train_env=train_env, traj_len_estimates=train_env.traj_len_estimates),
        Evaluation(hparams, [split], hparams.data_path)) for split in val_splits}

    # Build models
    model = AttentionSeq2SeqModel(len(vocab), hparams, device).to(device)

    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
        weight_decay=hparams.weight_decay)

    best_metrics = { 'val_seen'  : -1,
                     'val_unseen': -1,
                     'combined'  : -1 }

    # Load model parameters from a checkpoint (if any)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)

    # Initialize agent
    if 'verbal' in hparams.advisor:
        agent = VerbalAskAgent(model, hparams, device)
    elif hparams.advisor == 'direct':
        agent = AskAgent(model, hparams, device)

    # Train
    return train(train_env, val_envs, agent, model, optimizer, start_iter, end_iter,
          best_metrics, eval_mode)
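
The checkpoint keys consumed here ('iter', 'model_state_dict', 'optim_state_dict', 'best_metrics', 'data_idx') imply the shape of the save/load helpers. A minimal sketch of both is below, assuming torch serialization; the project's own save routine may add more fields.

import torch

def load(path, device):
    # Matches the call `ckpt = load(hparams.load_path, device)` above.
    return torch.load(path, map_location=device)

def save_sketch(path, model, optimizer, it, best_metrics, data_idx):
    # Hypothetical counterpart that writes the keys train_val() reads back.
    torch.save({
        'iter': it,
        'model_state_dict': model.state_dict(),
        'optim_state_dict': optimizer.state_dict(),
        'best_metrics': best_metrics,
        'data_idx': data_idx,
    }, path)
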
Example no. 23
image_features_list = ImageFeatures.from_args(args)
vocab = read_vocab(TRAIN_VOCAB, args.language)
tok = Tokenizer(vocab)
env = R2RBatch(image_features_list,
               batch_size=256,
               splits=['train', 'val_seen', 'val_unseen'],
               tokenizer=tok)
env.batch = env.data

from eval import Evaluation

test_envs = {
    split: (R2RBatch(image_features_list,
                     batch_size=64,
                     splits=[split],
                     tokenizer=tok), Evaluation([split]))
    for split in ['val_unseen']
}

agent = make_follower(args, vocab)


def average(_l):
    return float(sum(_l)) / len(_l)


def load_data(filenames):
    all_data = []
    for fn in filenames:
        with open(fn, 'r') as f:
            train_file = json.loads(f.read())
Example no. 24
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open(
                '/egr/research-hlr/joslin/Matterdata/v1/scans/img_features/pano_object_class.pkl',
                'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    train_env = R2RBatch(feat_dict,
                         pano_caffe,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        # if you want to test "train", just uncomment this
        #val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              pano_caffe,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    # import sys
    # sys.exit()
    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example no. 25
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert','vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    #train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
    #                     path_type=path_type, history=history, blind=blind)

    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic,args.action_space,batch_size=args.batch_size, splits=['train'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)

    # Create validation environments
    #val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
    #            tokenizer=tok, path_type=path_type, history=history, blind=blind),
    #            Evaluation([split], path_type=path_type)) for split in ['val_seen', 'val_unseen']}

    val_envs = {split: (R2RBatch(feature_store,nav_graphs, args.panoramic, args.action_space,batch_size=args.batch_size, splits=[split],
                tokenizer=tok, path_type=path_type, history=history, blind=blind),
                Evaluation([split], path_type=path_type)) for split in ['val_seen','val_unseen']}

    # Build models and train
    #enc_hidden_size = hidden_size//2 if bidirectional else hidden_size

    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE,args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm,args.vl_layers,args.la_layers,args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()

        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE,args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm,args.vl_layers,args.la_layers,args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size

    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm, args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            #encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            #encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
                            dropout_ratio, bidirectional=bidirectional).cuda()


    #decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
    #              action_embedding_size, args.hidden_size, args.dropout_ratio).cuda()
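    # Width of the context the decoder attends over: enc_hidden_size (doubled if
    # bidirectional), or 768 when raw BERT features are used without a top LSTM
    # (the hard-coded hidden size of a base-size BERT).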
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768

    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                  action_embedding_size, ctx_hidden_size, args.hidden_size, args.dropout_ratio,
                  FEATURE_SIZE, args.panoramic, args.action_space, args.dec_h_type).cuda()


    train(train_env, encoder, decoder, n_iters,
          path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs, args=args)
Esempio n. 26
0
def train_val_augment():
    """
    Train the listener with the augmented data
    """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    # Load the augmentation data
    if args.aug is None:  # If no aug path is given, fall back to the default speaker snapshot
        speaker_snap_name = "adam_drop6_correctsave"
        print("Loading from %s" % speaker_snap_name)
        aug_path = "snap/speaker/long/%s/aug_data/best_val_unseen_loss.json" % speaker_snap_name
    else:  # Load the path from args
        aug_path = args.aug

    # Splits used for training: combine the ground-truth 'train' split with the augmented
    # paths when combineAug is set, otherwise train on the augmented paths only
    splits = [aug_path, 'train'] if args.combineAug else [aug_path]

    # Create the training environment
    if args.half_half:
        assert args.aug is not None
        gt_env = R2RBatch(feat_dict,
                          candidate_dict,
                          batch_size=args.batchSize,
                          splits=['train'],
                          tokenizer=tok)
        aug_env = R2RBatch(feat_dict,
                           candidate_dict,
                           batch_size=args.batchSize,
                           splits=[aug_path],
                           tokenizer=tok)
        train_env = ArbiterBatch(gt_env,
                                 aug_env,
                                 args.batchSize // 2,
                                 args.batchSize // 2,
                                 feat_dict,
                                 candidate_dict,
                                 batch_size=args.batchSize,
                                 splits=[],
                                 tokenizer=tok)
    else:
        train_env = R2RBatch(feat_dict,
                             candidate_dict,
                             batch_size=args.batchSize,
                             splits=splits,
                             tokenizer=tok)

    print("The augmented data_size is : %d" % train_env.size())
    # stats = train_env.get_statistics()
    # print("The average instruction length of the dataset is %0.4f." % (stats['length']))
    # print("The average action length of the dataset is %0.4f." % (stats['path']))

    # Setup the validation data
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen']
    }

    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs)
Esempio n. 27
0
def meta_filter():
    """
    Filter the augmented data: fine-tune the listener on each augmented datum
    and check whether the validation success rate improves over the baseline.
    """
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    if args.fast_train:
        feat_dict = read_img_features(features_fast)
    else:
        feat_dict = read_img_features(features)
    candidate_dict = utils.read_candidates(CANDIDATE_FEATURES)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    # Load the augmentation data
    if args.aug is None:  # If no aug path is given, fall back to the default speaker snapshot
        speaker_snap_name = "adam_drop6_correctsave"
        print("Loading from %s" % speaker_snap_name)
        aug_path = "snap/speaker/long/%s/aug_data/best_val_unseen_loss.json" % speaker_snap_name
    else:  # Load the path from args
        aug_path = args.aug

    # Create the training environment
    aug_env = R2RBatch(feat_dict,
                       candidate_dict,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok)
    train_env = R2RBatch(feat_dict,
                         candidate_dict,
                         batch_size=args.batchSize,
                         splits=['train@3333'],
                         tokenizer=tok)
    print("The augmented data_size is : %d" % train_env.size())
    stats = train_env.get_statistics()
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    # Setup the validation data
    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  candidate_dict,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in ['train', 'val_seen', 'val_unseen@133']
    }

    val_env, val_eval = val_envs['val_unseen@133']

    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)

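    # Run the listener greedily (argmax feedback) on val_unseen@133 and return its
    # success rate.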
    def filter_result():
        listner.env = val_env
        val_env.reset_epoch()
        listner.test(use_dropout=False, feedback='argmax')
        result = listner.get_results()
        score_summary, _ = val_eval.score(result)
        for metric, val in score_summary.items():
            if metric in ['success_rate']:
                return val

    listner.load(args.load)
    base_accu = filter_result()
    print("BASE ACCU %0.4f" % base_accu)

    success = 0

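    # For each augmented datum: reload the base listener, fine-tune it briefly on that
    # datum alone, and count a success if the validation accuracy improves over the base.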
    for data_id, datum in enumerate(aug_env.data):
        # Reload the param of the listener
        listner.load(args.load)
        train_env.reset_epoch(shuffle=True)

        listner.env = train_env

        # Train for the datum
        # iters = train_env.size() // train_env.batch_size
        iters = 10
        for i in range(iters):
            listner.env = train_env
            # train_env.reset(batch=([datum] * (train_env.batch_size // 2)), inject=True)
            train_env.reset(batch=[datum] * train_env.batch_size, inject=True)
            # train_env.reset()
            # train_env.reset()
            listner.train(1, feedback='sample', reset=False)
        # print("Iter %d, result %0.4f" % (i, filter_result()))
        now_accu = filter_result()
        if now_accu > base_accu:
            success += 1
        # print("RESULT %0.4f" % filter_result())
        print('Accu now %0.4f, success / total: %d / %d = %0.4f' %
              (now_accu, success, data_id + 1, success / (data_id + 1)))
Esempio n. 28
0
def train_val_augment():
    """
    Train the listener with the augmented data
    """
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    # Load the env img features
    feat_dict = read_img_features(features)
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    # Load the augmentation data
    if args.upload:
        aug_path = get_sync_dir(os.path.join(args.upload_path, args.aug))
    else:
        aug_path = os.path.join(args.R2R_Aux_path, args.aug)

    # Create the training environment

    # load object feature
    obj_s_feat = None
    if args.sparseObj:
        obj_s_feat = utils.read_obj_sparse_features(sparse_obj_feat,
                                                    args.objthr)

    obj_d_feat = None
    if args.denseObj:
        obj_d_feat = utils.read_obj_dense_features(dense_obj_feat1,
                                                   dense_obj_feat2, bbox,
                                                   sparse_obj_feat,
                                                   args.objthr)

    train_env = R2RBatch(feat_dict,
                         obj_d_feat=obj_d_feat,
                         obj_s_feat=obj_s_feat,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    aug_env = R2RBatch(feat_dict,
                       obj_d_feat=obj_d_feat,
                       obj_s_feat=obj_s_feat,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok,
                       name='aug')

    # Printing out the statistics of the dataset
    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    # Setup the validation data
    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              obj_d_feat=obj_d_feat,
                                              obj_s_feat=obj_s_feat,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in ['train', 'val_seen', 'val_unseen']))

    # Start training
    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
Esempio n. 29
0
def main(opts):

    # set manual_seed and build vocab
    setup(opts, opts.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(
        opts.remove_punctuation == 1,
        opts.reversed == 1,
        vocab=vocab,
        encoding_length=opts.max_cap_length,
    )

    # create language instruction encoder
    encoder_kwargs = {
        "opts": opts,
        "vocab_size": len(vocab),
        "embedding_size": opts.word_embedding_size,
        "hidden_size": opts.rnn_hidden_size,
        "padding_idx": padding_idx,
        "dropout_ratio": opts.rnn_dropout,
        "bidirectional": opts.bidirectional == 1,
        "num_layers": opts.rnn_num_layers,
    }
    print("Using {} as encoder ...".format(opts.lang_embed))
    if "lstm" in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError("Unknown {} language embedding".format(
            opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        "opts": opts,
        "img_fc_dim": opts.img_fc_dim,
        "img_fc_use_batchnorm": opts.img_fc_use_batchnorm == 1,
        "img_dropout": opts.img_dropout,
        "img_feat_input_dim": opts.img_feat_input_dim,
        "rnn_hidden_size": opts.rnn_hidden_size,
        "rnn_dropout": opts.rnn_dropout,
        "max_len": opts.max_cap_length,
        "max_navigable": opts.max_navigable,
    }

    if opts.arch == "self-monitoring":
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == "speaker-baseline":
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError("Unknown {} model for seq2seq agent".format(
            opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)

    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(
            opts, model, encoder, optimizer)

    # If a secondary experiment name is specified, append it. This is useful when resuming
    # from a previously saved experiment and saving to a new one, e.g., pre-training on
    # synthetic data and then fine-tuning on real data.
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir)

    if opts.test_submission:
        assert opts.resume, "A trained checkpoint must be resumed before generating a test submission."
        test_env = (
            "test",
            (
                R2RPanoBatch(
                    opts,
                    feature,
                    img_spec,
                    batch_size=opts.batch_size,
                    splits=["test"],
                    tokenizer=tok,
                ),
                Evaluation(["test"]),
            ),
        )
        agent_kwargs = {
            "opts": opts,
            "env": test_env[1][0],
            "results_path": "",
            "encoder": encoder,
            "model": model,
            "feedback": opts.feedback,
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments
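    # With train_data_augmentation enabled, training starts on the synthetic split and is
    # switched back to the real 'train' split after epochs_data_augmentation (see the end
    # of the epoch loop).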
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(
            opts,
            feature,
            img_spec,
            batch_size=opts.batch_size,
            seed=opts.seed,
            splits=["train"],
            tokenizer=tok,
        )
    else:
        train_env = R2RPanoBatch(
            opts,
            feature,
            img_spec,
            batch_size=opts.batch_size,
            seed=opts.seed,
            splits=["synthetic"],
            tokenizer=tok,
        )

    val_envs = {
        split: (
            R2RPanoBatch(
                opts,
                feature,
                img_spec,
                batch_size=opts.batch_size,
                splits=[split],
                tokenizer=tok,
            ),
            Evaluation([split]),
        )
        for split in ["val_seen", "val_unseen"]
    }

    # create agent
    agent_kwargs = {
        "opts": opts,
        "env": train_env,
        "results_path": "",
        "encoder": encoder,
        "model": model,
        "feedback": opts.feedback,
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer,
                                 opts.train_iters_epoch)

    if opts.eval_beam or opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(
                trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)

    best_success_rate = best_success_rate if opts.resume else 0.0

    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))

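            # success_rate[1] is the val_unseen result (val_envs is built in the order
            # val_seen, then val_unseen, and dict insertion order is preserved).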
            success_rate_compare = success_rate[1]

            if is_experiment():
                # remember the best val_unseen success rate and save a checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare,
                                        best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(
                    best_success_rate))

                # save the model if it is the best so far
                save_checkpoint(
                    {
                        "opts": opts,
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "encoder_state_dict": encoder.state_dict(),
                        "best_success_rate": best_success_rate,
                        "optimizer": optimizer.state_dict(),
                        "max_episode_len": opts.max_episode_len,
                    },
                    is_best,
                    checkpoint_dir=opts.checkpoint_dir,
                    name=opts.exp_name,
                )

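        # Once the scheduled number of augmentation epochs has elapsed, switch training
        # from the synthetic split back to the real 'train' split.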
        if (opts.train_data_augmentation
                and epoch == opts.epochs_data_augmentation):
            train_env = R2RPanoBatch(
                opts,
                feature,
                img_spec,
                batch_size=opts.batch_size,
                seed=opts.seed,
                splits=["train"],
                tokenizer=tok,
            )

    print("--> Finished training")
Esempio n. 30
0
    'prog_monitor': True,
    'dev_monitor': False,
    'attn_only_verb': False,
    'soft_align': False,
    'scorer': None,
    'load_follower': 'tasks/R2R/experiments/pretrain_cgPm_pertraj/snapshots/follower_cg_pm_sample2step_imagenet_mean_pooled_1heads_train_iter_1900_val_unseen-success_rate=0.478'
})

image_features_list = ImageFeatures.from_args(args)
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab)
env = R2RBatch(image_features_list, batch_size=256, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)
env.batch = env.data

from eval import Evaluation
test_envs = {split: (R2RBatch(image_features_list, batch_size=64, splits=[split], tokenizer=tok),
                     Evaluation([split]))
             for split in ['val_unseen']}

agent = make_follower(args, vocab)

def average(_l):
    return float(sum(_l)) / len(_l)

def load_data(filenames):
    all_data = []
    for fn in filenames:
        with open(fn,'r') as f:
            train_file = json.loads(f.read())
        train_instrs = list(train_file.keys())
        train_data = {}

        for instr_id in train_instrs: