def plot_final_scores():
    ''' Plot the scores '''
    font = {
      'size'   : 12
    }
    mpl.rc('font', **font)
    fig, ax = plt.subplots( nrows=1, ncols=1, figsize=(7,4) )  # create figure & 1 axis
    outfiles = [
        RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json',
        RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
        RESULT_DIR + '%s_stop_agent.json',
        RESULT_DIR + '%s_random_agent.json'
    ]
    for split in ['val_seen']:
        ev = Evaluation([split])
        for i,outfile in enumerate(outfiles):
            score_summary,scores = ev.score(outfile % split)
            # density=True gives a normalized histogram (the old 'normed' kwarg
            # was removed in recent matplotlib releases)
            if i == 1:
                method = 'Teacher-forcing'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method, density=True, histtype='step', linewidth=2.5, color='C1')
            elif i == 0:
                method = 'Student-forcing'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method, alpha=0.7, density=True, color='C0')
            elif i == 2:
                method = 'Start locations'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method, density=True, histtype='step', linewidth=2.5, color='C3')
            elif i == 3:
                method = 'Random agent'
                ax.hist(scores['nav_errors'], bins=range(0, 30, 3), label=method, density=True, histtype='step', linewidth=2.5, color='C2')
    ax.set_title('Val Seen Navigation Error')
    ax.set_xlabel('Error (m)')
    ax.set_ylabel('Frequency')
    ax.set_ylim([0,0.14])
    ax.set_xlim([0,30])
    plt.axvline(x=3, color='black', linestyle='--')
    legend = ax.legend(loc='upper right')
    plt.tight_layout()
    plt.savefig('%s/val_seen_error.png' % (PLOT_DIR))
    plt.close(fig)
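This snippet relies on module-level imports and constants that are not shown here. A minimal sketch of the assumed scaffolding (the directory values are placeholders, and the Evaluation import path is the one used in Example #4 below; adjust both to your project):

# Assumed scaffolding for plot_final_scores() above -- paths are placeholders.
import matplotlib as mpl
import matplotlib.pyplot as plt

from eval import Evaluation   # import path as in Example #4; adjust to your project

RESULT_DIR = 'results/'       # placeholder: directory holding the *_iter_*.json result files
PLOT_DIR = 'plots'            # placeholder: output directory for the saved figure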
Example #2
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Load hyperparameters from checkpoint (if exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if not hparams.forward_agent and not hparams.random_agent and not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0  # start from scratch only when no checkpoint was loaded
    end_iter = hparams.n_iters

    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None

    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)

    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')

    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    if hparams.encoder_type == 'dic':
        tokenizer = BTokenizer(vocab=vocab,encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)

    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1

    if hparams.eval_on_val:
        val_splits = [x.replace('test_', 'val_') for x in val_splits]

    val_envs_tmp = { split: (
        Batch(hparams, simulator, featurizer, tokenizer, split=split),
        Evaluation(hparams, [split], hparams.data_path))
            for split in val_splits }

    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value

    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
        weight_decay=hparams.weight_decay)

    best_metrics = { env_name  : -1 for env_name in val_envs.keys() }
    best_metrics['combined'] = -1

    # Load model parameters from checkpoint (if exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    if hparams.log_every == -1:
        hparams.log_every = round(len(train_env.data) / \
            (hparams.batch_size * 100)) * 100

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
        sum(p.numel() for p in model.parameters() if p.requires_grad))

    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)

    return train(train_env, val_envs, agent, model, optimizer, start_iter,
        end_iter, best_metrics, eval_mode)
Example #3
def test(cfg, dataLoader, model, models_info=None, models_vtx=None):
    model.eval()
    if cfg.pytorch.exp_mode in ['val']:
        from eval import Evaluation
        Eval = Evaluation(cfg.pytorch, models_info, models_vtx)
    elif cfg.pytorch.exp_mode == 'test':
        csv_file = open(cfg.pytorch.save_csv_path, 'w')
        fieldnames = ['scene_id', 'im_id', 'obj_id', 'score', 'R', 't', 'time']
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        rst_collect = []

    preds = {}
    nIters = len(dataLoader)
    bar = Bar('{}_{}'.format(cfg.pytorch.dataset, cfg.pytorch.object),
              max=nIters)
    wall_time = 0
    for i, (input, pose, bbox, center, size, clsIdx, imgPath, scene_id,
            image_id, score) in enumerate(dataLoader):
        # input_var = input.cuda(cfg.pytorch.gpu, async=True).float().cuda(cfg.pytorch.gpu)
        input_var = input.cuda(cfg.pytorch.gpu,
                               non_blocking=True).float().cuda(cfg.pytorch.gpu)
        batch_size = len(input)
        if cfg.pytorch.dataset.lower() == 'tless' or cfg.pytorch.dataset.lower(
        ) == 'itodd':  # camera_matrix vary with images in TLESS & ITODD
            K = np.array(imgPath).reshape(
                3, 3)  # 'imgPath' in TLESS & ITODD is camera_matrix
        # time begin
        T_begin = time.time()
        output_conf, output_coor_x, output_coor_y, output_coor_z = model(
            input_var)
        output_coor_x = output_coor_x.data.cpu().numpy().copy()
        output_coor_y = output_coor_y.data.cpu().numpy().copy()
        output_coor_z = output_coor_z.data.cpu().numpy().copy()
        outConf = output_conf.data.cpu().numpy().copy()
        output_trans = np.zeros(batch_size)
        collector = list(
            zip(clsIdx.numpy(), output_coor_x, output_coor_y,
                output_coor_z, outConf, pose.numpy(), bbox.numpy(),
                center.numpy(), size.numpy(), input.numpy(), scene_id.numpy(),
                image_id.numpy(), score.numpy()))
        colLen = len(collector)
        for idx in range(colLen):
            clsIdx_, output_coor_x_, output_coor_y_, output_coor_z_, output_conf_, pose_gt, bbox_, center_, size_, input_, scene_id_, image_id_, score_ = collector[
                idx]
            if cfg.pytorch.dataset.lower() == 'lmo':
                cls = ref.lmo_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tless':
                cls = ref.tless_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'ycbv':
                cls = ref.ycbv_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'tudl':
                cls = ref.tudl_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'hb':
                cls = ref.hb_id2obj[int(clsIdx_)]
            elif cfg.pytorch.dataset.lower() == 'icbin':
                cls = ref.icbin_id2obj[clsIdx_]
            elif cfg.pytorch.dataset.lower() == 'itodd':
                cls = ref.itodd_id2obj[int(clsIdx_)]

            select_pts_2d = []
            select_pts_3d = []
            center_h = center_[0]
            center_w = center_[1]
            size_ = int(size_)
            output_coor_x_ = output_coor_x_.squeeze()
            output_coor_y_ = output_coor_y_.squeeze()
            output_coor_z_ = output_coor_z_.squeeze()
            output_coor_ = np.stack([
                np.argmax(output_coor_x_, axis=0),
                np.argmax(output_coor_y_, axis=0),
                np.argmax(output_coor_z_, axis=0)
            ],
                                    axis=2)
            output_coor_[output_coor_ == cfg.network.coor_bin] = 0
            output_coor_ = 2.0 * output_coor_ / float(cfg.network.coor_bin -
                                                      1) - 1.0
            output_coor_[:, :, 0] = output_coor_[:, :, 0] * abs(
                models_info[clsIdx_]['min_x'])
            output_coor_[:, :, 1] = output_coor_[:, :, 1] * abs(
                models_info[clsIdx_]['min_y'])
            output_coor_[:, :, 2] = output_coor_[:, :, 2] * abs(
                models_info[clsIdx_]['min_z'])
            output_conf_ = np.argmax(output_conf_, axis=0)
            output_conf_ = (output_conf_ - output_conf_.min()) / (
                output_conf_.max() - output_conf_.min())
            min_x = 0.001 * abs(models_info[clsIdx_]['min_x'])
            min_y = 0.001 * abs(models_info[clsIdx_]['min_y'])
            min_z = 0.001 * abs(models_info[clsIdx_]['min_z'])
            w_begin = center_w - size_ / 2.
            h_begin = center_h - size_ / 2.
            w_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            h_unit = size_ * 1.0 / cfg.dataiter.rot_output_res
            output_conf_ = output_conf_.tolist()
            output_coor_ = output_coor_.tolist()
            for x in range(cfg.dataiter.rot_output_res):
                for y in range(cfg.dataiter.rot_output_res):
                    if output_conf_[x][y] < cfg.test.mask_threshold:
                        continue
                    if abs(output_coor_[x][y][0]) < min_x  and abs(output_coor_[x][y][1]) < min_y  and \
                        abs(output_coor_[x][y][2]) < min_z:
                        continue
                    select_pts_2d.append(
                        [w_begin + y * w_unit, h_begin + x * h_unit])
                    select_pts_3d.append(output_coor_[x][y])
            model_points = np.asarray(select_pts_3d, dtype=np.float32)
            image_points = np.asarray(select_pts_2d, dtype=np.float32)
            try:
                if cfg.pytorch.dataset.lower(
                ) == 'tless' or cfg.pytorch.dataset.lower(
                ) == 'itodd':  # camera_matrix vary with images in TLESS & ITODD
                    _, R_vector, T_vector, inliers = cv2.solvePnPRansac(
                        model_points,
                        image_points,
                        K,
                        np.zeros((4, 1)),
                        flags=cv2.SOLVEPNP_EPNP)
                else:
                    _, R_vector, T_vector, inliers = cv2.solvePnPRansac(
                        model_points,
                        image_points,
                        cfg.pytorch.camera_matrix,
                        np.zeros((4, 1)),
                        flags=cv2.SOLVEPNP_EPNP)
                cur_wall_time = time.time() - T_begin
                wall_time += cur_wall_time
                R_matrix = cv2.Rodrigues(R_vector, jacobian=0)[0]
                if R_matrix[0, 0] == 1.0:
                    continue
                if cfg.pytorch.exp_mode == 'val':
                    pose_est = np.concatenate(
                        (R_matrix, np.asarray(T_vector).reshape(3, 1)), axis=1)
                    Eval.pose_est_all[cls].append(pose_est)
                    Eval.pose_gt_all[cls].append(pose_gt)
                    Eval.num[cls] += 1
                    Eval.numAll += 1
                elif cfg.pytorch.exp_mode == 'test':
                    rst = {
                        'scene_id': int(scene_id_),
                        'im_id': int(image_id_),
                        'R': R_matrix.reshape(-1).tolist(),
                        't': T_vector.reshape(-1).tolist(),
                        'score': float(score_),
                        'obj_id': int(clsIdx_),  # per-sample class index
                        'time': cur_wall_time
                    }
                    rst_collect.append(rst)
            except Exception:  # solvePnPRansac/Rodrigues can fail, e.g. when too few points are selected
                if cfg.pytorch.exp_mode in ['val']:
                    Eval.num[cls] += 1
                    Eval.numAll += 1
        Bar.suffix = '{0} [{1}/{2}]| Total: {total:} | ETA: {eta:}'.format(
            cfg.pytorch.exp_mode,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td)
        bar.next()
    if cfg.pytorch.exp_mode == 'val':
        Eval.evaluate_pose()
    elif cfg.pytorch.exp_mode == 'test':
        for item in rst_collect:
            csv_writer.writerow(item)
        csv_file.close()
    print("Wall time of object {}: total {} seconds for {} samples".format(
        cfg.pytorch.object, wall_time, nIters))
    bar.finish()
Example #4
from eval import Evaluation
# import nltk
# emb_path='D:\\IOM\\word2vec\\GoogleNews-vectors-negative300.bin'
# import jieba
emb_path = 'D:\\IOM\\word2vec\\merge_sgns_bigram_char300.bin'
from gensim.models import KeyedVectors
wv_from_bin = KeyedVectors.load_word2vec_format(emb_path, binary=True)
eval_class = Evaluation('', wv_from_bin)

sep2 = '*#*'
sep1 = '|||'


def cut_triples(line):
    global notriple
    line = line.strip()
    triples = []
    for triple_str in line.split(sep2):
        triple_es = triple_str.split(sep1)
        # # Fix for lines without triples
        # if len(triple_es)>3:
        #     return []
        triples.append(triple_es)
    return triples
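A quick, made-up illustration of the input format cut_triples() expects (only the '*#*' and '|||' separators come from the snippet; the line content itself is invented):

# Two triples separated by sep2 ('*#*'); fields within a triple separated by sep1 ('|||').
line = 'Paris|||capital_of|||France*#*Berlin|||capital_of|||Germany'
print(cut_triples(line))
# -> [['Paris', 'capital_of', 'France'], ['Berlin', 'capital_of', 'Germany']]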


args = [(0, 100)]
for arg in args:
    print(arg)
    begin = arg[0]
    end = arg[1]
Example #5
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''
    # args.fast_train = True
    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)

    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    # load object feature
    obj_s_feat = None
    if args.sparseObj:
        print("Start loading the object sparse feature")
        start = time.time()
        obj_s_feat = np.load(sparse_obj_feat, allow_pickle=True).item()
        print(
            "Finish Loading the object sparse feature from %s in %0.4f seconds"
            % (sparse_obj_feat, time.time() - start))

    obj_d_feat = None
    if args.denseObj:
        print("Start loading the object dense feature")
        start = time.time()
        obj_d_feat1 = np.load(dense_obj_feat1, allow_pickle=True).item()
        obj_d_feat2 = np.load(dense_obj_feat2, allow_pickle=True).item()
        obj_d_feat = {**obj_d_feat1, **obj_d_feat2}
        print(
            "Finish Loading the dense object dense feature from %s and %s in %0.4f seconds"
            % (dense_obj_feat1, dense_obj_feat2, time.time() - start))

    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])

    train_env = R2RBatch(feat_dict,
                         obj_d_feat=obj_d_feat,
                         obj_s_feat=obj_s_feat,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    from collections import OrderedDict

    val_env_names = ['val_unseen', 'val_seen']
    if args.submit:
        val_env_names.append('test')
    else:
        pass
        #val_env_names.append('train')

    if not args.beam:
        val_env_names.append("train")

    val_envs = OrderedDict(((split, (R2RBatch(feat_dict,
                                              obj_d_feat=obj_d_feat,
                                              obj_s_feat=obj_s_feat,
                                              batch_size=args.batchSize,
                                              splits=[split],
                                              tokenizer=tok),
                                     Evaluation([split], featurized_scans,
                                                tok)))
                            for split in val_env_names))

    if args.train == 'listener':
        train(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validlistener':
        if args.beam:
            beam_valid(train_env, tok, val_envs=val_envs)
        else:
            valid(train_env, tok, val_envs=val_envs)
    elif args.train == 'speaker':
        train_speaker(train_env, tok, args.iters, val_envs=val_envs)
    elif args.train == 'validspeaker':
        valid_speaker(tok, val_envs)
    else:
        assert False
Example #6
class TuneTrainable(Trainable):
    def _setup(self, config):
        inject_tuned_hyperparameters(config, config)
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        print('Trainable got the following config after injection', config)
        self.config = config
        self.device = self.config['device']
        self.exp, self.model, self.train_dataloader, self.eval_dataloader = setup_training(
            self.config)
        self.exp.set_name(config['experiment_name'] + self._experiment_id)
        self.exp_name = config['experiment_name'] + self._experiment_id
        self.exp.send_notification(title='Experiment ' +
                                   str(self._experiment_id) + ' ended')
        self.train_data_iter = iter(self.train_dataloader)
        self.model = self.model.to(self.device)
        self.model.train()
        n_params = sum(p.numel() for p in self.model.parameters()
                       if p.requires_grad)
        log_dict = flatten_dict(config)
        log_dict.update({'trainable_params': n_params})
        self.exp.log_parameters(log_dict)
        self.optimizers = get_optimizers(self.model, self.config)
        self.evaluator = Evaluation(self.eval_dataloader, self.config)
        self.num_examples = 0
        self.batch_idx = 0
        self.epoch = 1
        self.ewma = EWMA(beta=0.75)
        self.last_accu = -1.0
        self.max_accu = -1.0
        self.back_prop_every_n_batches = config['training'][
            'back_prop_every_n_batches']
        self.checkpoint_best = config['training']['checkpoint_best']

    def get_batch(self):
        try:
            batch = next(self.train_data_iter)
            return batch

        except StopIteration:
            self.train_data_iter = iter(self.train_dataloader)
            batch = next(self.train_data_iter)
            self.batch_idx = 0
            self.epoch += 1
            return batch

    def _train(self):
        total_log_step_loss = 0
        total_log_step_train_accu = 0
        total_log_step_n = 0

        [opt.zero_grad() for opt in self.optimizers]
        while True:
            batch = self.get_batch()
            self.batch_idx += 1
            self.num_examples += len(batch[0])
            batch = (batch[0].to(self.device), batch[1].to(self.device))
            loss, train_accu = training_step(
                batch,
                self.model,
                self.optimizers,
                step=(self.batch_idx % self.back_prop_every_n_batches == 0))
            total_log_step_loss += loss.cpu().detach().numpy()
            total_log_step_train_accu += train_accu
            total_log_step_n += 1

            if self.batch_idx % self.config['training'][
                    'log_every_n_batches'] == 0:
                avg_loss = total_log_step_loss / total_log_step_n
                avg_accu = total_log_step_train_accu / total_log_step_n
                total_log_step_n = 0
                print(f'{Fore.YELLOW}Total number of seen examples:',
                      self.num_examples, 'Average loss of current log step:',
                      avg_loss, 'Average train accuracy of current log step:',
                      avg_accu, f"{Style.RESET_ALL}")
                self.exp.log_metric('train_loss',
                                    avg_loss,
                                    step=self.num_examples,
                                    epoch=self.epoch)
                self.exp.log_metric('train_accuracy',
                                    avg_accu,
                                    step=self.num_examples,
                                    epoch=self.epoch)
                total_log_step_loss = 0
                total_log_step_train_accu = 0

            if (self.batch_idx +
                    1) % self.config['training']['eval_every_n_batches'] == 0:
                results, assets, image_fns = self.evaluator.eval_model(
                    self.model)
                print(self.config['tune']['discriminating_metric'],
                      results[self.config['tune']['discriminating_metric']])
                self.exp.log_metrics(results,
                                     step=self.num_examples,
                                     epoch=self.epoch)
                [
                    self.exp.log_asset_data(asset, step=self.num_examples)
                    for asset in assets
                ]
                [
                    self.exp.log_image(fn, step=self.num_examples)
                    for fn in image_fns
                ]

                accu_diff_avg = abs(
                    results[self.config['tune']['discriminating_metric']] -
                    self.ewma.get())
                accu_diff_cons = abs(
                    results[self.config['tune']['discriminating_metric']] -
                    self.last_accu)

                no_change_in_accu = 1 if accu_diff_avg < 0.0005 and accu_diff_cons < 0.002 and self.num_examples > 70000 else 0
                self.ewma.update(
                    results[self.config['tune']['discriminating_metric']])
                self.last_accu = results[self.config['tune']
                                         ['discriminating_metric']]

                if self.max_accu < results[self.config['tune']
                                           ['discriminating_metric']]:
                    self.max_accu = results[self.config['tune']
                                            ['discriminating_metric']]
                    if self.checkpoint_best:
                        self.save_checkpoint('checkpoints',
                                             self.exp_name + '.pt')
                        print(
                            f'{Fore.GREEN}New best model saved.{Style.RESET_ALL}'
                        )

                self.exp.log_metric('max_accuracy',
                                    self.max_accu,
                                    step=self.num_examples,
                                    epoch=self.epoch)

                training_results = {
                    self.config['tune']['discriminating_metric']:
                    self.max_accu,
                    'num_examples': self.num_examples,
                    'no_change_in_accu': no_change_in_accu
                }

                return training_results

    def _save(self, checkpoint_dir):
        return self.save_checkpoint(checkpoint_dir, 'checkpoint_file.pt')

    def save_checkpoint(self, checkpoint_dir, fname='checkpoint_file.pt'):
        print(f'{Fore.CYAN}Saving model ...{Style.RESET_ALL}')
        save_dict = {'model_state_dict': self.model.state_dict()}
        for i, optimizer in enumerate(self.optimizers):
            save_dict['op_' + str(i) + '_state_dict'] = optimizer.state_dict()
        torch.save(save_dict, os.path.join(checkpoint_dir, fname))
        return os.path.join(checkpoint_dir, fname)

    def _restore(self, checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        self.model.load_state_dict(checkpoint['model_state_dict'])

        for i, optimizer in enumerate(self.optimizers):
            optimizer.load_state_dict(checkpoint['op_' + str(i) +
                                                 '_state_dict'])

    def stop(self):
        results, assets, image_fns = self.evaluator.eval_model(
            self.model, finished_training=True)
        self.exp.log_metrics(results, step=self.num_examples, epoch=self.epoch)
        [
            self.exp.log_asset_data(asset, step=self.num_examples)
            for asset in assets
        ]
        [self.exp.log_image(fn, step=self.num_examples) for fn in image_fns]

        return super().stop()
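The class above implements the older Ray Tune Trainable interface (_setup/_train/_save/_restore). A minimal launch sketch under that assumption; the config keys shown are placeholders, since the real schema is whatever inject_tuned_hyperparameters() and setup_training() expect:

# Sketch only: assumes a Ray release whose Trainable API still uses _setup/_train/_save/_restore.
import ray
from ray import tune

ray.init()
tune.run(
    TuneTrainable,
    config={                                   # placeholder config; real keys come from setup_training()
        'experiment_name': 'demo',
        'device': 'cuda',
        'training': {'back_prop_every_n_batches': 1,
                     'log_every_n_batches': 100,
                     'eval_every_n_batches': 1000,
                     'checkpoint_best': True},
        'tune': {'discriminating_metric': 'accuracy'},
    },
    stop={'no_change_in_accu': 1},             # _train() reports this flag once accuracy plateaus
    resources_per_trial={'gpu': 1},
)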
Example #7
def main(opts):

    # set manual_seed and build vocab
    setup(opts, opts.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1, vocab=vocab, encoding_length=opts.max_cap_length)

    # create language instruction encoder
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        'opts':opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }

    if opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)

    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(opts, model, encoder, optimizer)

    # if a secondary exp name is specified, this is useful when resuming from a previous saved
    # experiment and save to another experiment, e.g., pre-trained on synthetic data and fine-tune on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir)

    if opts.test_submission:
        assert opts.resume, 'The model was not resumed before running for submission.'
        test_env = ('test', (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 splits=['test'], tokenizer=tok), Evaluation(['test'])))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['train'], tokenizer=tok)
    else:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['synthetic'], tokenizer=tok)

    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok), Evaluation([split]))
                for split in ['val_seen', 'val_unseen']}

    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)

    if opts.eval_beam or opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)

    best_success_rate = best_success_rate if opts.resume else 0.0

    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))

            success_rate_compare = success_rate[1]

            if is_experiment():
                # remember best val_seen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(best_success_rate))

                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir, name=opts.exp_name)

        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                     splits=['train'], tokenizer=tok)

    print("--> Finished training")
Example #8
def test():
    print('current directory', os.getcwd())
    os.chdir('..')
    print('current directory', os.getcwd())

    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    visible_gpu = "0"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpu

    args.name = 'SSM'
    args.attn = 'soft'
    args.train = 'listener'
    args.featdropout = 0.3
    args.angle_feat_size = 128
    args.feedback = 'sample'
    args.ml_weight = 0.2
    args.sub_out = 'max'
    args.dropout = 0.5
    args.optim = 'adam'
    args.lr = 3e-4
    args.iters = 80000
    args.maxAction = 15
    args.batchSize = 4
    args.target_batch_size = 4
    args.pe_dim = 128

    args.self_train = True
    args.aug = 'tasks/R2R/data/aug_paths.json'

    args.featdropout = 0.4
    args.iters = 200000

    if args.optim == 'rms':
        print("Optimizer: Using RMSProp")
        args.optimizer = torch.optim.RMSprop
    elif args.optim == 'adam':
        print("Optimizer: Using Adam")
        args.optimizer = torch.optim.Adam
    elif args.optim == 'sgd':
        print("Optimizer: sgd")
        args.optimizer = torch.optim.SGD

    TRAIN_VOCAB = 'tasks/R2R/data/train_vocab.txt'
    TRAINVAL_VOCAB = 'tasks/R2R/data/trainval_vocab.txt'

    IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'

    if args.features == 'imagenet':
        features = IMAGENET_FEATURES

    if args.fast_train:
        name, ext = os.path.splitext(features)
        features = name + "-fast" + ext

    print(args)

    def setup():
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        # Check for vocabs
        if not os.path.exists(TRAIN_VOCAB):
            write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
        if not os.path.exists(TRAINVAL_VOCAB):
            write_vocab(
                build_vocab(splits=['train', 'val_seen', 'val_unseen']),
                TRAINVAL_VOCAB)

    #
    setup()

    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features)

    print('start extract keys...')
    featurized_scans = set(
        [key.split("_")[0] for key in list(feat_dict.keys())])
    print('keys extracted...')

    val_envs = {
        split: R2RBatch(feat_dict,
                        batch_size=args.batchSize,
                        splits=[split],
                        tokenizer=tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    evaluators = {
        split: Evaluation([split], featurized_scans, tok)
        for split in ['train', 'val_seen', 'val_unseen']
    }

    learner = Learner(val_envs,
                      "",
                      tok,
                      args.maxAction,
                      process_num=4,
                      max_node=17,
                      visible_gpu=visible_gpu)
    learner.eval_init()

    ckpt = 'snap/%s/state_dict/ssm_ckpt' % args.name

    learner.load_eval(ckpt)

    results = learner.eval()
    loss_str = ''
    for key in results:
        evaluator = evaluators[key]
        result = results[key]

        score_summary, score_details = evaluator.score(result)

        loss_str += ", %s \n" % key

        for metric, val in score_summary.items():
            loss_str += ', %s: %.3f' % (metric, val)

        loss_str += '\n'

    print(loss_str)
Example #9
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
              feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type
                in ['bert', 'vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    #train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
    #                     path_type=path_type, history=history, blind=blind)

    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store,
                         nav_graphs,
                         args.panoramic,
                         args.action_space,
                         batch_size=args.batch_size,
                         splits=['train'],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind)

    # Create validation environments
    #val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
    #            tokenizer=tok, path_type=path_type, history=history, blind=blind),
    #            Evaluation([split], path_type=path_type)) for split in ['val_seen', 'val_unseen']}

    val_envs = {
        split: (R2RBatch(feature_store,
                         nav_graphs,
                         args.panoramic,
                         args.action_space,
                         batch_size=args.batch_size,
                         splits=[split],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind), Evaluation([split],
                                                  path_type=path_type))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    #enc_hidden_size = hidden_size//2 if bidirectional else hidden_size

    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers, args.la_layers,
                                 args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()

        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE, args.enc_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 args.bidirectional, args.transformer_update,
                                 args.bert_n_layers, args.reverse_input,
                                 args.top_lstm, args.vl_layers, args.la_layers,
                                 args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size

    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %
                  (args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(
                args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(
                len(tok))  # remember to resize tok embedding size
            #encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            #encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size,
                                  args.dropout_ratio, args.bidirectional,
                                  args.transformer_update, args.bert_n_layers,
                                  args.reverse_input, args.top_lstm,
                                  args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab),
                              word_embedding_size,
                              enc_hidden_size,
                              padding_idx,
                              dropout_ratio,
                              bidirectional=bidirectional).cuda()

    #decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
    #              action_embedding_size, args.hidden_size, args.dropout_ratio).cuda()
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768

    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                                 Seq2SeqAgent.n_outputs(),
                                 action_embedding_size, ctx_hidden_size,
                                 args.hidden_size, args.dropout_ratio,
                                 FEATURE_SIZE, args.panoramic,
                                 args.action_space, args.dec_h_type).cuda()

    train(train_env,
          encoder,
          decoder,
          n_iters,
          path_type,
          history,
          feedback_method,
          max_episode_len,
          MAX_INPUT_LENGTH,
          model_prefix,
          val_envs=val_envs,
          args=args)
Example #10
File: train.py Project: ray-97/vnla
def train_val(seed=None):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Resume from latest checkpoint (if any)
    if os.path.exists(hparams.load_path): # only present in os if not first time. present in hparam but not in os
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter'] # iter is a key of ckpt object that gives start_iter
        # print("start_iter:")
        # print(start_iter)
        # input()
    else:
        if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and hparams.eval_only:
            sys.exit('load_path %s does not exist!' % hparams.load_path) # exit only if no path and eval, can still train
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters # from config

    # Setup seed and read vocab
    setup(seed=seed)

    train_vocab_path = os.path.join(hparams.data_path, 'train_vocab.txt')
    if hasattr(hparams, 'external_main_vocab') and hparams.external_main_vocab:
        train_vocab_path = hparams.external_main_vocab # external_main_vocab likely from command line arg if present

    # verbal advisor means vocab is a list of navigation action for the agent.
    if 'verbal' in hparams.advisor:
        subgoal_vocab_path = os.path.join(hparams.data_path, hparams.subgoal_vocab) # data/asknav/verbal_hard_vocab.txt
        vocab = read_vocab([train_vocab_path, subgoal_vocab_path])
    else:
        vocab = read_vocab([train_vocab_path])
    tok = Tokenizer(vocab=vocab, encoding_length=hparams.max_input_length) # tokenize vocab

    # Create a training environment
    train_env = VNLABatch(hparams, split='train', tokenizer=tok)

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    
    # eval_mode code
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only # if command line indicates eval and value of test seen/unseen
    if eval_mode:
        if '_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        if '_seen' in hparams.load_path:
            val_splits = ['test_seen']
        end_iter = start_iter + hparams.log_every
    # end

    # create object/dict containing envs, key is 'val_seen' or 'val_unseen' values are VNLABatch respectively.
    val_envs = { split: (VNLABatch(hparams, split=split, tokenizer=tok,
        from_train_env=train_env, traj_len_estimates=train_env.traj_len_estimates),
        Evaluation(hparams, [split], hparams.data_path)) for split in val_splits} # evaluate val for both seen and unseen

    # Build models
    model = AttentionSeq2SeqModel(len(vocab), hparams, device).to(device)

    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
        weight_decay=hparams.weight_decay)

    best_metrics = { 'val_seen'  : -1,
                     'val_unseen': -1,
                     'combined'  : -1 } # probably the best scores so far if ckpt has it

    # Load model parameters from a checkpoint (if any)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)

    # Initialize agent
    if 'verbal' in hparams.advisor:
        agent = VerbalAskAgent(model, hparams, device)
    elif hparams.advisor == 'direct':
        agent = AskAgent(model, hparams, device) # agent, as well as model (in attentionSeq2SeqModel), depends on whether the advisor is direct or hint / verbal

    # Train
    return train(train_env, val_envs, agent, model, optimizer, start_iter, end_iter,
          best_metrics, eval_mode) # eval mode has splits that gives different environments.
Example #11
0
def train_val_augment(test_only=False):
    """
    Train the listener with the augmented data
    """
    setup()
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=args.maxInput)

    feat_dict = read_img_features(features, test_only=test_only)

    if test_only:
        featurized_scans = None
        val_env_names = ['val_train_seen']
    else:
        featurized_scans = set(
            [key.split("_")[0] for key in list(feat_dict.keys())])
        val_env_names = ['val_train_seen', 'val_seen', 'val_unseen']

    if not args.test_obj:
        print('Loading compact pano-caffe object features ... (~3 seconds)')
        import pickle as pkl
        with open('img_features/objects/pano_object_class.pkl', 'rb') as f_pc:
            pano_caffe = pkl.load(f_pc)
    else:
        pano_caffe = None

    aug_path = args.aug

    # Create the training environment
    train_env = R2RBatch(feat_dict,
                         pano_caffe,
                         batch_size=args.batchSize,
                         splits=['train'],
                         tokenizer=tok)
    aug_env = R2RBatch(feat_dict,
                       pano_caffe,
                       batch_size=args.batchSize,
                       splits=[aug_path],
                       tokenizer=tok,
                       name='aug')

    stats = train_env.get_statistics()
    print("The training data_size is : %d" % train_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))
    stats = aug_env.get_statistics()
    print("The augmentation data size is %d" % aug_env.size())
    print("The average instruction length of the dataset is %0.4f." %
          (stats['length']))
    print("The average action length of the dataset is %0.4f." %
          (stats['path']))

    val_envs = {
        split:
        (R2RBatch(feat_dict,
                  pano_caffe,
                  batch_size=args.batchSize,
                  splits=[split],
                  tokenizer=tok), Evaluation([split], featurized_scans, tok))
        for split in val_env_names
    }

    train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)