Example #1
    def train_model(self):
        gen_batch_index = DataIterator(np.arange(self.users_num), batch_size=self.batchSize_G, shuffle=True)
        dis_batch_index = DataIterator(np.arange(self.users_num), batch_size=self.batchSize_D, shuffle=True)
        totalEpochs = int(self.epochs / self.step_G)
        for epoch in range(totalEpochs):
            train_matrix, ZR_matrix, PM_matrix = self.get_train_data()
            # training discriminator
            for d_epoch in range(self.step_D):
                for idx in dis_batch_index:
                    train_data = train_matrix[idx].toarray()
                    train_mask = PM_matrix[idx].toarray()
                    feed = {self.realData: train_data, self.mask: train_mask, self.condition: train_data}
                    self.sess.run(self.trainer_D, feed_dict=feed)

            # training generator
            for g_epoch in range(self.step_G):
                for idx in gen_batch_index:
                    train_data = train_matrix[idx].toarray()
                    train_z_mask = ZR_matrix[idx].toarray()
                    train_p_mask = PM_matrix[idx].toarray()
                    feed = {self.realData: train_data, self.condition: train_data,
                            self.mask: train_p_mask, self.G_ZR_dims: train_z_mask}
                    self.sess.run(self.trainer_G, feed_dict=feed)

                result = self.evaluate_model()
                print("%d_G:\t%s" % (epoch*self.step_G+g_epoch, result))
Example #2
    def __init__(self, train_dir, test_dir, n_epochs, init_lr, gpu, local_rank,
                 lr_schedule_type, lr_schedule_param, dataset,
                 train_batch_size, test_batch_size, valid_size, opt_type,
                 opt_param, weight_decay, label_smoothing, no_decay_keys,
                 model_init, init_div_groups, validation_frequency,
                 print_frequency, train_iters):
        self.n_epochs = n_epochs
        self.init_lr = init_lr
        self.lr_schedule_type = lr_schedule_type
        self.lr_schedule_param = lr_schedule_param

        self.dataset = dataset
        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size
        self.valid_size = valid_size
        self.train_dir = train_dir
        self.test_dir = test_dir

        self.opt_type = opt_type
        self.opt_param = opt_param
        self.weight_decay = weight_decay
        self.label_smoothing = label_smoothing
        self.no_decay_keys = no_decay_keys

        self.model_init = model_init
        self.init_div_groups = init_div_groups
        self.validation_frequency = validation_frequency
        self.print_frequency = print_frequency

        self._data_provider = None
        self._train_iter, self._valid_iter, self._test_iter = None, None, None

        self.train_iters = train_iters
        self.val_iters = self.valid_size // self.test_batch_size
        print('test_batch_size={}, valid_size={}, val_iters={}'.format(
            self.test_batch_size, self.valid_size, self.val_iters))

        # Prepare data
        train_loader = get_train_dataloader(self.train_dir,
                                            self.train_batch_size,
                                            shuffle=True)
        self.train_dataprovider = DataIterator(train_loader)
        val_loader = get_val_dataloader(self.test_dir, self.test_batch_size)
        self.val_dataprovider = DataIterator(val_loader)

        self.data_shape = (3, 224, 224)
        self.n_classes = 1000
        self.gpu = gpu
Example #3
    def evaluate(self, model):
        # B: batch size
        # N: the number of items
        test_users = DataIterator(list(self.user_pos_test.keys()), batch_size=2048, shuffle=False, drop_last=False)
        batch_result = []
        for batch_users in test_users:
            ranking_score = model.predict_for_eval(batch_users)  # (B,N)
            # set the ranking scores of training items to -inf,
            # then the training items will be sorted at the end of the ranking list.
            for idx, user in enumerate(batch_users):
                train_items = self.user_pos_train[user]
                ranking_score[idx][train_items] = -np.inf

            test_items = []
            for user in batch_users:
                # 'dtype=np.intc' ensures the integer type expected by the C++ evaluation backend
                u_items = np.array(self.user_pos_test[user], dtype=np.intc, copy=True)
                test_items.append(u_items)

            result = eval_score_matrix(ranking_score, test_items, top_k=50, thread_num=None)  # (B,k*metric_num)
            batch_result.append(result)

        # concatenate the batch results to a matrix
        all_user_result = np.concatenate(batch_result, axis=0)
        final_result = np.mean(all_user_result, axis=0)  # mean
        return final_result
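These recommender examples all pass one or more aligned sequences to DataIterator and iterate over mini-batches. A minimal sketch of such a helper is shown below; SimpleDataIterator is a hypothetical stand-in for illustration, not the library's actual implementation.

import numpy as np


class SimpleDataIterator:
    """Yield aligned mini-batches from one or more equally long sequences."""

    def __init__(self, *data, batch_size=32, shuffle=False, drop_last=False):
        self.data = [np.asarray(d) for d in data]
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.length = len(self.data[0])

    def __iter__(self):
        order = np.random.permutation(self.length) if self.shuffle else np.arange(self.length)
        for start in range(0, self.length, self.batch_size):
            idx = order[start:start + self.batch_size]
            if self.drop_last and len(idx) < self.batch_size:
                break
            batch = [d[idx].tolist() for d in self.data]
            # a single sequence yields a flat batch, several sequences yield a tuple
            yield batch[0] if len(batch) == 1 else tuple(batch)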
Example #4
 def train_model(self):
     self.logger.info(self.evaluator.metrics_info())
     for epoch in range(self.epochs):
         users, pos_items, neg_items = csr_to_pairwise(self.train_matrix,
                                                       neg_num=self.neg_num,
                                                       fold_neg=True)
         data = DataIterator(users,
                             pos_items,
                             neg_items,
                             batch_size=self.batch_size,
                             shuffle=True)
         for batch_users, batch_pos_items, batch_neg_items in data:
             feed = {
                 self.user_h: batch_users,
                 self.pos_item_h: batch_pos_items,
                 self.neg_item_h: batch_neg_items
             }
             self.sess.run(self.update, feed_dict=feed)
         result = self.evaluate_model()
         self.logger.info("epoch %d:\t%s" % (epoch, result))
         # save params
         if (epoch + 1) % 50 == 0:
             params = self.sess.run(self.parameters)
             with open(
                     "%s_d=%d_e=%d_dnsbpr.pkl" %
                 (self.dataset.name, epoch, self.factors_num),
                     "wb") as fout:
                 pickle.dump(params, fout)
Example #5
    def __init__(self,
                 train_data,
                 test_data,
                 batch_size=32,
                 randomize=True,
                 n_tasks=5):

        self.it = DataIterator(train_data,
                               test_data,
                               batch_size,
                               randomize,
                               n_tasks=n_tasks)

        self.train_x = self.it.train_x
        self.train_y = self.it.train_y
        self.test_x = self.it.test_x
        self.test_y = self.it.test_y

        self.i = 0
        self.batch_size = batch_size

        self.n_tasks = n_tasks

        assert (n_tasks == 5)
        print("labels are 0/1, 2/3, 4/5, 6/7, 8/9")
        self.generate_tasks([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
        self.img_fn = self.it.img_fn
        self.reshape_dims = (28 * 28, )
        self.switch_task(0)
Example #6
 def predict_for_eval(self, users):
     users_iter = DataIterator(users, batch_size=1024, shuffle=False, drop_last=False)
     all_ratings = []
     for batch_users in users_iter:
         tmp_rating = self.sess.run(self.pre_logits, feed_dict={self.user_h: batch_users})
         all_ratings.extend(tmp_rating)
     all_ratings = np.array(all_ratings, dtype=np.float32)
     return all_ratings
Example #7
 def __init__(self,
              indexer,
              trainPairs,
              trainLens,
              testPairs,
              testLens,
              batchSize=5,
              hiddenSize=10,
              nLayers=2,
              dropout=0.1,
              residual=True,
              lr=1e-4,
              enforcingRatio=0.5,
              clip=5.0,
              resultSavePath='mscoco/results.txt'):
     """
     
     Args:
         indexer: an Indexer object.
         trainPairs, testPairs: each is a list of pairs of word index list.
         trainLens, testLens: each is a list of pairs of length of word index list.
         batchSize: int. (default=5)
         hiddenSize: int. (default=10)
         nLayers: number of GRU stacking layers. (default=2)
         dropout: dropout rate. (default=0.1)
         residual: boolean, whether to establish residual links. (default=True)
         lr: learning rate, float. (default=1e-4 with Adam)
          enforcingRatio: the fraction of training steps that use teacher forcing. (default=0.5)
         clip: gradient clip cap, float. (default=5.0)
         resultSavePath: (input,prediction,target) sentence triples file path.
     """
     self.indexer = indexer
     self.trainIter = DataIterator(indexer, trainPairs, trainLens)
     self.testIter = DataIterator(indexer, testPairs, testLens)
     self.batchSize = batchSize
     self.hiddenSize = hiddenSize
     self.nLayers = nLayers
     self.dropout = dropout
     self.residual = residual
     self.lr = lr
     self.enforcingRatio = enforcingRatio
     self.clip = clip
     self.resultSavePath = resultSavePath
     self._build_model()
Example #8
    def train_model(self):
        self.logger.info(self.evaluator.metrics_info())
        for epoch in range(self.epochs):
            users, pos_items, neg_items = csr_to_pairwise(self.train_matrix, neg_num=self.dns, fold_neg=True)
            data = DataIterator(users, pos_items, neg_items, batch_size=self.batch_size, shuffle=True)
            for user, pos_item, neg_item in data:
                feed = {self.user_h: user, self.pos_item_h: pos_item, self.neg_item_h: neg_item}
                self.sess.run(self.update, feed_dict=feed)

            result = self.evaluate_model()
            self.logger.info("epoch %d:\t%s" % (epoch, result))
Example #9
 def predict_for_eval(self, users):
     users_iter = DataIterator(users, batch_size=1024, shuffle=False)
     all_ratings = []
     for batch in users_iter:
         eval_data = self.train_matrix[batch].toarray()
         tmp_rating = self.sess.run(self.G_output, feed_dict={self.condition: eval_data})
         all_ratings.extend(tmp_rating)
     all_ratings = np.array(all_ratings, dtype=np.float32)
     if self.mode == "itemBased":
         all_ratings = np.transpose(all_ratings)
     return all_ratings
Example #10
    def get_train_data(self):
        users_list = []
        items_list = []
        for user, items in self.user_pos_train.items():
            users_list.extend([user] * len(items))
            items_list.extend(items)

        dataloader = DataIterator(users_list,
                                  items_list,
                                  batch_size=self.batch_size,
                                  shuffle=True)
        return dataloader
Example #11
 def predict_for_eval(self, users):
     users = DataIterator(users,
                          batch_size=1024,
                          shuffle=False,
                          drop_last=False)
     all_ratings = []
     for bat_user in users:
         eval_data = self.train_matrix[bat_user].toarray()
         feed = {self.uid_ph: bat_user, self.input_ph: eval_data}
         tmp_rating = self.sess.run(self.output, feed_dict=feed)
         all_ratings.extend(tmp_rating)
     all_ratings = np.array(all_ratings, dtype=np.float32)
     return all_ratings
Example #12
    def get_train_data(self):
        users, pos_items, neg_items = csr_to_pairwise(self.train_matrix, neg_num=1, fold_neg=False)
        users_list, items_list, labels_list = [], [], []
        users_list.extend(users)
        items_list.extend(pos_items)
        labels_list.extend([1]*len(pos_items))

        users_list.extend(users)
        items_list.extend(neg_items)
        labels_list.extend([0] * len(neg_items))

        dataloader = DataIterator(users_list, items_list, labels_list, batch_size=self.batch_size, shuffle=True)
        return dataloader
Example #13
    def _pre_training(self):
        # pretrain
        self.logger.info("Pre-training")
        for epoch in range(self.adv_epoch):
            users, pos_items, neg_items = csr_to_pairwise(self.train_matrix, neg_num=self.dns, fold_neg=True)
            data = DataIterator(users, pos_items, neg_items, batch_size=self.batch_size, shuffle=True)
            for user_input, item_input_pos, item_dns_list in data:
                feed_dict = {self.user_input: user_input,
                             self.item_input_pos: item_input_pos,
                             self.item_input_neg: item_dns_list}
                self.sess.run(self.bpr_optimizer, feed_dict)

            result = self.evaluate_model()
            self.logger.info("%d:\t%s" % (epoch, result))
Example #14
    def _adversarial_training(self):
        # adversarial training
        self.logger.info("Adversarial training")
        for epoch in range(self.adv_epoch, self.epochs):
            users, pos_items, neg_items = csr_to_pairwise(self.train_matrix, neg_num=1, fold_neg=True)
            data = DataIterator(users, pos_items, neg_items, batch_size=self.batch_size, shuffle=True)
            for user_input, item_input_pos, item_input_neg in data:
                feed_dict = {self.user_input: user_input,
                             self.item_input_pos: item_input_pos,
                             self.item_input_neg: item_input_neg,
                             self.steps: epoch}

                self.sess.run([self.update_P, self.update_Q], feed_dict)
                self.sess.run(self.amf_optimizer, feed_dict)

            result = self.evaluate_model()
            self.logger.info("%d:\t%s" % (epoch, result))
Example #15
    def train_model(self):
        data = DataIterator(np.arange(self.users_num),
                            batch_size=self.batch_size,
                            shuffle=True)
        for epoch in range(self.epochs):
            corrupt_input, mask = self.get_train_data()
            for bat_user in data:
                train_data = corrupt_input[bat_user].toarray()
                train_mask = mask[bat_user].toarray()
                labels = self.train_matrix[bat_user].toarray()
                feed = {
                    self.uid_ph: bat_user,
                    self.input_ph: train_data,
                    self.mask_ph: train_mask,
                    self.label_ph: labels
                }
                self.sess.run(self.update_opt, feed_dict=feed)

            result = self.evaluate_model()
            self.logger.info("epoch %d:\t%s" % (epoch, result))
Example #16
    # print(len(true_label_list))
    # print(pred_label_list)
    # print(true_label_list)
    # save_weight = np.array([1.03283219, 0.97672083, 0.94315084])
    # pred_logit_list = save_weight * np.array(pred_logit_list)
    # pred_label_list = np.argmax(pred_logit_list, axis=1) - 1
    # dev_df=pd.read_csv(config.data_process + 'processed_data/new_dev_df.csv',encoding='utf_8_sig')
    # dev_df['pred_label']=pred_label_list
    # dev_df.to_csv(config.data_process+'compare_result.csv',index=False,encoding='utf_8_sig')


if __name__ == '__main__':
    config = Config()
    vocab_file = config.vocab_file
    do_lower_case = False
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    print('Predicting test.txt..........')
    dev_iter = DataIterator(config.batch_size,
                            data_file=config.data_process +
                            'processed_data/new_dev_df.csv',
                            use_bert=config.use_bert,
                            seq_length=config.sequence_length,
                            is_test=True,
                            tokenizer=tokenizer)
    # print('Predicting dev.txt..........')
    # dev_iter = DataIterator(config.batch_size, data_file=result_data_dir + 'dev.txt', use_bert=config.use_bert,
    #                         seq_length=config.sequence_length, is_test=True, tokenizer=tokenizer)

    set_test(dev_iter, config.checkpoint_path)
Example #17
        final_ev_dict[key] = dict()
        for e_key in ev.keys():
            final_ev_dict[key][e_key] = round(sum(ev[e_key]) / len(ev[e_key]), 4)
    # print(final_ev_dict)
    for key in final_ev_dict.keys():
        ev_p = final_ev_dict[key]['precision']
        ev_r = final_ev_dict[key]['recall']
        ev_f1 = final_ev_dict[key]['f1-score']
        print(key, ev_p, ev_r, ev_f1)

    f1 = f1 / len(target_names)
    precision = precision / len(target_names)
    recall = recall / len(target_names)

    print('{:.4f} {:.4f} {:.4f}'.format(precision, recall, f1))
    with open(result_data_dir + 'result.json', 'w', encoding='utf-8') as f:
        json.dump(pred_answer, f, ensure_ascii=False)


if __name__ == '__main__':
    config = Config()
    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=config.model_path,
                                              do_lower_case=True,
                                              never_split=["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"])
    print('Predicting test.txt..........')
    dev_iter = DataIterator(config.batch_size,
                            config.processed_data + 'dev.txt',
                            pretrainning_model=config.pretrainning_model,
                            seq_length=config.sequence_length, is_test=True, tokenizer=tokenizer)
    set_test(dev_iter, config.checkpoint_path)
Example #18
def comput_p(true_list, pred_list):
    c = 0
    for i in range(len(true_list)):
        if true_list[i] == pred_list[i]:
            c += 1
    return c / len(true_list)
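For reference, the same element-wise accuracy can be computed in one line with NumPy, assuming both lists are non-empty and of equal length (the toy data below is purely illustrative):

import numpy as np

true_list, pred_list = [1, 0, 2, 1], [1, 0, 1, 1]  # toy data for illustration
accuracy = float(np.mean(np.array(true_list) == np.array(pred_list)))  # 0.75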


if __name__ == '__main__':
    config = Config()
    vocab_file = config.vocab_file  # general-purpose vocabulary
    do_lower_case = True
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    re_tokenzier = Tokenizer(vocab_file, do_lower_case)
    train_iter = DataIterator(config.batch_size,
                              data_file=config.process + 'train.csv',
                              use_bert=config.use_bert,
                              tokenizer=tokenizer,
                              seq_length=config.sequence_length,
                              config=config)

    dev_iter = DataIterator(config.batch_size,
                            data_file=config.process + 'dev.csv',
                            use_bert=config.use_bert,
                            tokenizer=tokenizer,
                            seq_length=config.sequence_length,
                            is_test=True,
                            config=config)
    train(train_iter, dev_iter, config)
#fold2 new_answer
Example #19
        questionlen_list.extend(querylen_list)
        allmaping_list.extend(mapping_list)
        context_list.extend(text_list)
        cls_prob_list.extend(pred_c)

    pred_answer, C = refind_answer(predict_df, all_uid_list, start_prob_list,
                                   end_prob_list, questionlen_list,
                                   allmaping_list, context_list, cls_prob_list)
    predict_df['answer'] = pred_answer
    predict_df.to_csv(config.processed_data + 'result_dev.csv')


if __name__ == '__main__':
    config = Config()
    tokenizer = BertTokenizer.from_pretrained(
        pretrained_model_name_or_path=config.model_path,
        do_lower_case=True,
        never_split=["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"])

    test_iter = DataIterator(config.batch_size,
                             data_file=config.processed_data + 'test.csv',
                             config=config,
                             tokenizer=tokenizer)

    predict_file = config.processed_data + 'NCPPolicies_test.csv'
    print('Predicting {}..........'.format(str(predict_file)))

    test_df = pd.read_csv(predict_file, sep='\t', error_bad_lines=False)

    set_test(test_iter, config.checkpoint_path, test_df)
Example #20
    :return:
    """
    # compute the maximum of each row
    row_max = x.max(axis=axis)

    # subtract each row's maximum from its elements; otherwise exp(x) can overflow to inf
    row_max = row_max.reshape(-1, 1)
    x = x - row_max
    # compute the exponential
    x_exp = np.exp(x)
    x_sum = np.sum(x_exp, axis=axis, keepdims=True)
    s = x_exp / x_sum
    return s
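The snippet above starts mid-docstring, so its signature is cut off. A self-contained version of the same numerically stable, row-wise softmax (the function name and the axis default are assumptions) would look roughly like this:

import numpy as np


def softmax(x, axis=1):
    # subtract the row-wise maximum so exp() cannot overflow
    row_max = x.max(axis=axis).reshape(-1, 1)
    x_exp = np.exp(x - row_max)
    return x_exp / np.sum(x_exp, axis=axis, keepdims=True)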


if __name__ == '__main__':
    config = Config()
    tokenizer = BertTokenizer.from_pretrained(
        pretrained_model_name_or_path=config.model_path,
        do_lower_case=False,
        never_split=["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"])
    train_iter = DataIterator(config.batch_size,
                              data_file=config.processed_data + 'train.csv',
                              config=config,
                              tokenizer=tokenizer)
    dev_iter = DataIterator(config.batch_size,
                            data_file=config.processed_data + 'dev.csv',
                            config=config,
                            tokenizer=tokenizer)
    train(train_iter, dev_iter, config=config)
Example #21
def prepare_data_mlc(gold_fraction, corruption_prob, corruption_type, args):
    from load_corrupted_data import CIFAR10, CIFAR100

    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])

    # since cifar10 and cifar100 have no official validation split, use gold as valid also
    if args.dataset == 'cifar10':
        train_data_gold = CIFAR10(args.data_path,
                                  True,
                                  True,
                                  gold_fraction,
                                  corruption_prob,
                                  args.corruption_type,
                                  transform=train_transform,
                                  download=True,
                                  distinguish_gold=False,
                                  seed=args.seed)
        train_data_silver = CIFAR10(
            args.data_path,
            True,
            False,
            gold_fraction,
            corruption_prob,
            args.corruption_type,
            transform=train_transform,
            download=True,
            shuffle_indices=train_data_gold.shuffle_indices,
            seed=args.seed,
            distinguish_gold=False,
            weaklabel=args.weaklabel)  # note the weaklabel arg
        train_data_gold_deterministic = CIFAR10(
            args.data_path,
            True,
            True,
            gold_fraction,
            corruption_prob,
            args.corruption_type,
            transform=test_transform,
            download=True,
            shuffle_indices=train_data_gold.shuffle_indices,
            distinguish_gold=False,
            seed=args.seed)
        test_data = CIFAR10(args.data_path,
                            train=False,
                            transform=test_transform,
                            download=True,
                            distinguish_gold=False,
                            seed=args.seed)

        # same as gold
        valid_data = CIFAR10(args.data_path,
                             True,
                             True,
                             gold_fraction,
                             corruption_prob,
                             args.corruption_type,
                             transform=train_transform,
                             download=True,
                             distinguish_gold=False,
                             seed=args.seed)

        num_classes = 10

    elif args.dataset == 'cifar100':
        train_data_gold = CIFAR100(args.data_path,
                                   True,
                                   True,
                                   gold_fraction,
                                   corruption_prob,
                                   args.corruption_type,
                                   transform=train_transform,
                                   download=True,
                                   distinguish_gold=False,
                                   seed=args.seed)
        train_data_silver = CIFAR100(
            args.data_path,
            True,
            False,
            gold_fraction,
            corruption_prob,
            args.corruption_type,
            transform=train_transform,
            download=True,
            shuffle_indices=train_data_gold.shuffle_indices,
            seed=args.seed,
            distinguish_gold=False,
            weaklabel=args.weaklabel)  # note the weaklabel arg
        train_data_gold_deterministic = CIFAR100(
            args.data_path,
            True,
            True,
            gold_fraction,
            corruption_prob,
            args.corruption_type,
            transform=test_transform,
            download=True,
            shuffle_indices=train_data_gold.shuffle_indices,
            distinguish_gold=False,
            seed=args.seed)
        test_data = CIFAR100(args.data_path,
                             train=False,
                             transform=test_transform,
                             download=True,
                             distinguish_gold=False,
                             seed=args.seed)

        # same as gold
        valid_data = CIFAR100(args.data_path,
                              True,
                              True,
                              gold_fraction,
                              corruption_prob,
                              args.corruption_type,
                              transform=train_transform,
                              download=True,
                              distinguish_gold=False,
                              seed=args.seed)

        num_classes = 100

    gold_sampler = None
    silver_sampler = None
    valid_sampler = None
    test_sampler = None
    batch_size = args.bs

    train_gold_loader = DataIterator(
        torch.utils.data.DataLoader(train_data_gold,
                                    batch_size=batch_size,
                                    shuffle=(gold_sampler is None),
                                    num_workers=args.prefetch,
                                    pin_memory=True,
                                    sampler=gold_sampler))
    train_silver_loader = torch.utils.data.DataLoader(
        train_data_silver,
        batch_size=batch_size,
        shuffle=(silver_sampler is None),
        num_workers=args.prefetch,
        pin_memory=True,
        sampler=silver_sampler)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=batch_size,
                                               shuffle=(valid_sampler is None),
                                               num_workers=args.prefetch,
                                               pin_memory=True,
                                               sampler=valid_sampler)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              shuffle=(test_sampler is None),
                                              num_workers=args.prefetch,
                                              pin_memory=True,
                                              sampler=test_sampler)

    return train_gold_loader, train_silver_loader, valid_loader, test_loader, num_classes
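In the PyTorch examples, DataIterator wraps a torch DataLoader and is later consumed with .next() (see RunConfig.train_next_batch further below). A minimal sketch of one plausible such wrapper, assuming it simply restarts the loader when an epoch ends (LoaderIterator is a hypothetical name, not the library's actual class):

class LoaderIterator:
    """Hypothetical stand-in: draws batches from a DataLoader and restarts it when exhausted."""

    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = iter(dataloader)

    def next(self):
        try:
            return next(self.iterator)
        except StopIteration:
            # the underlying loader finished an epoch; start a new one
            self.iterator = iter(self.dataloader)
            return next(self.iterator)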
Example #22
    """


if __name__ == '__main__':
    config = Config()
    # load the embedding matrices
    print('loading word2vec mat1...')
    embeddings_1 = gensim.models.KeyedVectors.load_word2vec_format(config.model_path + 'w2v_model/category_256.txt',
                                                                   binary=False)
    print('loading word2vec mat2...')
    embeddings_2 = gensim.models.KeyedVectors.load_word2vec_format(config.model_path + 'w2v_model/industry_256.txt',
                                                                   binary=False)
    print('loading word2vec mat3...')
    embeddings_3 = gensim.models.KeyedVectors.load_word2vec_format(config.model_path + 'w2v_model/product_256.txt',
                                                                   binary=False)
    print('loading word2vec mat4...')
    embeddings_4 = gensim.models.KeyedVectors.load_word2vec_format(config.model_path + 'w2v_model/advertiser_256.txt',
                                                                   binary=False)
    print('loading word2vec mat5...')

    embeddings_5 = gensim.models.KeyedVectors.load_word2vec_format(config.model_path + 'w2v_model/creative_256.txt',
                                                                   binary=False)
    print('model loading done')

    # dev_iter = DataIterator(config.test_batch_size, data_file=config.data_processed + 'new_test_{}_{}.csv'.format(start,end),
    #                         use_bert=config.use_bert,seq_length=config.sequence_length, is_test=True, tokenizer=tokenizer)
    dev_iter = DataIterator(config.batch_size, embeddings_1, embeddings_2, embeddings_3, embeddings_4, embeddings_5,
                            data_file=config.corpus_path + 'test.csv', seq_length=config.sequence_length, config=config)

    set_test(dev_iter, config.checkpoint_path)
Example #23
class RunConfig:
    def __init__(self, train_dir, test_dir, n_epochs, init_lr, gpu, local_rank,
                 lr_schedule_type, lr_schedule_param, dataset,
                 train_batch_size, test_batch_size, valid_size, opt_type,
                 opt_param, weight_decay, label_smoothing, no_decay_keys,
                 model_init, init_div_groups, validation_frequency,
                 print_frequency, train_iters):
        self.n_epochs = n_epochs
        self.init_lr = init_lr
        self.lr_schedule_type = lr_schedule_type
        self.lr_schedule_param = lr_schedule_param

        self.dataset = dataset
        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size
        self.valid_size = valid_size
        self.train_dir = train_dir
        self.test_dir = test_dir

        self.opt_type = opt_type
        self.opt_param = opt_param
        self.weight_decay = weight_decay
        self.label_smoothing = label_smoothing
        self.no_decay_keys = no_decay_keys

        self.model_init = model_init
        self.init_div_groups = init_div_groups
        self.validation_frequency = validation_frequency
        self.print_frequency = print_frequency

        self._data_provider = None
        self._train_iter, self._valid_iter, self._test_iter = None, None, None

        self.train_iters = train_iters
        self.val_iters = self.valid_size // self.test_batch_size
        print('test_batch_size={}, valid_size={}, val_iters={}'.format(
            self.test_batch_size, self.valid_size, self.val_iters))

        # Prepare data
        train_loader = get_train_dataloader(self.train_dir,
                                            self.train_batch_size,
                                            shuffle=True)
        self.train_dataprovider = DataIterator(train_loader)
        val_loader = get_val_dataloader(self.test_dir, self.test_batch_size)
        self.val_dataprovider = DataIterator(val_loader)

        self.data_shape = (3, 224, 224)
        self.n_classes = 1000
        self.gpu = gpu

    @property
    def config(self):
        config = {}
        for key in self.__dict__:
            if not key.startswith('_'):
                config[key] = self.__dict__[key]
        return config

    def copy(self):
        return RunConfig(**self.config)

    """ learning rate """

    def _calc_learning_rate(self, epoch, batch=0, nBatch=None):
        if self.lr_schedule_type == 'cosine':
            T_total = self.n_epochs * nBatch
            T_cur = epoch * nBatch + batch
            lr = 0.5 * self.init_lr * (1 + math.cos(math.pi * T_cur / T_total))
        else:
            raise ValueError('do not support: %s' % self.lr_schedule_type)
        return lr

    def adjust_learning_rate(self, optimizer, epoch, batch=0, nBatch=None):
        """ adjust learning of a given optimizer and return the new learning rate """
        new_lr = self._calc_learning_rate(epoch, batch, nBatch)
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_lr
        return new_lr

    """ data provider """

    @property
    def data_config(self):
        raise NotImplementedError

    @property
    def data_provider(self):
        if self._data_provider is None:
            if self.dataset == 'imagenet':
                self._data_provider = self.train_dataprovider
            else:
                raise ValueError('do not support: %s' % self.dataset)
        return self._data_provider

    @data_provider.setter
    def data_provider(self, val):
        self._data_provider = val

    @property
    def train_loader(self):
        return self.train_dataprovider

    @property
    def valid_loader(self):
        return self.val_dataprovider

    @property
    def test_loader(self):
        return self.val_dataprovider

    @property
    def train_next_batch(self):
        try:
            images, labels = self.train_loader.next()
            images = Variable(images, requires_grad=False)
            labels = Variable(labels, requires_grad=False)
        except StopIteration:
            raise RuntimeError('train data provider is exhausted')
        return images, labels

    @property
    def valid_next_batch(self):
        try:
            images, labels = self.val_dataprovider.next()
            images = Variable(images, requires_grad=False)
            labels = Variable(labels, requires_grad=False)
        except StopIteration:
            raise RuntimeError('valid data provider is exhausted')
        return images, labels

    @property
    def test_next_batch(self):
        try:
            images, labels = self.val_dataprovider.next()
            images = Variable(images, requires_grad=False)
            labels = Variable(labels, requires_grad=False)
        except StopIteration:
            raise RuntimeError('test data provider is exhausted')
        return images, labels

    """ optimizer """

    def build_optimizer(self, net_params):
        if self.opt_type == 'sgd':
            opt_param = {} if self.opt_param is None else self.opt_param
            momentum, nesterov = opt_param.get('momentum', 0.9), opt_param.get(
                'nesterov', True)
            if self.no_decay_keys:
                optimizer = torch.optim.SGD([
                    {
                        'params': net_params[0],
                        'weight_decay': self.weight_decay
                    },
                    {
                        'params': net_params[1],
                        'weight_decay': 0
                    },
                ],
                                            lr=self.init_lr,
                                            momentum=momentum,
                                            nesterov=nesterov)
            else:
                optimizer = torch.optim.SGD(net_params,
                                            self.init_lr,
                                            momentum=momentum,
                                            nesterov=nesterov,
                                            weight_decay=self.weight_decay)
        else:
            raise NotImplementedError
        return optimizer
Example #24
        re.write(R)
    n_df = pd.DataFrame()
    id = list(pred_answer_dict.keys())
    answer = list(pred_answer_dict.values())
    n_df['id'] = id
    n_df['answer'] = answer
    no_df = n_df[n_df['answer'] == '']
    print('no answer found for {} samples'.format(len(no_df)))
    no_df.to_csv('/'.join(config.checkpoint_path.split('/')[:-1]) +
                 '/no_answer.csv',
                 index=False,
                 encoding='utf_8_sig')


if __name__ == '__main__':
    config = Config()
    vocab_file = config.vocab_file
    do_lower_case = False
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    print('Predicting test.txt..........')
    dev_iter = DataIterator(
        config.test_batch_size,
        # data_file=Config().data + 'test_mrc.csv',
        config.process + 'test.csv',
        use_bert=config.use_bert,
        seq_length=config.sequence_length,
        is_test=True,
        tokenizer=tokenizer,
        config=config)
    set_test(dev_iter, config.checkpoint_path)
Example #25
    age_auc = accuracy_score(true_age_list, pred_age_list)
    gender_auc = accuracy_score(true_gender_list, pred_gender_list)
    print('focal_auc {}, age_auc {}, gender_auc {}'.format(
        age_auc + gender_auc, age_auc, gender_auc))

    return age_auc, gender_auc


if __name__ == '__main__':
    config = Config()
    vocab_file = config.vocab_file  # general-purpose vocabulary
    do_lower_case = False
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    train_iter = DataIterator(config.batch_size,
                              data_file=config.data_processed +
                              'new_train.csv',
                              use_bert=config.use_bert,
                              tokenizer=tokenizer,
                              seq_length=config.sequence_length)

    dev_iter = DataIterator(config.batch_size,
                            data_file=config.data_processed + 'new_dev.csv',
                            use_bert=config.use_bert,
                            tokenizer=tokenizer,
                            seq_length=config.sequence_length,
                            is_test=True)

    train(train_iter, dev_iter, config)
Example #26
def prepare_data_mwnet(gold_fraction, corruption_prob, corruption_type, args):
    from load_corrupted_data_mlg import CIFAR10, CIFAR100
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    if True:  # no augment as used by mwnet
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                              mode='reflect').squeeze()),
            transforms.ToPILImage(),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    test_transform = transforms.Compose([transforms.ToTensor(), normalize])

    args.num_meta = int(50000 * gold_fraction)

    if args.dataset == 'cifar10':
        num_classes = 10

        train_data_meta = CIFAR10(root=args.data_path,
                                  train=True,
                                  meta=True,
                                  num_meta=args.num_meta,
                                  corruption_prob=corruption_prob,
                                  corruption_type=args.corruption_type,
                                  transform=train_transform,
                                  download=True)
        train_data = CIFAR10(root=args.data_path,
                             train=True,
                             meta=False,
                             num_meta=args.num_meta,
                             corruption_prob=corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform,
                             download=True,
                             seed=args.seed)
        test_data = CIFAR10(root=args.data_path,
                            train=False,
                            transform=test_transform,
                            download=True)

        valid_data = CIFAR10(root=args.data_path,
                             train=True,
                             meta=True,
                             num_meta=args.num_meta,
                             corruption_prob=corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform,
                             download=True)

    elif args.dataset == 'cifar100':
        num_classes = 100

        train_data_meta = CIFAR100(root=args.data_path,
                                   train=True,
                                   meta=True,
                                   num_meta=args.num_meta,
                                   corruption_prob=corruption_prob,
                                   corruption_type=args.corruption_type,
                                   transform=train_transform,
                                   download=True)
        train_data = CIFAR100(root=args.data_path,
                              train=True,
                              meta=False,
                              num_meta=args.num_meta,
                              corruption_prob=corruption_prob,
                              corruption_type=args.corruption_type,
                              transform=train_transform,
                              download=True,
                              seed=args.seed)
        test_data = CIFAR100(root=args.data_path,
                             train=False,
                             transform=test_transform,
                             download=True)

        valid_data = CIFAR100(root=args.data_path,
                              train=True,
                              meta=True,
                              num_meta=args.num_meta,
                              corruption_prob=corruption_prob,
                              corruption_type=args.corruption_type,
                              transform=train_transform,
                              download=True)

    train_gold_loader = DataIterator(
        torch.utils.data.DataLoader(train_data_meta,
                                    batch_size=args.bs,
                                    shuffle=True,
                                    num_workers=args.prefetch,
                                    pin_memory=True))
    train_silver_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.bs,
        shuffle=True,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=args.bs,
                                               shuffle=True,
                                               num_workers=args.prefetch,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.bs,
                                              shuffle=False,
                                              num_workers=args.prefetch,
                                              pin_memory=True)

    return train_gold_loader, train_silver_loader, valid_loader, test_loader, num_classes
Example #27
def prepare_data(args):
    num_classes = 14

    # resnet recommended normalization
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # transform
    # Note: resizing to 224 and center-cropping have already been applied to the images in these folders
    transform = transforms.Compose([
        transforms.ToTensor(),  # to [0,1]
        normalize
    ])

    train_data_gold = torchvision.datasets.ImageFolder(
        'data/clothing1M/clean_train', transform=transform)
    train_data_silver = torchvision.datasets.ImageFolder(
        'data/clothing1M/noisy_train', transform=transform)
    val_data = torchvision.datasets.ImageFolder('data/clothing1M/clean_val',
                                                transform=transform)
    test_data = torchvision.datasets.ImageFolder('data/clothing1M/clean_test',
                                                 transform=transform)

    # fix class idx so that it matches the class name
    _fix_cls_to_idx(train_data_gold)
    _fix_cls_to_idx(train_data_silver)
    _fix_cls_to_idx(val_data)
    _fix_cls_to_idx(test_data)

    gold_sampler = None
    silver_sampler = None
    val_sampler = None
    test_sampler = None
    batch_size = args.bs

    train_gold_loader = DataIterator(
        torch.utils.data.DataLoader(train_data_gold,
                                    batch_size=batch_size,
                                    shuffle=(gold_sampler is None),
                                    num_workers=args.prefetch,
                                    pin_memory=True,
                                    sampler=gold_sampler))
    train_silver_loader = torch.utils.data.DataLoader(
        train_data_silver,
        batch_size=batch_size,
        shuffle=(silver_sampler is None),
        num_workers=args.prefetch,
        pin_memory=True,
        sampler=silver_sampler)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=batch_size,
                                             shuffle=(val_sampler is None),
                                             num_workers=args.prefetch,
                                             pin_memory=True,
                                             sampler=val_sampler)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              shuffle=(test_sampler is None),
                                              num_workers=args.prefetch,
                                              pin_memory=True,
                                              sampler=test_sampler)

    return train_gold_loader, train_silver_loader, val_loader, test_loader, num_classes
Example #28
        cls_pre = np.argmax(cls_probs, axis=-1)
        pred_label_list += list(cls_pre)
    print(len(pred_label_list))
    # print(pred_label_list)
    # print(true_label_list)

    test_result_pd = pd.read_csv(config.base_dir + 'dev.csv', encoding='utf8')
    test_result_pd['pred'] = pred_label_list
    true_list = test_result_pd['num_label'].tolist()
    from sklearn.metrics import f1_score
    F1 = f1_score(true_list, pred_label_list, average='micro')
    print('F1:', F1)
    test_result_pd.to_csv(config.base_dir + 'result.csv', index=False, encoding='utf-8')


if __name__ == '__main__':
    config = Config()
    vocab_file = config.vocab_file
    do_lower_case = False
    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=config.model_path,
                                              do_lower_case=True,
                                              never_split=["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"])
    print('Predicting test.txt..........')
    dev_iter = DataIterator(config.batch_size,
                            config.base_dir + 'dev.csv',
                            use_bert=config.use_bert,
                            seq_length=config.sequence_length, is_test=True, tokenizer=tokenizer)
    set_test(dev_iter, config.checkpoint_path)
Example #29
        'y_pred_text': ldct_list_text
    }
    df = pd.DataFrame(dict_data)
    precision, recall, f1 = get_P_R_F(df)

    print('precision: {}, recall {}, f1 {}'.format(precision, recall, f1))

    return precision, recall


if __name__ == '__main__':
    config = Config()
    vocab_file = config.vocab_file  # general-purpose vocabulary
    do_lower_case = False
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    train_iter = DataIterator(config.batch_size,
                              data_file=result_data_dir + 'train.txt',
                              use_bert=config.use_bert,
                              tokenizer=tokenizer,
                              seq_length=config.sequence_length)
    print('GET!!')
    dev_iter = DataIterator(config.batch_size,
                            data_file=result_data_dir + 'dev.txt',
                            use_bert=config.use_bert,
                            tokenizer=tokenizer,
                            seq_length=config.sequence_length,
                            is_test=True)

    train(train_iter, dev_iter, config)
Example #30
class Seq2Seq:
    """Encoder-Decoder model with Luong attention, stacking and residual links."""
    def __init__(self,
                 indexer,
                 trainPairs,
                 trainLens,
                 testPairs,
                 testLens,
                 batchSize=5,
                 hiddenSize=10,
                 nLayers=2,
                 dropout=0.1,
                 residual=True,
                 lr=1e-4,
                 enforcingRatio=0.5,
                 clip=5.0,
                 resultSavePath='mscoco/results.txt'):
        """
        
        Args:
            indexer: an Indexer object.
            trainPairs, testPairs: each is a list of pairs of word index list.
            trainLens, testLens: each is a list of pairs of length of word index list.
            batchSize: int. (default=5)
            hiddenSize: int. (default=10)
            nLayers: number of GRU stacking layers. (default=2)
            dropout: dropout rate. (default=0.1)
            residual: boolean, whether to establish residual links. (default=True)
            lr: learning rate, float. (default=1e-4 with Adam)
            enforcingRatio: the fraction of training steps that use teacher forcing. (default=0.5)
            clip: gradient clip cap, float. (default=5.0)
            resultSavePath: (input,prediction,target) sentence triples file path.
        """
        self.indexer = indexer
        self.trainIter = DataIterator(indexer, trainPairs, trainLens)
        self.testIter = DataIterator(indexer, testPairs, testLens)
        self.batchSize = batchSize
        self.hiddenSize = hiddenSize
        self.nLayers = nLayers
        self.dropout = dropout
        self.residual = residual
        self.lr = lr
        self.enforcingRatio = enforcingRatio
        self.clip = clip
        self.resultSavePath = resultSavePath
        self._build_model()

    def _build_model(self):
        """Specify computational graph."""
        self.encoder = EncoderRNN(self.indexer.size,
                                  self.hiddenSize,
                                  nLayers=self.nLayers,
                                  dropout=self.dropout)
        self.decoder = LuongDecoderRNN(self.hiddenSize,
                                       self.indexer.size,
                                       nLayers=self.nLayers,
                                       dropout=self.dropout,
                                       residual=self.residual)
        self.encoderOptim = optim.Adam(self.encoder.parameters(), self.lr)
        self.decoderOptim = optim.Adam(self.decoder.parameters(), self.lr)

    def _model_config(self):
        return 'Vocab Size = ' + str(self.indexer.size) + '\n' + \
               'Train/Test Size = ' + str(self.trainIter.size)+'/'+str(self.testIter.size) + '\n' + \
               'batchSize = ' + str(self.batchSize) + '; hiddenSize = ' + str(self.hiddenSize) + '\n' + \
               'nLayers = ' + str(self.nLayers) + '; dropout = ' + str(self.dropout) + '\n' + \
               'residual = ' + str(self.residual) + '; learning rate = ' + str(self.lr) + '\n' + \
               'teacher enforce ratio = ' + str(self.enforcingRatio) + '; clip = ' + str(self.clip) + '\n'

    def _train_step(self):
        """One step of training."""
        inputs, inputsLen, targets, targetsLen = self.trainIter.random_batch(
            self.batchSize)
        self.encoderOptim.zero_grad()
        self.decoderOptim.zero_grad()
        loss = 0
        # Run encoder
        encoderHidden = None
        encoderOutput, encoderHidden = self.encoder(inputs, inputsLen,
                                                    encoderHidden)
        # Run decoder
        decoderInput = Variable(
            torch.LongTensor([self.indexer.get_index('SOS')] * self.batchSize))
        decoderContext = Variable(
            torch.zeros(self.batchSize, self.decoder.hiddenSize))
        decoderHidden = encoderHidden
        enforce = random.random() < self.enforcingRatio
        maxTargetLen = max(targetsLen)
        decoderOutputAll = Variable(
            torch.zeros(maxTargetLen, self.batchSize, self.decoder.outputSize))
        # <mt-max,bc,vocab>
        for di in range(maxTargetLen):
            decoderOutput, decoderHidden, decoderContext, attentionWeights = self.decoder(
                decoderInput, decoderHidden, decoderContext, encoderOutput)
            decoderOutputAll[di] = decoderOutput
            if enforce:
                decoderInput = targets[di]  # <== targets is <mt,bc>
            else:
                topValues, topIndices = decoderOutput.data.topk(1)  # <bc,1>
                decoderInput = Variable(
                    topIndices.squeeze())  # <bc,1> -> <bc,>
        # Sequence cross entropy
        loss = batch_cross_entropy(
            decoderOutputAll.transpose(0, 1).contiguous(),
            targets.transpose(0, 1).contiguous(), targetsLen, self.batchSize)
        # Backprop
        loss.backward()
        torch.nn.utils.clip_grad_norm(self.encoder.parameters(), self.clip)
        torch.nn.utils.clip_grad_norm(self.decoder.parameters(), self.clip)
        self.encoderOptim.step()
        self.decoderOptim.step()
        return loss.data[0] / targetsLen

    def train(self, nEpochs=1, epochSize=100, printEvery=5):
        """Train on loaded data upon construction.
        
        Args:
            nEpochs: number of epochs.
            epochSize: number of batches trained in an epoch.
            printEvery: frequency of results report.
        """
        averageLoss = 0
        start = time.time()
        for e in range(nEpochs):
            epochLoss = 0
            for step in range(epochSize):
                loss = self._train_step()
                if step != 0 and step % printEvery == 0:
                    print("Step %d average loss = %.4f (time: %.2f)" % (
                        step,
                        loss.mean(),  # batch mean.
                        time.time() - start))
                    start = time.time()
                epochLoss += loss.mean()
            epochLoss /= epochSize
            averageLoss += epochLoss
            print("\nEpoch %d loss = %.4f\n" % (e + 1, epochLoss))
        averageLoss /= nEpochs
        print("\nGrand average loss = %.4f\n" % averageLoss)

    def _clear_special_tokens(self, words):
        """Clear all the PAD, UNK, SOS, EOS to avoid inflated BLEU.
        
        Args:
            words: a list of tokens.
        Returns:
            a list of tokens which are not special tokens.
        """
        return [
            word for word in words
            if word not in set(["PAD", "UNK", "SOS", "EOS"])
        ]

    def evaluate_pair(self, predWords, targetWords):
        """Compute the BLEU score of a prediction given a reference.
        
        Args:
            predWords: predicted words (a list of strings).
            targetWords: reference, same type as predWords.
        Returns:
            The BLEU score (uses nltk.translate.bleu_score.sentence_bleu).
        """
        return bleu([self._clear_special_tokens(targetWords)],
                    self._clear_special_tokens(predWords))

    def evaluate_random(self, size, saveResults, printResults=True):
        """Randomly evaluate samples from the test set (which is loaded upon construction).
        
        Args:
            size: number of samples evaluated (as a single batch).
            printResults: print input, prediction and gold translation to console. (default=True)
        Returns:
            The average BLEU score in the batch.
        """
        inputs, inputsLen, targets, targetsLen = self.testIter.random_batch(
            size)
        # Run encoder
        encoderHidden = None
        encoderOutput, encoderHidden = self.encoder(inputs, inputsLen,
                                                    encoderHidden)
        # Run decoder
        decoderInput = Variable(
            torch.LongTensor([self.indexer.get_index('SOS')] * size))
        decoderContext = Variable(torch.zeros(size, self.decoder.hiddenSize))
        decoderHidden = encoderHidden
        maxTargetLen = max(targetsLen)
        predictions = []
        for di in range(maxTargetLen):
            decoderOutput, decoderHidden, decoderContext, attentionWeights = self.decoder(
                decoderInput, decoderHidden, decoderContext, encoderOutput)
            topValues, topIndices = decoderOutput.data.topk(1)  # <bc,1>
            decoderInput = Variable(topIndices.squeeze())  # <bc,1> -> <bc,>
            predictions.append(topIndices.view(-1).numpy())
        inputs = inputs.data.numpy().transpose()
        predictions = np.array(predictions).transpose()  # <mt,bc> -> <bc,mt>
        targets = targets.data.numpy().transpose()
        bleuList = []
        results = []
        for i, (input, pred,
                target) in enumerate(zip(inputs, predictions, targets)):
            predWords = self.indexer.to_words(pred)
            targetWords = self.indexer.to_words(target)
            bleuCurr = self.evaluate_pair(predWords, targetWords)
            bleuList.append(bleuCurr)
            inputSent = self.indexer.to_sent(input)
            predSent = self.indexer.to_sent(pred)
            targetSent = self.indexer.to_sent(target)
            results.append([inputSent, predSent, targetSent])
            if printResults:
                print("Example %d" % (i + 1))
                print("INPUT >> %s" % inputSent)
                print("PRED >> %s" % predSent)
                print("TRUE >> %s" % targetSent)
                print("[BLEU] %.2f\n" % bleuCurr)
        averageBleu = np.mean(bleuList)
        if saveResults:
            return averageBleu, results
        return averageBleu

    def evaluate(self, nBatches=10, saveResults=True):
        """Randomly evaluate a given number of batches.
        
        Args:
            nBatches: the number of random batches to be evaluated.
        """
        averageBleuList = []
        for i in range(nBatches):
            if saveResults:
                averageBleu, results = self.evaluate_random(self.batchSize,
                                                            saveResults,
                                                            printResults=False)
                averageBleuList.append(averageBleu)
                with open(self.resultSavePath, 'a') as f:
                    if i == 0:
                        f.write(self._model_config())
                    for input, pred, target in results:
                        f.write('INPUT  >> ' + input + '\n')
                        f.write('PRED   >> ' + pred + '\n')
                        f.write('TARGET >> ' + target + '\n\n')
            else:
                averageBleuList.append(
                    self.evaluate_random(self.batchSize,
                                         saveResults,
                                         printResults=False))
        print("Average BLEU score over %d examples is %.4f" %
              (self.batchSize * nBatches, np.mean(averageBleuList)))

    def evaluate_given(self, sent, maxLen=20):
        """Evaluate a give sentence.
        
        Args:
            sentence: a single string. OOVs are treated as UNKs.
            maxLen: the max number of decoding steps.
        """
        sent = sent.split()
        sentCode = [self.indexer.get_index(word, add=False) for word in sent]
        if any(i == -1 for i in sentCode):
            raise Exception("This sentence contains out of vocabulary words!")
        input = Variable(torch.LongTensor(sentCode)).view(-1, 1)
        inputLen = np.array([len(sentCode)])
        # Run encoder
        encoderHidden = None
        encoderOutput, encoderHidden = self.encoder(input, inputLen,
                                                    encoderHidden)
        # Run decoder
        decoderInput = Variable(
            torch.LongTensor([self.indexer.get_index('SOS')] * 1))
        decoderContext = Variable(torch.zeros(1, self.decoder.hiddenSize))
        decoderHidden = encoderHidden
        pred = []
        for di in range(maxLen):
            decoderOutput, decoderHidden, decoderContext, attentionWeights = self.decoder(
                decoderInput, decoderHidden, decoderContext, encoderOutput)
            topValues, topIndices = decoderOutput.data.topk(1)  # <bc,1>
            decoderInput = Variable(topIndices.squeeze())  # <bc,1> -> <bc,>
            predIndex = topIndices.view(-1).numpy()[0]
            if predIndex == self.indexer.get_index('EOS'):
                break
            pred.append(predIndex)
        print("INPUT >> %s" % ' '.join(sent))
        print("PRED >> %s\n" % ' '.join(self.indexer.to_words(pred)))