Beispiel #1
0
def test(model, args, data, mode='test'):
    """Evaluate ``model`` on the dev or test iterator.

    Args:
        model: Callable invoked with keyword arguments ``p`` and ``h`` (and
            ``char_p`` / ``char_h`` when ``args.use_char_emb`` is set); its
            output is fed to ``nn.CrossEntropyLoss`` together with
            ``batch.label``.
        args: Options object; ``use_char_emb`` and ``gpu`` are read here.
        data: Object exposing ``dev_iter``, ``test_iter`` and
            ``characterize``.
        mode: ``'dev'`` selects ``data.dev_iter``; any other value selects
            ``data.test_iter``.

    Returns:
        A tuple ``(mean_loss, mrr)`` where ``mean_loss`` is the mean of the
        per-batch losses and ``mrr`` is the first value returned by
        ``compute_DOUBAN(ids, scores, labels)``.
    """
    iterator = iter(data.dev_iter if mode == 'dev' else data.test_iter)

    criterion = nn.CrossEntropyLoss()
    model.eval()
    ids, scores, labels = [], [], []
    losses = []

    # Pure inference: disabling autograd avoids building a graph per batch,
    # which only costs memory and time during evaluation.
    with torch.no_grad():
        for batch in iterator:
            s1, s2 = batch.q1, batch.q2
            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                char_p = torch.LongTensor(data.characterize(s1))
                char_h = torch.LongTensor(data.characterize(s2))

                # A negative gpu index means "stay on the CPU".
                if args.gpu > -1:
                    char_p = char_p.cuda(args.gpu)
                    char_h = char_h.cuda(args.gpu)

                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)
            losses.append(criterion(pred, batch.label).item())

            # Collect per-example ids, raw model outputs and gold labels for
            # the ranking metrics computed after the loop.
            ids.extend(batch.id)
            scores.append(pred.detach().cpu().numpy())
            labels.append(batch.label.to('cpu').numpy())

    labels = np.concatenate(labels, 0)
    scores = np.concatenate(scores, 0)
    # compute_DOUBAN returns four values; only the first one is reported.
    eval_DOUBAN_MRR, _, _, _ = compute_DOUBAN(ids, scores, labels)

    return np.mean(losses), eval_DOUBAN_MRR
Beispiel #2
0
    def test_eval(self):
        """Score ``test.csv`` with the saved checkpoint and print the metrics.

        Reads examples from ``<data_dir>/test.csv``, converts them to
        features, loads the weights stored at
        ``<output_dir>/pytorch_model.bin``, runs the model batch by batch
        without gradients, and prints the values returned by
        ``compute_DOUBAN`` and by ``r_at_k`` for k = 1, 2 and 5.

        Returns:
            None. The metrics are only printed.
        """
        data = DATAUBUNTU(debug=False, data_dir=self.data_dir)
        test_examples = data.read_examples(
            os.path.join(self.data_dir, 'test.csv'))
        print('eval_examples的数量', len(test_examples))

        ID = [x.guid for x in test_examples]

        test_features = data.convert_examples_to_features(
            test_examples, self.tokenizer, self.max_seq_length)

        # One long tensor per feature field, in the order the evaluation
        # loop below unpacks them; the labels go last.
        field_names = ('input_ids', 'input_mask', 'segment_ids',
                       'utterance_mask', 'response_mask', 'history_mask')
        tensors = [
            torch.tensor(data.select_field(test_features, name),
                         dtype=torch.long) for name in field_names
        ]
        tensors.append(
            torch.tensor([f.label for f in test_features], dtype=torch.long))
        test_data = TensorDataset(*tensors)

        # Sequential sampling keeps predictions aligned with ``ID``.
        test_dataloader = DataLoader(test_data,
                                     sampler=SequentialSampler(test_data),
                                     batch_size=self.eval_batch_size)

        config = BertConfig.from_pretrained(self.model_name_or_path,
                                            num_labels=self.num_labels)
        model = BertForSequenceClassification.from_pretrained(os.path.join(
            self.output_dir, "pytorch_model.bin"),
                                                              self.args,
                                                              config=config)
        model.to(self.device)
        model.eval()

        gold_labels = []
        scores = []

        for (input_ids, input_mask, segment_ids, utterance_mask,
             response_mask, history_mask, label_ids) in test_dataloader:
            with torch.no_grad():
                logits = model(
                    input_ids=input_ids.to(self.device),
                    token_type_ids=segment_ids.to(self.device),
                    attention_mask=input_mask.to(self.device),
                    utterance_mask=utterance_mask.to(self.device),
                    response_mask=response_mask.to(self.device),
                    history_mask=history_mask.to(self.device),
                ).detach().cpu().numpy()
            scores.append(logits)
            # Labels are only needed as numpy arrays, so they are never
            # moved to the compute device.
            gold_labels.append(label_ids.cpu().numpy())
        gold_labels = np.concatenate(gold_labels, 0)
        scores = np.concatenate(scores, 0)

        # Compute the evaluation metrics. Ids, scores and labels must all
        # describe the same examples.
        assert len(ID) == scores.shape[0] == gold_labels.shape[0]
        eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
            ID, scores, gold_labels)
        r_at_1 = r_at_k(ID, scores, gold_labels, 1)
        r_at_2 = r_at_k(ID, scores, gold_labels, 2)
        r_at_5 = r_at_k(ID, scores, gold_labels, 5)
        print(
            'eval_MRR',
            eval_DOUBAN_MRR,
            eval_DOUBAN_mrr,
            'eval_MAP',
            eval_DOUBAN_MAP,
            'eval_Precision1',
            eval_Precision1,
            'r10@1',
            r_at_1,
            'r10@2',
            r_at_2,
            'r10@5',
            r_at_5,
        )
Beispiel #3
0
    def train(self):
        """Fine-tune the classifier and keep the checkpoint with the best dev MRR.

        Runs ``self.train_steps`` batches drawn from the training dataloader.
        Gradients are accumulated over ``self.gradient_accumulation_steps``
        batches before each optimizer/scheduler step. Every
        ``self.eval_steps * self.gradient_accumulation_steps`` batches the
        running training loss is logged and, when ``self.do_eval`` is set,
        the model is evaluated on the eval dataloader: the metrics are
        printed, appended to ``<output_dir>/eval_results.txt``, and the
        weights are saved to ``<output_dir>/pytorch_model.bin`` whenever the
        MRR returned by ``compute_DOUBAN`` improves on the best seen so far.

        Returns:
            None.
        """
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        train_dataloader, eval_dataloader, train_examples, eval_examples = self.create_dataloader(
        )

        num_train_optimization_steps = self.train_steps

        # Prepare model
        config = BertConfig.from_pretrained(self.model_name_or_path,
                                            num_labels=self.num_labels)
        model = BertForSequenceClassification.from_pretrained(
            self.model_name_or_path, self.args, config=config)
        model.to(self.device)
        model.train()

        # Prepare optimizer: parameters whose name contains one of the
        # ``no_decay`` fragments are excluded from weight decay.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            self.weight_decay
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]

        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=self.learning_rate,
                          eps=self.adam_epsilon)
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=self.warmup_steps,
                                         t_total=self.train_steps)

        global_step = 0

        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", self.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)

        best_MRR = 0
        # Loss sum and batch count since the last report.
        tr_loss = 0
        nb_tr_steps = 0
        # NOTE(review): if ``cycle`` is ``itertools.cycle`` it replays the
        # batches cached during the first pass instead of re-iterating the
        # dataloader (no reshuffling, all batches kept in memory) - confirm.
        train_dataloader = cycle(train_dataloader)

        # Reporting/evaluation interval, measured in batches.
        eval_interval = self.eval_steps * self.gradient_accumulation_steps

        for step in range(num_train_optimization_steps):
            batch = tuple(t.to(self.device) for t in next(train_dataloader))
            (input_ids, input_mask, segment_ids, utterance_mask,
             response_mask, history_mask, label_ids) = batch
            loss = model(input_ids=input_ids,
                         token_type_ids=segment_ids,
                         attention_mask=input_mask,
                         utterance_mask=utterance_mask,
                         response_mask=response_mask,
                         history_mask=history_mask,
                         labels=label_ids)
            tr_loss += loss.item()
            nb_tr_steps += 1
            train_loss = round(tr_loss / nb_tr_steps, 4)

            # NOTE(review): the loss is not divided by
            # gradient_accumulation_steps, so accumulated gradients are
            # summed rather than averaged - confirm this is intended.
            loss.backward()

            # Step on the loop index so an update happens after exactly
            # ``gradient_accumulation_steps`` batches. ``nb_tr_steps`` cannot
            # be used for this: it is reset at every report, and testing it
            # after the increment fires one batch early.
            if (step + 1) % self.gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                scheduler.step()
                global_step += 1

            if (step + 1) % eval_interval != 0:
                continue

            tr_loss = 0
            nb_tr_steps = 0
            logger.info("***** Report result *****")
            logger.info("  %s = %s", 'global_step', str(global_step))
            logger.info("  %s = %s", 'train loss', str(train_loss))

            if not self.do_eval:
                continue

            gold_labels = []
            scores = []
            ID = [x.guid for x in eval_examples]

            logger.info("***** Running evaluation *****")
            logger.info("  Num examples = %d", len(eval_examples))
            logger.info("  Batch size = %d", self.eval_batch_size)

            model.eval()
            eval_loss = 0
            nb_eval_steps = 0
            for (input_ids, input_mask, segment_ids, utterance_mask,
                 response_mask, history_mask, label_ids) in eval_dataloader:
                input_ids = input_ids.to(self.device)
                input_mask = input_mask.to(self.device)
                segment_ids = segment_ids.to(self.device)
                utterance_mask = utterance_mask.to(self.device)
                response_mask = response_mask.to(self.device)
                history_mask = history_mask.to(self.device)
                label_ids = label_ids.to(self.device)

                # The model is called twice: with ``labels`` its output is
                # used as the loss, without them as the logits.
                with torch.no_grad():
                    tmp_eval_loss = model(
                        input_ids=input_ids,
                        token_type_ids=segment_ids,
                        attention_mask=input_mask,
                        utterance_mask=utterance_mask,
                        response_mask=response_mask,
                        history_mask=history_mask,
                        labels=label_ids)
                    logits = model(
                        input_ids=input_ids,
                        token_type_ids=segment_ids,
                        attention_mask=input_mask,
                        utterance_mask=utterance_mask,
                        response_mask=response_mask,
                        history_mask=history_mask,
                    )

                scores.append(logits.detach().cpu().numpy())
                gold_labels.append(label_ids.to('cpu').numpy())
                eval_loss += tmp_eval_loss.mean().item()
                nb_eval_steps += 1

            gold_labels = np.concatenate(gold_labels, 0)
            scores = np.concatenate(scores, 0)
            model.train()
            eval_loss = eval_loss / nb_eval_steps
            eval_accuracy = accuracyCQA(scores, gold_labels)
            eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
                ID, scores, gold_labels)
            r_at_1 = r_at_k(ID, scores, gold_labels, 1)
            r_at_2 = r_at_k(ID, scores, gold_labels, 2)
            r_at_5 = r_at_k(ID, scores, gold_labels, 5)
            print('eval_F1', eval_accuracy, 'eval_MRR',
                  eval_DOUBAN_MRR, 'eval_MAP', eval_DOUBAN_MAP,
                  'eval_Precision1', eval_Precision1, 'r10@1', r_at_1,
                  'r10@2', r_at_2, 'r10@5', r_at_5, 'global_step',
                  global_step, 'loss', train_loss)
            result = {
                'eval_loss': eval_loss,
                'eval_F1': eval_accuracy,
                'eval_MRR': eval_DOUBAN_MRR,
                'eval_MAP': eval_DOUBAN_MAP,
                'eval_Precision1': eval_Precision1,
                'r10@1': r_at_1,
                'r10@2': r_at_2,
                'r10@5': r_at_5,
                'global_step': global_step,
                'loss': train_loss
            }

            # Append this evaluation round to the results file and mirror
            # each entry to the log.
            output_eval_file = os.path.join(self.output_dir,
                                            "eval_results.txt")
            with open(output_eval_file, "a") as writer:
                for key in sorted(result.keys()):
                    logger.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))
                writer.write('*' * 80)
                writer.write('\n')

            if eval_DOUBAN_MRR > best_MRR:
                print("=" * 80)
                print("Best MRR", eval_DOUBAN_MRR)
                print("Saving Model......")
                best_MRR = eval_DOUBAN_MRR
                # Save the underlying model when it is wrapped in an object
                # exposing ``.module``.
                model_to_save = model.module if hasattr(
                    model, 'module') else model
                output_model_file = os.path.join(
                    self.output_dir, "pytorch_model.bin")
                torch.save(model_to_save.state_dict(),
                           output_model_file)
                print("=" * 80)
            else:
                print("=" * 80)
Beispiel #4
0
    def train(self):
        """Train a ``Classifier`` and keep the weights with the best eval MRR.

        Runs ``self.train_steps`` optimisation steps with Adam (lr 0.001) and
        cross-entropy loss. Every ``self.eval_steps`` steps the running
        training loss is logged, the model is scored on the test dataloader,
        the metrics returned by ``compute_DOUBAN`` are printed and appended
        to ``<output_dir>/eval_results.txt``, and the state dict is written
        to ``<output_dir>/pytorch_model.bin`` when the MRR improves.

        Returns:
            None.
        """
        trainset, train_loader, testset, test_loader = self.create_dataloader()

        model = Classifier(1, 256, trainset.num_classes).to(self.device)
        loss_func = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        model.train()

        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(trainset.ids))
        logger.info("  Batch size = %d", self.train_batchsize)
        logger.info("  Num steps = %d", self.train_steps)

        global_step = 0
        best_MRR = 0
        # Loss sum and step count since the last report.
        loss_since_report = 0
        steps_since_report = 0
        batches = cycle(train_loader)

        for each_step in range(self.train_steps):
            bg, label = next(batches)
            loss = loss_func(model(bg), label)

            loss_since_report += loss.item()
            steps_since_report += 1
            train_loss = round(loss_since_report / steps_since_report, 4)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            global_step += 1

            # Nothing more to do until the next reporting boundary.
            if (each_step + 1) % (self.eval_steps) != 0:
                continue

            loss_since_report = 0
            steps_since_report = 0
            logger.info("***** Report result *****")
            logger.info("  %s = %s", 'global_step', str(global_step))
            logger.info("  %s = %s", 'train loss', str(train_loss))
            logger.info("***** Running evaluation *****")
            logger.info("  Num examples = %d", len(testset.ids))
            logger.info("  Batch size = %d", self.eval_batchsize)

            ids = testset.ids
            score_chunks = []
            label_chunks = []
            model.eval()
            for eval_inputs, eval_targets in test_loader:
                with torch.no_grad():
                    batch_logits = model(eval_inputs)
                score_chunks.append(batch_logits.detach().cpu().numpy())
                label_chunks.append(eval_targets.detach().cpu().numpy())
            scores = np.concatenate(score_chunks, 0)
            labels = np.concatenate(label_chunks, 0)
            # Back to training mode before the next optimisation step.
            model.train()

            assert len(ids) == len(scores) == len(labels)
            eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
                ids, scores, labels)
            print('eval_MRR', eval_DOUBAN_MRR, eval_DOUBAN_mrr, 'eval_MAP',
                  eval_DOUBAN_MAP, 'eval_Precision1', eval_Precision1)

            result = {
                'eval_MRR': eval_DOUBAN_MRR,
                'eval_MAP': eval_DOUBAN_MAP,
                'eval_Precision1': eval_Precision1,
                'global_step': global_step,
                'loss': train_loss
            }
            # Append this round to the results file, mirroring it to the log.
            output_eval_file = os.path.join(self.output_dir,
                                            "eval_results.txt")
            with open(output_eval_file, "a") as writer:
                for key in sorted(result):
                    value = str(result[key])
                    logger.info("  %s = %s", key, value)
                    writer.write("%s = %s\n" % (key, value))
                writer.write('*' * 80)
                writer.write('\n')

            improved = eval_DOUBAN_MRR > best_MRR
            print("=" * 80)
            if improved:
                print("Best MRR", eval_DOUBAN_MRR)
                print("Saving Model......")
                best_MRR = eval_DOUBAN_MRR
                # Save the underlying model when it is wrapped in an object
                # exposing ``.module``.
                if hasattr(model, 'module'):
                    model_to_save = model.module
                else:
                    model_to_save = model
                output_model_file = os.path.join(self.output_dir,
                                                 "pytorch_model.bin")
                torch.save(model_to_save.state_dict(), output_model_file)
                print("=" * 80)
Beispiel #5
0
    def test_eval(self):
        """Score ``test.csv`` with the saved checkpoint and print the metrics.

        Reads examples from ``<data_dir>/test.csv``, converts them to
        features, loads the weights stored at
        ``<output_dir>/pytorch_model.bin``, runs the model batch by batch
        without gradients, and prints the values returned by
        ``compute_DOUBAN`` and ``compute_5R20``.

        Returns:
            None. The metrics are only printed.
        """
        data = DATADOUBAN(debug=False, data_dir=self.data_dir)
        test_examples = data.read_examples(
            os.path.join(self.data_dir, 'test.csv'))
        print('eval_examples的数量', len(test_examples))

        questions = [x.text_a for x in test_examples]
        ID = [x.guid for x in test_examples]

        test_features = data.convert_examples_to_features(
            test_examples, self.tokenizer, self.max_seq_length)

        # One long tensor per feature field, in the order the evaluation
        # loop below unpacks them; the labels go last.
        field_names = ('input_ids', 'input_mask', 'segment_ids')
        tensors = [
            torch.tensor(data.select_field(test_features, name),
                         dtype=torch.long) for name in field_names
        ]
        tensors.append(
            torch.tensor([f.label for f in test_features], dtype=torch.long))
        test_data = TensorDataset(*tensors)

        # Sequential sampling keeps predictions aligned with ``ID`` and
        # ``questions``.
        test_dataloader = DataLoader(test_data,
                                     sampler=SequentialSampler(test_data),
                                     batch_size=self.eval_batch_size)

        config = BertConfig.from_pretrained(self.model_name_or_path,
                                            num_labels=self.num_labels)
        model = BertForSequenceClassification.from_pretrained(os.path.join(
            self.output_dir, "pytorch_model.bin"),
                                                              self.args,
                                                              config=config)
        model.to(self.device)
        model.eval()

        gold_labels = []
        scores = []

        for input_ids, input_mask, segment_ids, label_ids in test_dataloader:
            with torch.no_grad():
                logits = model(
                    input_ids=input_ids.to(self.device),
                    token_type_ids=segment_ids.to(self.device),
                    attention_mask=input_mask.to(self.device),
                ).detach().cpu().numpy()
            scores.append(logits)
            # Labels are only needed as numpy arrays, so they are never
            # moved to the compute device.
            gold_labels.append(label_ids.cpu().numpy())
        gold_labels = np.concatenate(gold_labels, 0)
        scores = np.concatenate(scores, 0)

        # Compute the evaluation metrics. Ids, scores and labels must all
        # describe the same examples.
        assert len(ID) == scores.shape[0] == gold_labels.shape[0]
        eval_5R20 = compute_5R20(scores, gold_labels, questions)
        eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
            ID, scores, gold_labels)
        print('eval_MRR', eval_DOUBAN_MRR, eval_DOUBAN_mrr, 'eval_MAP',
              eval_DOUBAN_MAP, 'eval_Precision1', eval_Precision1)
        print('eval_5R20', eval_5R20)