# Example #1
# 0
def evaluation_hotpot(model, eval_file, config, args):
    """Evaluate a trained model on the HotpotQA test split.

    For each example, picks the highest-scoring graph node, extracts an
    answer span from that node's context (end within 10 tokens of the
    start), and builds predictions in the official HotpotQA eval format.

    Args:
        model: wrapper exposing ``model.network(batch, device)`` that
            returns ``(node_logits, mrc_logits)``.
        eval_file: unused; the test path is read from
            ``config['system']['test_data']``.  Kept for signature
            compatibility with existing callers.
        config: nested config dict (``system``, ``model``, ``training``).
        args: namespace with at least ``tokenizer`` and ``device``.

    Returns:
        dict with keys ``'answer'`` (qid -> answer string) and ``'sp'``
        (empty dict; supporting facts are not predicted here).
    """
    dataset = HotpotDataset(config["system"]['test_data'], config["model"],
                            False, args.tokenizer)
    device = args.device
    dataloader = DataLoader(dataset=dataset,
                            batch_size=config['training']['test_batch_size'],
                            collate_fn=batcher_hotpot(device),
                            shuffle=False,
                            num_workers=0)
    logging.info(
        "================================================================================="
    )
    total = 0
    pred_dict = dict()
    span_correct = 0
    label_correct = 0
    # NOTE(review): `batch[5][0]` and the single topk per batch assume the
    # test loader yields one example per batch -- confirm test_batch_size.
    for batch in tqdm(dataloader):

        logits, mrc_logits = model.network(batch, device)

        # Highest-scoring graph node is the predicted answer node.
        _, index = logits.topk(1)

        label = batch[1]
        g = batch[0]

        # Token offset where this node's context begins in the input.
        B_start = g.ndata['B_start'][index.item()]

        start_logits, end_logits = mrc_logits.split(1, dim=-1)
        ### find the span, where the end position is within 10 distance of
        ### the start position (slicing clamps safely at sequence length)
        _, indices = start_logits.squeeze(-1)[index.item(),
                                              B_start:].topk(1)
        start_index = indices[0]
        start = start_index + B_start
        ending = start + 10
        _, end_idx = end_logits.squeeze(-1)[index.item(),
                                            start:ending].topk(1)
        end = end_idx[0] + start

        # Span relative to the node's own context (used for decoding below).
        start_pred = (start - B_start).item()
        end_pred = (end - B_start).item()
        total += 1

        if batch[2][index.item()] == start.item() and batch[3][
                index.item()] == end.item():
            span_correct += 1

        if label[index.item()].item() == 1:
            label_correct += 1

        pred_dict[batch[5][0]] = {
            'node': index.item(),
            'span': [start_pred, end_pred]
        }

    #### Generate prediction file for official eval
    graphs = dataset.data
    final_answer = dict()
    for graph in graphs:
        qid = graph['qid']
        pred = pred_dict[qid]

        for node in graph['node']:
            if node['node_id'] == pred['node']:
                ctx_str = ''.join(node['full_name'])
                context_tokens = node['context']
                span = pred['span']
                pred_str = context_tokens[span[0]:span[1] + 1]
                res = find_start_end_before_tokenized(ctx_str, [pred_str])
                if res[0] == (0, 0):
                    # Could not map the span back into the raw string;
                    # fall back to de-tokenizing the wordpieces directly.
                    tok_text = " ".join(pred_str)
                    tok_text = tok_text.replace(" ##", "")
                    tok_text = tok_text.replace("##", "")

                    tok_text = tok_text.strip()
                    final_answer[qid] = " ".join(tok_text.split())

                else:
                    final_answer[qid] = ctx_str[res[0][0]:res[0][1] + 1]
    final_pred = {'answer': final_answer, 'sp': dict()}

    ## the test file does not have correct labels, therefore we don't count
    ## the accuracy.  Guard the division so an empty dataset logs 0 instead
    ## of raising ZeroDivisionError.
    denom = max(total, 1)
    accu = label_correct / denom
    logging.info("********* Node accuracy ************{}".format(accu))
    accu = span_correct / denom
    logging.info("********* Span accuracy ************{}".format(accu))
    return final_pred
# Example #2
# 0
def train_hotpot(model, index, config, args, best_score, optimizer, scheduler):
    """Run one epoch of HotpotQA fine-tuning (node + span losses).

    Args:
        model: wrapper exposing ``model.network(batch, device)``,
            ``model.train()`` / ``model.eval()`` and ``model.save(path)``.
        index: epoch index (not used inside the loop; kept for callers).
        config: nested config dict (``system``, ``model``, ``training``).
        args: namespace with ``tokenizer``, ``device``, ``fp16``,
            ``n_gpu``, ``gradient_accumulation_steps``, ``checkpoint``.
        best_score: best validation score seen so far (returned unchanged;
            see the checkpoint note below).
        optimizer: torch optimizer (fp16 wrapper exposes ``backward``).
        scheduler: LR scheduler stepped once per optimizer step.

    Returns:
        best_score, unchanged.  ``evaluation_hotpot`` returns a prediction
        dict rather than a scalar metric, so best-model tracking cannot be
        performed here; checkpoints are saved at every eval interval.
    """
    model.train()
    dataset = HotpotDataset(config["system"]['train_data'], config["model"],
                            True, args.tokenizer)
    device = args.device
    train_sampler = RandomSampler(dataset)
    dataloader = DataLoader(dataset=dataset,
                            sampler=train_sampler,
                            batch_size=config['training']['train_batch_size'],
                            collate_fn=batcher_hotpot(device),
                            num_workers=0)

    print_loss = 0
    bce_loss_logits = nn.BCEWithLogitsLoss()

    for step, batch in enumerate(tqdm(dataloader)):

        logits, mrc_logits = model.network(batch, device)
        # Node-classification loss only over nodes with a real label
        # (label -1 marks padding / unlabeled nodes).
        pos_node_idx = [
            i for i in range(batch[1].size(0)) if batch[1][i].item() != -1
        ]
        if args.fp16:
            # BCEWithLogitsLoss requires targets in the logits' dtype.
            node_loss = bce_loss_logits(logits[pos_node_idx],
                                        batch[1][pos_node_idx].half())
        else:
            node_loss = bce_loss_logits(logits[pos_node_idx],
                                        batch[1][pos_node_idx])

        start_logits, end_logits = mrc_logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        # Span loss only for answerable examples (batch[4] == 0).
        pos_idx = [
            i for i in range(batch[4].size(0)) if batch[4][i].item() == 0
        ]
        start_positions = batch[2]
        end_positions = batch[3]

        # sometimes the start/end positions are outside our model inputs,
        # we ignore these exs: clamp to seq_len, then ignore_index skips them.
        ignored_index = start_logits.size(1)
        start_positions.clamp_(0, ignored_index)
        end_positions.clamp_(0, ignored_index)

        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
        start_loss = loss_fct(start_logits[pos_idx], start_positions[pos_idx])
        end_loss = loss_fct(end_logits[pos_idx], end_positions[pos_idx])

        loss = (start_loss + end_loss) / 2 + node_loss

        if args.n_gpu > 1:
            loss = loss.mean()

        if args.gradient_accumulation_steps > 1:
            loss = loss / args.gradient_accumulation_steps
        print_loss += loss.data.cpu().numpy()

        if args.fp16:
            # fp16 optimizer wrapper handles loss scaling internally.
            optimizer.backward(loss)
        else:
            loss.backward()

        if (step + 1) % args.gradient_accumulation_steps == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        if (step + 1) % args.checkpoint == 0:
            logging.info("********* loss ************{}".format(print_loss))
            print_loss = 0
            model.eval()
            eval_file = config['system']['validation_data']
            # BUG FIX: evaluation_hotpot returns a single prediction dict;
            # the old `auc, _ = evaluation_hotpot(...)` unpacked it into its
            # two dict keys, making auc the string 'answer' and crashing the
            # `auc > best_score` comparison.  No numeric metric is returned,
            # so save a checkpoint at every eval interval instead.
            evaluation_hotpot(model, eval_file, config, args)
            model.save(
                os.path.join(
                    base_dir, config['name'],
                    "saved_models/model_finetuned_epoch_{}.pt".format(0)))
            # Restore training mode after evaluation.
            model.train()

    return best_score