Example no. 1
# Assumes a project-local `metrics` module exposing hit / mrr / ndcg helpers.
import metrics


def evaluate(predictions, labels):
    # The ground-truth item is the last element of the label sequence.
    label = int(labels[-1])
    hr = metrics.hit(label, predictions)     # Hit Ratio: is the target in the ranking?
    mrr = metrics.mrr(label, predictions)    # Mean Reciprocal Rank of the target
    ndcg = metrics.ndcg(label, predictions)  # Normalized Discounted Cumulative Gain

    return hr, mrr, ndcg
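
A minimal usage sketch (hypothetical data): `predictions` is the model's ranked list of item ids, and `labels` is a sequence whose last element is the held-out target item.

ranked_items = [42, 7, 13, 99, 5]  # hypothetical top-5 ranking produced by a model
user_history = [3, 8, 42]          # hypothetical session; 42 is the held-out target
hr, mrr, ndcg = evaluate(ranked_items, user_history)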

# Assumed imports for the snippet below; `args`, `TrainData`, and the
# `map` / `mrr` metric helpers are project-level names not shown here.
import logging

import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

logger = logging.getLogger(__name__)


def evaluate(model, tokenizer, eval_file, doc_file, checkpoint, output_dir=None):
    # `doc_file` was a free variable in the original; it is taken as a parameter
    # here so the function is self-contained.
    eval_data = TrainData(data_file=eval_file,
                          doc_file=doc_file,
                          max_length=args.max_length,
                          tokenizer=tokenizer,
                          attacked_file=None)

    eval_dataLoader = DataLoader(dataset=eval_data,
                                 batch_size=args.batch_size,
                                 shuffle=False)

    logger.debug("***** Running evaluation {} *****".format(checkpoint))
    logger.debug("  Num examples = %d", len(eval_dataLoader))
    logger.debug("  Batch size = %d", args.batch_size)

    loss = []  # per-batch evaluation losses
    mrrs = []  # per-query MRR scores
    maps = []  # per-query MAP scores

    all_labels = None
    all_logits = None
    model.eval()  # switch to evaluation mode (disables dropout)

    for batch in tqdm(eval_dataLoader, desc="Evaluating"):
        # Keep only the first four tensors and move them to the GPU.
        batch = tuple(t.to('cuda') for t in batch[:4])
        input_ids, token_type_ids, attention_mask, labels = batch

        with torch.no_grad():
            outputs = model(input_ids=input_ids.long(),
                            token_type_ids=token_type_ids.long(),
                            attention_mask=attention_mask,
                            labels=labels)

            eval_loss, logits = outputs[:2]

            loss.append(eval_loss.item())

            # Accumulate labels and logits across batches for corpus-level metrics.
            if all_labels is None:
                all_labels = labels.detach().cpu().numpy()
                all_logits = logits.detach().cpu().numpy()
            else:
                all_labels = np.concatenate(
                    (all_labels, labels.detach().cpu().numpy()), axis=0)
                all_logits = np.concatenate(
                    (all_logits, logits.detach().cpu().numpy()), axis=0)

    # Evaluation metrics: slice the flat label/logit arrays back into per-query
    # groups and score each group. `map` and `mrr` are assumed to be project
    # metric functions (note that `map` shadows the Python builtin).
    start = 0

    for key in eval_data.docs_keys:
        end = start + len(eval_data.docs[key])
        maps.append(map(all_labels[start:end], all_logits[start:end]))
        mrrs.append(mrr(all_labels[start:end], all_logits[start:end]))
        start = end

    return np.array(loss).mean(), np.array(maps).mean(), np.array(mrrs).mean()
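
A usage sketch for this loop (all paths hypothetical; assumes a fine-tuned sequence-classification model, a matching tokenizer, and a populated `args` namespace):

model = model.to('cuda')  # batches are moved to CUDA above, so the model must live there too
avg_loss, mean_ap, mean_rr = evaluate(model, tokenizer,
                                      eval_file='dev.tsv',   # hypothetical path
                                      doc_file='docs.tsv',   # hypothetical path
                                      checkpoint='checkpoint-1000')
logger.info("eval loss = %.4f, MAP = %.4f, MRR = %.4f", avg_loss, mean_ap, mean_rr)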