Example #1
def eval_model(model,
               data,
               metric_meta,
               use_cuda=True,
               with_label=True,
               label_mapper=None,
               task_type=TaskType.Classification):
    if use_cuda:
        model.cuda()
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for idx, (batch_info, batch_data) in enumerate(data):
        # if idx % 100 == 0:
        #     print("predicting {}".format(idx))
        batch_info, batch_data = Collater.patch_data(use_cuda, batch_info,
                                                     batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])

    if task_type == TaskType.Span:
        from experiments.squad import squad_utils
        golds = squad_utils.merge_answers(ids, golds)
        predictions, scores = squad_utils.select_answers(
            ids, predictions, scores)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores,
                               label_mapper)
    return metrics, predictions, scores, golds, ids
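All of these variants share the same skeleton: iterate over pre-collated batches, call `model.predict(batch_info, batch_data)` to get per-batch scores, predictions, and gold labels, accumulate them in flat lists keyed by `batch_info['uids']`, and finally hand everything to `calc_metrics`. The sketch below reproduces that skeleton in a self-contained form with a dummy model and a plain accuracy metric; `DummyModel`, `toy_batches`, and `accuracy` are illustrative stand-ins, not part of mt-dnn.

import random

class DummyModel:
    """Stand-in for a model whose predict() returns (scores, preds, golds)."""
    def predict(self, batch_info, batch_data):
        golds = batch_info["labels"]
        # Fake per-class scores for a binary task, flattened per sample.
        scores = [random.random() for _ in golds for _ in range(2)]
        preds = [int(scores[2 * i + 1] > scores[2 * i]) for i in range(len(golds))]
        return scores, preds, golds

def accuracy(golds, preds):
    return sum(int(g == p) for g, p in zip(golds, preds)) / max(len(golds), 1)

def eval_loop(model, data):
    predictions, golds, scores, ids = [], [], [], []
    for batch_info, batch_data in data:
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info["uids"])
    return accuracy(golds, predictions), predictions, scores, golds, ids

toy_batches = [
    ({"uids": [0, 1], "labels": [0, 1]}, None),
    ({"uids": [2, 3], "labels": [1, 1]}, None),
]
print(eval_loop(DummyModel(), toy_batches))

In the real examples, `Collater.patch_data` additionally moves each batch onto the GPU (or the given `device`) before prediction, and `calc_metrics` plays the role of `accuracy`.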
Example #2
def eval_model(model,
               data,
               metric_meta,
               use_cuda=True,
               with_label=True,
               label_mapper=None):
    if use_cuda:
        model.cuda()
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_info, batch_data in data:
        batch_info, batch_data = Collater.patch_data(use_cuda, batch_info,
                                                     batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores,
                               label_mapper)
    return metrics, predictions, scores, golds, ids
Example #3
def eval_model(
    model,
    data,
    metric_meta,
    device,
    with_label=True,
    label_mapper=None,
    task_type=TaskType.Classification,
):
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for (batch_info, batch_data) in tqdm(data, total=len(data)):
        batch_info, batch_data = Collater.patch_data(device, batch_info,
                                                     batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        scores = merge(score, scores)
        golds = merge(gold, golds)
        predictions = merge(pred, predictions)
        ids = merge(batch_info["uids"], ids)

    if task_type == TaskType.Span:
        predictions, golds = postprocess_qa_predictions(
            golds, scores, version_2_with_negative=False)
    elif task_type == TaskType.SpanYN:
        predictions, golds = postprocess_qa_predictions(
            golds, scores, version_2_with_negative=True)

    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores,
                               label_mapper)
    return metrics, predictions, scores, golds, ids
Example #4
def eval_model(model,
               data,
               metric_meta,
               device,
               with_label=True,
               label_mapper=None,
               task_type=TaskType.Classification):
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for (batch_info, batch_data) in data:
        batch_info, batch_data = Collater.patch_data(device, batch_info,
                                                     batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])

    if task_type == TaskType.Span:
        from experiments.squad import squad_utils
        golds = squad_utils.merge_answers(ids, golds)
        predictions, scores = squad_utils.select_answers(
            ids, predictions, scores)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores,
                               label_mapper)
    return metrics, predictions, scores, golds, ids
Example #5
def eval_model(model, data, metric_meta, use_cuda=True, with_label=True):
    data.reset()
    if use_cuda:
        model.cuda()
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    for batch_meta, batch_data in data:
        score, pred, gold = model.predict(batch_meta, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_meta['uids'])
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores)
    return metrics, predictions, scores, golds, ids
Example #6
def eval_model(model,
               data,
               metric_meta,
               device,
               with_label=True,
               label_mapper=None,
               task_type=TaskType.Classification):
    predictions = []
    golds = []
    scores = []
    ids = []
    metrics = {}
    print("****device={}".format(device))
    for (batch_info, batch_data) in data:
        batch_info, batch_data = Collater.patch_data(device, batch_info,
                                                     batch_data)
        score, pred, gold = model.predict(batch_info, batch_data)
        predictions.extend(pred)
        golds.extend(gold)
        scores.extend(score)
        ids.extend(batch_info['uids'])

    if task_type == TaskType.Span:
        from experiments.squad import squad_utils
        golds = squad_utils.merge_answers(ids, golds)
        predictions, scores = squad_utils.select_answers(
            ids, predictions, scores)
    if with_label:
        metrics = calc_metrics(metric_meta, golds, predictions, scores,
                               label_mapper)
    for i in range(min(len(ids), 10)):
        print("{}\t{}\t{}\t{}\n".format(ids[i], predictions[i], scores[2 * i],
                                        scores[2 * i + 1]))

    #print("score heads={}".format(scores[:10]))
    return metrics, predictions, scores, golds, ids
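The debug prints in this variant assume a binary task: `scores` is a flat list in which each sample occupies `n_class` consecutive entries, so sample `i`'s two class scores sit at indices `2 * i` and `2 * i + 1`. A small, hypothetical helper (not part of mt-dnn) that recovers per-sample rows and argmax predictions from such a flat list, for any `n_class`:

def unflatten_scores(scores, n_class):
    """Split a flat score list into per-sample rows of length n_class."""
    assert len(scores) % n_class == 0
    return [scores[i:i + n_class] for i in range(0, len(scores), n_class)]

def argmax_predictions(scores, n_class):
    """Derive predictions by taking the argmax of each per-sample row."""
    return [max(range(n_class), key=row.__getitem__)
            for row in unflatten_scores(scores, n_class)]

# e.g. two samples, binary classification
flat = [0.1, 0.9, 0.8, 0.2]
print(unflatten_scores(flat, 2))    # [[0.1, 0.9], [0.8, 0.2]]
print(argmax_predictions(flat, 2))  # [1, 0]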
Example #7
    golds = []
    predictions = []
    scores = []
    for sample_id, label in sample_id_2_label_dic.items():
        golds.append(label)
        pred, score_seg = sample_id_2_pred_score_seg_dic[sample_id]
        predictions.append(pred)
        scores.extend(score_seg)
    return golds, predictions, scores


args = parser.parse_args()

task_def_path = args.task_def
task = args.task
task_defs = TaskDefs(task_def_path)

n_class = task_defs.n_class_map[task]
sample_id_2_pred_score_seg_dic = load_score_file(args.score, n_class)

data_format = task_defs.data_format_map[task]
task_type = task_defs.task_type_map[task]
label_mapper = task_defs.global_map.get(task, None)
sample_objs = load_data(args.std_input, data_format, task_type, label_mapper)

golds, predictions, scores = generate_golds_predictions_scores(
    sample_id_2_pred_score_seg_dic, sample_objs)

metrics = calc_metrics(task_defs.metric_meta_map[task], golds, predictions,
                       scores)
print(metrics)
Example #8
    golds = []
    predictions = []
    scores = []
    for sample_id, label in sample_id_2_label_dic.items():
        golds.append(label)
        pred, score_seg = sample_id_2_pred_score_seg_dic[sample_id]
        predictions.append(pred)
        scores.extend(score_seg)
    return golds, predictions, scores


args = parser.parse_args()

task_def_path = args.task_def
task_defs = TaskDefs(task_def_path)
task_def = task_defs.get_task_def(args.task)
n_class = task_def.n_class
sample_id_2_pred_score_seg_dic = load_score_file(args.score, n_class)

data_type = task_def.data_type
task_type = task_def.task_type
label_mapper = task_def.label_vocab
sample_objs = load_data(args.std_input, data_type, task_type, label_mapper)

golds, predictions, scores = generate_golds_predictions_scores(
    sample_id_2_pred_score_seg_dic, sample_objs)

metrics = calc_metrics(task_def.metric_meta, golds, predictions, scores)
print(metrics)
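The offline variants in Examples #7 and #8 work from two dictionaries: `sample_id_2_pred_score_seg_dic`, mapping each uid to a `(prediction, score_segment)` pair as produced by `load_score_file` with `n_class` scores per sample, and `sample_id_2_label_dic`, mapping the same uids to gold labels. Note that the script actually calls `generate_golds_predictions_scores` with `sample_objs`, so the real function presumably derives the uid-to-label dictionary from those objects; the toy run below takes that dictionary directly and uses plain accuracy in place of `calc_metrics`, purely as an illustration of the alignment step.

def generate_golds_predictions_scores_toy(sample_id_2_pred_score_seg_dic,
                                          sample_id_2_label_dic):
    # Align gold labels, predictions, and flat score segments by uid.
    golds, predictions, scores = [], [], []
    for sample_id, label in sample_id_2_label_dic.items():
        golds.append(label)
        pred, score_seg = sample_id_2_pred_score_seg_dic[sample_id]
        predictions.append(pred)
        scores.extend(score_seg)
    return golds, predictions, scores

# Toy data: two samples of a binary task (n_class = 2).
score_dic = {"uid-1": (1, [0.2, 0.8]), "uid-2": (0, [0.7, 0.3])}
label_dic = {"uid-1": 1, "uid-2": 1}

golds, predictions, scores = generate_golds_predictions_scores_toy(score_dic, label_dic)
acc = sum(int(g == p) for g, p in zip(golds, predictions)) / len(golds)
print(golds, predictions, scores, acc)  # [1, 1] [1, 0] [0.2, 0.8, 0.7, 0.3] 0.5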