# Example #1 (score: 0)
def evaluation(args, data, split, model, domain, epoch, str_res='results', ner_model=True, predictor=None):
    """Evaluate dependency-parsing performance of ``model`` on ``data``.

    Iterates batches, decodes predicted heads and arc tags, and accumulates
    UAS/LAS statistics (with and without punctuation, plus root accuracy and
    complete-match counts) into an eval dict, which is printed and returned.

    Args:
        args: namespace providing ``batch_size``, ``device``, ``alphabets``
            and ``punct_set``.
        data: dataset consumed by ``prepare_data.iterate_batch_rand_bucket_choosing``.
        split: data-split name, forwarded to ``print_results`` for reporting.
        model: parser exposing ``eval()``, ``forward()`` and ``decode()``.
        domain: domain name, forwarded to ``print_results``.
        epoch: current epoch number; stored under ``eval_dict['epoch']``.
        str_res: results label forwarded to ``print_results``.
        ner_model: when not ``None``, batches carry extra LM/NER feature
            tensors that are forwarded to the model.
            NOTE(review): the default is ``True`` but the branch test below is
            ``is not None``, so passing ``False`` still takes the NER branch —
            confirm that is intended before relying on a falsy value here.
        predictor: optional predictor forwarded to the batch iterator.

    Returns:
        dict: accumulated evaluation statistics, including ``'dp_uas'`` and
        ``'dp_las'`` percentages.
    """
    import torch  # local import: only needed here to disable grad tracking

    # evaluate performance on data
    model.eval()

    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch
    # Fix: evaluation must not build autograd graphs; the original loop
    # tracked gradients, wasting memory and time.
    with torch.no_grad():
        #for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
        for batch in prepare_data.iterate_batch_rand_bucket_choosing(data, args.batch_size, args.device, ner_model=ner_model, predictor=predictor):
            if ner_model is not None:
                # Batch carries extra LM/NER feature tensors for the model.
                word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths, f_f, f_p, b_f, b_p, w_f, mask_v, file_no = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(word, char, pos, mask=masks, length=lengths, f_f=f_f, f_p=f_p, b_f=b_f, b_p=b_p, w_f=w_f, file_no=file_no, mask_v=mask_v)
            else:
                word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(word, char, pos, mask=masks, length=lengths)

            heads_pred, arc_tags_pred, _ = model.decode(out_arc, out_arc_tag, mask=masks, length=lengths,
                                                        leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
            # Move tensors to numpy for the CPU-side evaluation routine.
            lengths = lengths.cpu().numpy()
            word = word.data.cpu().numpy()
            pos = pos.data.cpu().numpy()
            ner = ner.data.cpu().numpy()
            heads = heads.data.cpu().numpy()
            arc_tags = arc_tags.data.cpu().numpy()
            heads_pred = heads_pred.data.cpu().numpy()
            arc_tags_pred = arc_tags_pred.data.cpu().numpy()
            stats, stats_nopunc, stats_root, num_inst = parse.eval_(word, pos, heads_pred, arc_tags_pred, heads,
                                                                    arc_tags, args.alphabets['word_alphabet'], args.alphabets['pos_alphabet'],
                                                                    lengths, punct_set=args.punct_set, symbolic_root=True)
            ucorr, lcorr, total, ucm, lcm = stats
            ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
            corr_root, total_root = stats_root
            eval_dict['dp_ucorrect'] += ucorr
            eval_dict['dp_lcorrect'] += lcorr
            eval_dict['dp_total'] += total
            eval_dict['dp_ucomplete_match'] += ucm
            eval_dict['dp_lcomplete_match'] += lcm
            eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
            eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
            eval_dict['dp_total_nopunc'] += total_nopunc
            eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
            eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
            eval_dict['dp_root_correct'] += corr_root
            eval_dict['dp_total_root'] += total_root
            eval_dict['dp_total_inst'] += num_inst

    # Fix: guard against an empty evaluation set, which previously raised
    # ZeroDivisionError here.
    total_tokens = eval_dict['dp_total']
    eval_dict['dp_uas'] = eval_dict['dp_ucorrect'] * 100 / total_tokens if total_tokens else 0.0  # considering w. punctuation
    eval_dict['dp_las'] = eval_dict['dp_lcorrect'] * 100 / total_tokens if total_tokens else 0.0  # considering w. punctuation
    print_results(eval_dict, split, domain, str_res)
    return eval_dict
# Example #2 (score: 0)
def evaluation(args,
               data,
               split,
               model,
               domain,
               epoch,
               flag,
               str_res='results'):
    """Evaluate dependency-parsing performance of ``model`` on ``data``.

    Iterates batches, decodes predicted heads and arc tags (the decoder here
    additionally receives the gold ``ner`` tensor and ``flag``), and
    accumulates UAS/LAS statistics into an eval dict, which is printed and
    returned.

    Args:
        args: namespace providing ``batch_size``, ``device``, ``alphabets``,
            ``punct_set`` and ``model_path``.
        data: dataset consumed by ``prepare_data.iterate_batch``.
        split: data-split name, forwarded to ``print_results``.
        model: parser exposing ``eval()``, ``forward()`` and ``decode()``.
        domain: domain name, forwarded to ``print_results``.
        epoch: current epoch number; stored under ``eval_dict['epoch']``.
        flag: opaque value forwarded to ``model.decode`` — semantics defined
            by the model; not interpreted here.
        str_res: results label forwarded to ``print_results``.

    Returns:
        dict: accumulated evaluation statistics, including ``'dp_uas'`` and
        ``'dp_las'`` percentages.
    """
    import torch  # local import: only needed here to disable grad tracking

    # evaluate performance on data
    model.eval()
    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch
    # Fix: evaluation must not build autograd graphs; the original loop
    # tracked gradients, wasting memory and time. (Also dropped the unused
    # local `alpha_dir = args.model_path`.)
    with torch.no_grad():
        for batch in prepare_data.iterate_batch(data, args.batch_size,
                                                args.device):
            word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths = batch
            out_arc, out_arc_tag, masks, lengths = model.forward(word,
                                                                 char,
                                                                 pos,
                                                                 mask=masks,
                                                                 length=lengths)
            heads_pred, arc_tags_pred, _ = model.decode(
                out_arc,
                out_arc_tag,
                ner,
                flag,
                mask=masks,
                length=lengths,
                leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
            # Move tensors to numpy for the CPU-side evaluation routine.
            lengths = lengths.cpu().numpy()
            word = word.data.cpu().numpy()
            pos = pos.data.cpu().numpy()
            ner = ner.data.cpu().numpy()
            heads = heads.data.cpu().numpy()
            arc_tags = arc_tags.data.cpu().numpy()
            heads_pred = heads_pred.data.cpu().numpy()
            arc_tags_pred = arc_tags_pred.data.cpu().numpy()
            stats, stats_nopunc, stats_root, num_inst = parse.eval_(
                word,
                pos,
                heads_pred,
                arc_tags_pred,
                heads,
                arc_tags,
                args.alphabets['word_alphabet'],
                args.alphabets['pos_alphabet'],
                lengths,
                punct_set=args.punct_set,
                symbolic_root=True)
            ucorr, lcorr, total, ucm, lcm = stats
            ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
            corr_root, total_root = stats_root
            eval_dict['dp_ucorrect'] += ucorr
            eval_dict['dp_lcorrect'] += lcorr
            eval_dict['dp_total'] += total
            eval_dict['dp_ucomplete_match'] += ucm
            eval_dict['dp_lcomplete_match'] += lcm
            eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
            eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
            eval_dict['dp_total_nopunc'] += total_nopunc
            eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
            eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
            eval_dict['dp_root_correct'] += corr_root
            eval_dict['dp_total_root'] += total_root
            eval_dict['dp_total_inst'] += num_inst

    # Fix: guard against an empty evaluation set, which previously raised
    # ZeroDivisionError here.
    total_tokens = eval_dict['dp_total']
    eval_dict['dp_uas'] = (eval_dict['dp_ucorrect'] * 100 / total_tokens
                           if total_tokens else 0.0)  # considering w. punctuation
    eval_dict['dp_las'] = (eval_dict['dp_lcorrect'] * 100 / total_tokens
                           if total_tokens else 0.0)  # considering w. punctuation
    print_results(eval_dict, split, domain, str_res)
    return eval_dict