Esempio n. 1
0
def write_results(args, data, data_domain, split, model, model_domain, eval_dict):
    """Write evaluation metrics, predicted parses and gold parses to files.

    All output files share the prefix
    ``<args.full_model_name>_<split>_model_domain_<model_domain>_data_domain_<data_domain>``:

    * ``<prefix>_res.txt``  -- ``eval_dict`` serialised as JSON (skipped when
      ``eval_dict`` is None),
    * ``<prefix>_pred.txt`` -- heads and arc tags decoded by ``model``,
    * ``<prefix>_gold.txt`` -- heads and arc tags taken from ``data``.

    Args:
        args: configuration object; this function reads ``full_model_name``,
            ``alphabets``, ``batch_size`` and ``device`` from it.
        data: dataset handed to ``prepare_data.iterate_batch``.
        data_domain: string used in the output file names.
        split: string used in the output file names.
        model: object whose ``forward`` and ``decode`` methods produce the
            predicted heads and arc tags.
        model_domain: string used in the output file names.
        eval_dict: JSON-serialisable results, or None to skip the results file.
    """
    str_file = args.full_model_name + '_' + split + '_model_domain_' + model_domain + '_data_domain_' + data_domain
    res_filename = str_file + '_res.txt'
    pred_filename = str_file + '_pred.txt'
    gold_filename = str_file + '_gold.txt'
    if eval_dict is not None:
        # save results dictionary into a file
        with open(res_filename, 'w') as f:
            json.dump(eval_dict, f)

    # save predictions and gold labels into files
    pred_writer = Writer(args.alphabets)
    gold_writer = Writer(args.alphabets)
    pred_writer.start(pred_filename)
    # try/finally guarantees that every writer that was started is closed even
    # if the model or a write call raises part-way through the data.
    try:
        gold_writer.start(gold_filename)
        try:
            for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
                # auto_label is part of the batch tuple but not needed here
                word, char, pos, ner, heads, arc_tags, _auto_label, masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(word, char, pos, mask=masks, length=lengths)
                heads_pred, arc_tags_pred, _ = model.decode(out_arc, out_arc_tag, mask=masks, length=lengths,
                                                            leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
                # the writers are fed numpy arrays rather than tensors
                lengths = lengths.cpu().numpy()
                word = word.data.cpu().numpy()
                pos = pos.data.cpu().numpy()
                ner = ner.data.cpu().numpy()
                heads = heads.data.cpu().numpy()
                arc_tags = arc_tags.data.cpu().numpy()
                heads_pred = heads_pred.data.cpu().numpy()
                arc_tags_pred = arc_tags_pred.data.cpu().numpy()
                # writing predictions
                pred_writer.write(word, pos, ner, heads_pred, arc_tags_pred, lengths, symbolic_root=True)
                # writing gold labels
                gold_writer.write(word, pos, ner, heads, arc_tags, lengths, symbolic_root=True)
        finally:
            gold_writer.close()
    finally:
        pred_writer.close()
Esempio n. 2
0
def evaluation(args, data, split, model, domain, epoch, str_res='results'):
    """Evaluate ``model`` on ``data`` for the auto-label prediction task.

    Puts the model in eval mode, decodes auto labels for every batch,
    converts gold and predicted label indices back to instances and stores
    accuracy / precision / recall / F1 (each multiplied by 100) in the
    returned dictionary.

    Args:
        args: configuration object; this function reads ``alphabets``,
            ``batch_size`` and ``device`` from it.
        data: dataset handed to ``prepare_data.iterate_batch``.
        split: passed through to ``print_results``.
        model: object providing ``eval``, ``forward`` and ``decode``.
        domain: passed through to ``print_results``.
        epoch: stored under ``eval_dict['epoch']``.
        str_res: passed through to ``print_results``.

    Returns:
        The dictionary created by ``initialize_eval_dict`` with ``epoch`` and
        the four ``auto_label_*`` scores filled in.
    """
    # evaluate performance on data
    model.eval()
    auto_label_idx2inst = Index2Instance(args.alphabets['auto_label_alphabet'])
    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch
    pred_labels = []
    gold_labels = []
    for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
        # ner, heads and arc_tags come with every batch but are not needed
        # for auto-label scoring, so they are not converted to numpy.
        word, char, pos, _ner, _heads, _arc_tags, auto_label, masks, lengths = batch
        output, masks, lengths = model.forward(word, char, pos, mask=masks, length=lengths)
        auto_label_preds = model.decode(output, mask=masks, length=lengths,
                                        leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
        lengths = lengths.cpu().numpy()
        auto_label = auto_label.data.cpu().numpy()
        auto_label_preds = auto_label_preds.data.cpu().numpy()
        gold_labels += auto_label_idx2inst.index2instance(auto_label, lengths, symbolic_root=True)
        pred_labels += auto_label_idx2inst.index2instance(auto_label_preds, lengths, symbolic_root=True)

    # NOTE(review): the metric functions are called with their default
    # arguments; confirm the default averaging suits this label set.
    eval_dict['auto_label_accuracy'] = accuracy_score(gold_labels, pred_labels) * 100
    eval_dict['auto_label_precision'] = precision_score(gold_labels, pred_labels) * 100
    eval_dict['auto_label_recall'] = recall_score(gold_labels, pred_labels) * 100
    eval_dict['auto_label_f1'] = f1_score(gold_labels, pred_labels) * 100
    print_results(eval_dict, split, domain, str_res)
    return eval_dict
Esempio n. 3
0
def evaluation(args, data, split, model, domain, epoch, str_res='results'):
    """Evaluate ``model`` on ``data`` for dependency parsing.

    Puts the model in eval mode, decodes heads and arc tags for every batch,
    accumulates the counts returned by ``parse.eval_`` into the result
    dictionary and derives the attachment scores ``dp_uas`` and ``dp_las``
    (percentages, punctuation included).

    Args:
        args: configuration object; this function reads ``alphabets``,
            ``batch_size``, ``device`` and ``punct_set`` from it.
        data: dataset handed to ``prepare_data.iterate_batch``.
        split: passed through to ``print_results``.
        model: object providing ``eval``, ``forward`` and ``decode``.
        domain: passed through to ``print_results``.
        epoch: stored under ``eval_dict['epoch']``.
        str_res: passed through to ``print_results``.

    Returns:
        The dictionary created by ``initialize_eval_dict`` with ``epoch``, the
        accumulated ``dp_*`` counts and ``dp_uas`` / ``dp_las`` filled in.
        Both scores are 0.0 when no tokens were evaluated.
    """
    # evaluate performance on data
    model.eval()

    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch
    for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
        # ner and auto_label come with every batch but are not used for parsing scores
        word, char, pos, _ner, heads, arc_tags, _auto_label, masks, lengths = batch
        out_arc, out_arc_tag, masks, lengths = model.forward(word, char, pos, mask=masks, length=lengths)
        heads_pred, arc_tags_pred, _ = model.decode(out_arc, out_arc_tag, mask=masks, length=lengths,
                                                    leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
        # parse.eval_ is fed numpy arrays rather than tensors
        lengths = lengths.cpu().numpy()
        word = word.data.cpu().numpy()
        pos = pos.data.cpu().numpy()
        heads = heads.data.cpu().numpy()
        arc_tags = arc_tags.data.cpu().numpy()
        heads_pred = heads_pred.data.cpu().numpy()
        arc_tags_pred = arc_tags_pred.data.cpu().numpy()
        stats, stats_nopunc, stats_root, num_inst = parse.eval_(word, pos, heads_pred, arc_tags_pred, heads,
                                                                arc_tags, args.alphabets['word_alphabet'], args.alphabets['pos_alphabet'],
                                                                lengths, punct_set=args.punct_set, symbolic_root=True)
        ucorr, lcorr, total, ucm, lcm = stats
        ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
        corr_root, total_root = stats_root
        eval_dict['dp_ucorrect'] += ucorr
        eval_dict['dp_lcorrect'] += lcorr
        eval_dict['dp_total'] += total
        eval_dict['dp_ucomplete_match'] += ucm
        eval_dict['dp_lcomplete_match'] += lcm
        eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
        eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
        eval_dict['dp_total_nopunc'] += total_nopunc
        eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
        eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
        eval_dict['dp_root_correct'] += corr_root
        eval_dict['dp_total_root'] += total_root
        eval_dict['dp_total_inst'] += num_inst

    # Guard against an empty dataset: with no evaluated tokens the division
    # below would fail, so report 0.0 for both scores instead.
    dp_total = eval_dict['dp_total']
    if dp_total:
        eval_dict['dp_uas'] = eval_dict['dp_ucorrect'] * 100 / dp_total  # considering w. punctuation
        eval_dict['dp_las'] = eval_dict['dp_lcorrect'] * 100 / dp_total  # considering w. punctuation
    else:
        eval_dict['dp_uas'] = 0.0
        eval_dict['dp_las'] = 0.0
    print_results(eval_dict, split, domain, str_res)
    return eval_dict