Example #1
def evalb(parse1, parse2):
    pyparse1 = pyparser.create_from_bracket_string(str(parse1))
    pyparse2 = pyparser.create_from_bracket_string(str(parse2))
    score = pyscorer.Scorer().score_trees(pyparse1, pyparse2)
#     cross_brackets = score.cross_brackets
    f1 = 2 * (score.recall * score.prec) / (score.recall + score.prec)
    return f1 * score.tag_accracy
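A minimal usage sketch for the evalb() helper above, assuming PYEVALB is imported as in Example #11 (from PYEVALB import scorer as pyscorer, parser as pyparser); the bracket strings are illustrative only.

gold = '(S (NP (DT The) (NN cat)) (VP (VBZ sleeps)))'
pred = '(S (NP (DT The) (NN cat)) (VP (VBZ sleeps)))'
# Identical trees: precision, recall and tag accuracy are all 1, so this prints 1.0
print(evalb(gold, pred))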
Example #2
    def evalb(self, gold, pred):
        gold = evalb_parser.create_from_bracket_string(gold)
        pred = evalb_parser.create_from_bracket_string(pred)
        result = scorer.Scorer().score_trees(gold, pred)
        prec, recall = result.prec, result.recall
        fscore = 2 * (prec * recall) / (prec + recall)
        return prec, recall, fscore
Example #3
def evaluate(goldtest):
    scorer = Scorer()
    for gold, test in goldtest:
        goldtree = parser.create_from_bracket_string(gold)
        testtree = parser.create_from_bracket_string(test)
        result = scorer.score_trees(goldtree, testtree)
        print(result)
Example #4
def score(reference_parse, proposed_parse):
    """ Performs evaluation on a single parse tree

    Args:
        reference_parse (str): reference parse tree for the current sentence
        proposed_parse (str): proposed parse tree for the current sentence
    Returns:
        precision, recall, f_score, accuracy
        sh1: length of the predicted sentence
        sh2: length of the true sentence

    """
    true_tree = evalbparser.create_from_bracket_string(reference_parse)
    test_tree = evalbparser.create_from_bracket_string(proposed_parse)

    y_true = np.array(true_tree.poss)
    y_pred = np.array(test_tree.poss)

    sh1 = y_pred.shape[0]
    sh2 = y_true.shape[0]

    y_pred = (y_true == y_pred) * 1
    y_true = np.ones(len(y_true))
    
    precision, recall, f_score, _ = precision_recall_fscore_support(y_true, y_pred, labels=[1])
    accuracy = accuracy_score(y_true, y_pred)

    return precision, recall, f_score, accuracy, sh1, sh2
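A hedged usage sketch for score() above, assuming the module-level imports the snippet relies on (from PYEVALB import parser as evalbparser, numpy as np, and sklearn.metrics' precision_recall_fscore_support and accuracy_score); the two parses are made up.

ref = '(S (NP (DT The) (NN cat)) (VP (VBZ sleeps)))'
prop = '(S (NP (DT The) (NN cat)) (VP (NN sleeps)))'
# precision/recall/f_score come back as one-element arrays because labels=[1];
# with one mistagged token the accuracy should be 2/3 and sh1 == sh2 == 3
precision, recall, f_score, accuracy, sh1, sh2 = score(ref, prop)
print(accuracy, sh1, sh2)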
Example #5
    def get_results(self):
        results = []
        for i in range(len(self.true_parsed)):
            sentence_true = self.true_parsed[i]
            sentence_test = self.test_parsed[i]
            back, a, b, c = self.Cyk.cyk(sentence_test)
            sentence = sentence_test.split(' ')
            result_test = "".join(
                ['((SENT (',
                 get_parsed(sentence, back, a, b, c), ')))'])
            result_test = result_test[1:-1]
            print("Result sentence:")
            print(result_test)

            target = parser.create_from_bracket_string(sentence_true)
            predicted = parser.create_from_bracket_string(result_test)

            s = scorer.Scorer()
            result = s.score_trees(target, predicted)

            print('The recall is: ' + str(result.recall))
            print('The precision is: ' + str(result.prec))

            results.append(result_test)

        return results
Example #6
def evaluate(sentence, reference):
    gold_tree = evalbparser.create_from_bracket_string(sentence[1:-1])
    test_tree = evalbparser.create_from_bracket_string(reference[1:-1])

    s = scorer.Scorer()
    result = s.score_trees(gold_tree, test_tree)

    return result.tag_accracy
Example #7
    def get_accuracy(self, target, predicted):
        gold_tree = parser.create_from_bracket_string(target)
        test_tree = parser.create_from_bracket_string(predicted)

        s1 = np.array(gold_tree.poss)
        s2 = np.array(test_tree.poss)

        acc = np.sum(s1 == s2) / s1.shape[0]
        return acc
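The same POS-accuracy idea as get_accuracy() above, written as a standalone sketch; it assumes from PYEVALB import parser and numpy imported as np, and that tree.poss yields the pre-terminal tags in sentence order, as the other examples here use it.

gold_tree = parser.create_from_bracket_string('(S (NP (DT the) (NN cat)) (VP (VBZ sleeps)))')
test_tree = parser.create_from_bracket_string('(S (NP (DT the) (NN cat)) (VP (NN sleeps)))')
s1 = np.array(gold_tree.poss)
s2 = np.array(test_tree.poss)
print(np.sum(s1 == s2) / s1.shape[0])  # 2 of the 3 tags match, so this should print ~0.67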
Example #8
def evaluate_predict(sentence, target_parse, cyk_module: cyk.CYKParser,
                     scorer: evalscorer.Scorer):
    # Returns a (Result, predicted bracket string) pair.
    predicted_string = cyk_module.cyk_parse(sentence)
    if "Failure" in predicted_string:
        # Parsing failed: flag an empty Result rather than trying to
        # build a tree from the failure message.
        result = evalscorer.Result()
        result.state = 2
    else:
        pred_tree = evalparser.create_from_bracket_string(predicted_string)
        gold_tree = evalparser.create_from_bracket_string(target_parse)
        result = scorer.score_trees(gold_tree, pred_tree)
    return result, predicted_string
Example #9
def compute_precision(prediction_train, grammars_train):
    scorer = Scorer()
    tuple_to = []
    for i in range(len(prediction_train)):
        if prediction_train[i][1] == 1:
            tuple_to.append((prediction_train[i][0], grammars_train[i]))
    precision = [scorer.score_trees(parser.create_from_bracket_string(pred),
                                    parser.create_from_bracket_string(real)).prec
                 for (pred, real) in tuple_to]
    return np.sum(precision)/len(grammars_train)
Example #10
def get_grammar_from_file_new(seq_file):
    def no_quote_prod(prod):
        prod = re.sub('\'', '', str(prod))
        prod = re.sub(' +', ' ', prod)
        return prod.strip()

    print('Getting grammar from', seq_file)
    prod_counter = Counter()
    prods = []
    cnt_line = 0
    with open(seq_file, 'r') as f:
        for seq in f:
            cnt_line += 1
            tree = parser.create_from_bracket_string(seq)
            this_seq_prods, _ = tree.productions(skip_XX=True, skip_span=True)
            this_seq_prods = [no_quote_prod(prod) for prod in this_seq_prods]
            this_seq_prods = [
                prod for prod in this_seq_prods if 'XX ->' not in prod
            ]
            prods.extend(this_seq_prods)
            prod_counter.update(this_seq_prods)

    print('Done at', cnt_line, 'lines.')
    print('There are', len(set(prods)), 'productions')
    print('Top grammar:', prod_counter.most_common(10))
    print('')
    return set(prods), prod_counter
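A short, hypothetical call of get_grammar_from_file_new() above; 'train_trees.txt' is a made-up path standing in for any file with one bracketed tree per line.

prods, prod_counter = get_grammar_from_file_new('train_trees.txt')  # hypothetical path
print(len(prods), 'productions;', prod_counter.most_common(3))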
Example #11
def evalb(parse1, parse2):
    from PYEVALB import scorer as pyscorer
    from PYEVALB import parser as pyparser
    pyparse1 = pyparser.create_from_bracket_string(str(parse1))
    pyparse2 = pyparser.create_from_bracket_string(str(parse2))
    try:
        score = pyscorer.Scorer().score_trees(pyparse1, pyparse2)
    except Exception as e:
        print("Exception!")
        print(e)
        print(pyparse1)
        print(pyparse2)
        return 0

    f1 = 2 * (score.recall * score.prec) / (score.recall + score.prec)
    return f1 * score.tag_accracy
Example #12
    def check_action2treeseq(self):
        instance = next(iter(self.train_iterator))
        action_str_lst = self.id2original(self.ACTIONS, instance.actions)
        pos_tags = self.id2original(self.POS_TAGS, instance.pos_tags)
        converted_seq = utils.action2treestr(action_str_lst, instance.raws[0], pos_tags)

        measure = scorer.Scorer()
        golden_seq = instance.raw_seq[0]

        gold_tree = parser.create_from_bracket_string(golden_seq)
        converted_tree = parser.create_from_bracket_string(converted_seq)
        ret = measure.score_trees(gold_tree, converted_tree)
        match_num = ret.matched_brackets
        gold_num = ret.gold_brackets
        pred_num = ret.test_brackets
        assert match_num == gold_num
        assert match_num == pred_num
Example #13
    def get_eval_metrics(self, instance, pred_action_ids):
        assert isinstance(pred_action_ids, list)
        pred_actions = self.id2original(self.ACTIONS, pred_action_ids)

        tokens = instance.raws[0]
        pos_tags = self.id2original(self.POS_TAGS, instance.pos_tags)

        measure = scorer.Scorer()
        golden_tree_seq = instance.raw_seq[0]
        gold_tree = parser.create_from_bracket_string(golden_tree_seq)
        try:
            pred_tree_seq = utils.action2treestr(pred_actions, tokens, pos_tags)
            pred_tree = parser.create_from_bracket_string(pred_tree_seq)
            ret = measure.score_trees(gold_tree, pred_tree)
        except Exception:
            return -1
        else:
            match_num = ret.matched_brackets
            gold_num = ret.gold_brackets
            pred_num = ret.test_brackets
            return match_num, gold_num, pred_num
Example #14
def score(true_parse, proposed_parse):
    """
    Description
    -----------------
    Evaluate parses with the whole non terminals precision and recall, and on only POS tags
    
    Parameters
    -----------------
    true_parse, proposed_parse : Bracketed strings, the true and proposed parse trees.
    
    Returns
    -----------------
    parse_recall, parse_precision, pos_recall, pos_precision
    """

    true_parse = true_parse[2:-1]
    proposed_parse = proposed_parse[2:-1]

    gold_tree = parser.create_from_bracket_string(true_parse)
    test_tree = parser.create_from_bracket_string(proposed_parse)

    # Compute recall and precision for POS tags
    y_true = np.array(gold_tree.poss)
    y_pred = np.array(test_tree.poss)

    y_pred = (y_true == y_pred).astype(int)
    y_true = np.ones(len(y_true)).astype(int)

    (POS_precision, POS_recall, POS_f_score,
     support) = precision_recall_fscore_support(y_true, y_pred, labels=[1])

    # Compute recall and precision for the whole parse
    thescorer = scorer.Scorer()
    result = thescorer.score_trees(gold_tree, test_tree)

    return (result.recall * 100, result.prec * 100,
            POS_recall[0] * 100, POS_precision[0] * 100)
Example #15
def score(true_bracket, proposed_bracket):
    """ Performs evaluation on a single parse tree

    Args:
        true_bracket (str): reference parse tree for the current sentence
        proposed_bracket (str): proposed parse tree for the current sentence

    """
    gold_tree = evalbparser.create_from_bracket_string(true_bracket)
    test_tree = evalbparser.create_from_bracket_string(proposed_bracket)

    # Compute recall and precision for POS tags
    y_true = np.array(gold_tree.poss)
    y_pred = np.array(test_tree.poss)

    y_pred = (y_true == y_pred) * 1
    y_true = np.ones(len(y_true))

    precision, recall, f_score, _ = precision_recall_fscore_support(y_true,
                                                                    y_pred,
                                                                    labels=[1])
    accuracy = accuracy_score(y_true, y_pred)

    return precision, recall, f_score, accuracy
Example #16
evalb_scorer = scorer.Scorer()

recalls_corenlp = []
precs_corenlp = []
accs_corenlp = []
parsed_sents = nltk.corpus.treebank.parsed_sents()
skipped_sents = 0
sents_analyzed = 0
with CoreNLPClient(annotators=['tokenize', 'ssplit', 'pos', 'parse'],
                   output_format="json",
                   timeout=3000001,
                   endpoint='http://localhost:9001') as client:
    for i, s in enumerate(nltk.corpus.treebank.sents()):
        sent = detok.detokenize(s)
        corenlp_model = client.annotate(sent)
        gold_sent = parser.create_from_bracket_string(
            parsed_sents[i].pformat())
        parse_tree = parser.create_from_bracket_string(
            corenlp_model['sentences'][0]['parse'])
        try:
            scores = evalb_scorer.score_trees(gold_sent, parse_tree)
        except Exception:
            skipped_sents += 1
            continue
        recalls_corenlp.append(scores.recall)
        precs_corenlp.append(scores.prec)
        accs_corenlp.append(scores.tag_accracy)
        sents_analyzed += 1
        if sents_analyzed == 100:
            break

print("Results of the constituency parsing by CoreNLP in english")
Example #17
with open(test_input_path, 'r') as f:  # assumed: mirrors test_output_path below
    test_input = f.read().splitlines()
with open(test_output_path, 'r') as f:
    test_output = f.read().splitlines()

# Compute metrics
precisions = []
recalls = []
lengths = []
failures = 0
bugs = 0
for gold, test, sent in zip(test_output, parsed_output, test_input):
    if test == 'No parsing found':
        failures += 1
    else:
        try:
            gold_tree = parser.create_from_bracket_string(gold[2:-1])
            test_tree = parser.create_from_bracket_string(test[2:-1])
            result = scorer.Scorer().score_trees(gold_tree, test_tree)
            
            len_sentence = len(sent.split())
            lengths.append(len_sentence)
            print('')
            print('Sentence length: ' + str(len_sentence))
            print('Recall = ' + str(result.recall))
            print('Precision = ' + str(result.prec))
            recalls.append(result.recall)
            precisions.append(result.prec)
        except Exception:
            bugs +=1

print('')
Example #18
import re


def no_span_prod(rule):
    rule = re.sub(r'\(([^\)]+)\)', '', str(rule))
    rule = re.sub(' +', ' ', rule)
    rule = rule.strip()
    return rule


# gold = '(IP (NP (PN 这里)) (VP (ADVP (AD 便)) (VP (VV 产生) (IP (NP (QP (CD 一) (CLP (M 个))) (DNP (NP (JJ 结构性)) (DEG 的)) (NP (NN 盲点))) (PU :) (IP (VP (VV 臭味相投) (PU ,) (VV 物以类聚)))))) (PU 。))'
# test = '(IP (IP (NP (PN 这里)) (VP (ADVP (AD 便)) (VP (VV 产生) (NP (QP (CD 一) (CLP (M 个))) (DNP (ADJP (JJ 结构性)) (DEG 的)) (NP (NN 盲点)))))) (PU :) (IP (NP (NN 臭味相投)) (PU ,) (VP (VV 物以类聚))) (PU 。))'
gold = '(TOP (S (INTJ (XX No)) (XX ,) (NP (XX it)) (VP (XX was) (XX nt) (NP (XX Black) (XX Monday))) (XX .)))'
test = '(TOP (S (ADVP (XX No) ) (XX ,) (NP (XX it) ) (VP (XX was) (XX nt) (NP (XX Black) (XX Monday) ) ) (XX .) ) )'

gold_tree = parser.create_from_bracket_string(gold)
test_tree = parser.create_from_bracket_string(test)
gold_prods, gold_heights = gold_tree.productions(skip_XX=False,
                                                 skip_span=False)
test_prods, test_heights = test_tree.productions(skip_XX=False,
                                                 skip_span=False)
Tree.fromstring(gold).pretty_print()
Tree.fromstring(test).pretty_print()

print(gold_prods)
print(list(map(no_span_prod, gold_prods)))
print(gold_heights)
print(test_prods)
print(test_heights)
print('Difference (gold - test) = ', set(gold_prods) - set(test_prods))
Example #19
    with open(OUTPUT, 'r') as f:  # assumed: parallels the TARGET file below
        outputs = f.readlines()
    with open(TARGET, 'r') as f:
        targets = f.readlines()

    assert len(outputs) == len(targets)

    n_failures = 0
    n_successes = 0
    total_accuracy = 0

    n = len(outputs)
    for target, output in zip(targets, outputs):
        target = target.strip()
        output = output.strip()

        if output == '-':
            n_failures += 1
        else:
            n_successes += 1

            target_tree = evalb_parser.create_from_bracket_string(target[2:-1])
            output_tree = evalb_parser.create_from_bracket_string(output[2:-1])

            s = evalb_scorer.Scorer()
            result = s.score_trees(target_tree, output_tree)

            total_accuracy += result.tag_accracy

    print('successes', n_successes)
    print('failures:', n_failures)
    print('mean accuracy on successes:', total_accuracy / n_successes)
Example #20
        parsed_sentence = cyk_parser.parse(sentence)
        if parsed_sentence is not None:
            test_sentences_bis.append(sentence)
            f.write('%s\n' % parsed_sentence)

print('Done')

# Get accuracy
# Get sentences parsed by our parser
with open('data/evaluation_data.parser.txt', 'r') as f:
    file = f.read()
    parsed_sentences = file.split('\n')

# Remove first two and last brackets to use parser from PYEVALB
initial_parsed_sentences = []
parsed_sentences_final = []

for sent in test_sentences_bis:
    initial_parsed_sentences.append(sent[2:-1])

for sent in parsed_sentences:
    parsed_sentences_final.append(sent[2:-1])

# Put each pair in tree form and score it
accuracies = []
for gold_str, pred_str in zip(initial_parsed_sentences, parsed_sentences_final):
    initial_tree = parser.create_from_bracket_string(gold_str)
    my_tree = parser.create_from_bracket_string(pred_str)
    result = scorer.Scorer().score_trees(initial_tree, my_tree)
    accuracies.append(result.tag_accracy)

# Get accuracy
print('Accuracy on Evaluation set: ' + str(sum(accuracies) / len(accuracies)))
Example #21
def evaluation():
    #####################################################################
    #                              Load data                            #
    #####################################################################
    with codecs.open("output.txt", 'r', 'UTF-8') as file:
        result = file.read()
    file.close()
    result = result.split()
    result_tree = []
    i=-1
    for r in result:
        if 'None' in r :
            result_tree.append('(SENT (NC <UNKNOWN>))')
            i += 1
        elif 'SENT' in r :
            result_tree.append(r)
            i += 1
        else :
            result_tree[i] = result_tree[i] + ' ' + r

    with codecs.open("sequoia_test_tree.txt", 'r', 'UTF-8') as file:
        truth = file.read()
    file.close()
    truth = truth.split()
    truth_tree = []
    i=-1
    for t in truth:
        if 'SENT' in t:
            truth_tree.append(t)
            i += 1
        else :
            truth_tree[i] = truth_tree[i] + ' ' + t
    
    assert(len(result_tree)==len(truth_tree))
    N = len(result_tree)
    
    #####################################################################
    #                            Evaluation                             #
    #####################################################################
    recall = []
    precision = []
    Fscore=[]
    tag_accuracy=[]
    
    S = scorer.Scorer()
    fileOut = codecs.open("evaluation_data.parser_output", 'w', 'UTF-8')
    
    for i in range(N):
        t = parser.create_from_bracket_string(truth_tree[i])
        r = parser.create_from_bracket_string(result_tree[i])
        
        fileOut.write(" ".join(str(t.non_terminal_labels)))
        fileOut.write('\n')
        
        if t.sentence == r.sentence :
            scores = S.score_trees(t, r)
            recall.append(scores.recall)
            precision.append(scores.prec)
            Fscore.append(2*scores.recall*scores.prec/(scores.prec+scores.recall))
            tag_accuracy.append(scores.tag_accracy)

    fileOut.close()
    print('Average recall : ', np.mean(recall))
    print('Average precision : ', np.mean(precision))
    print('Average F-score: ', np.mean(Fscore))
    print('Average tag accuracy: ', np.mean(tag_accuracy))

    return()
Example #22
def get_diff_prods_no_span():
    print('Getting diff between', test_seqs_file, 'and', pred_seqs_file)
    diff = set()
    id = 0
    from collections import Counter
    diff_prods_counter = Counter()
    diff_heights = defaultdict(list)

    for test_line, pred_line in zip(test_seqs, pred_seqs):
        # print ('true =', true_line)
        # print ('pred =', pred_line)
        measure = scorer.Scorer()
        gold_tree = parser.create_from_bracket_string(test_line)
        pred_tree = parser.create_from_bracket_string(pred_line)

        # print (id)
        # print(test_line, pred_line)
        # print (gold_tree.sentence)
        # print (pred_tree.sentence)
        # id += 1
        ret = measure.score_trees(gold_tree, pred_tree)
        match_num = ret.matched_brackets
        gold_num = ret.gold_brackets
        pred_num = ret.test_brackets

        if match_num < gold_num or match_num < pred_num:
            pred_grammar, pred_heights = gold_tree.productions(skip_XX=False,
                                                               skip_span=False)
            true_grammar, _ = pred_tree.productions(skip_XX=False,
                                                    skip_span=False)

            # print(pred_grammar)
            # print(true_grammar)
            # diff_prods = set(pred_grammar) - set(true_grammar)
            diff_prods = []
            diff_prods_heights = []
            for id, prod in enumerate(pred_grammar):
                if prod not in true_grammar:
                    diff_prods.append(prod)
                    diff_prods_heights.append(pred_heights[id])

            for id, prod in enumerate(diff_prods):
                diff_heights[no_span_prod(prod)].append(diff_prods_heights[id])
                # if pred_heights[id] == 0:
                # print (test_line)
                # print (pred_line)
                # print ('Height 0 =', prod, no_span_prod(prod))
                # sys.exit(0)

            diff_no_span_prods = set(
                [no_span_prod(prod) for prod in diff_prods])
            diff.update(diff_no_span_prods)
            diff_prods_counter.update(diff_no_span_prods)

            # pred_tree_nltk.pretty_print()
            # true_tree_nltk.pretty_print()

    # diff_rule_count = dict([e for e in pred_rule_count.items() if e[0] in diff])
    # print ('Wrong rules')
    # print (diff_rule_count)
    # print ('Len wrong rules = ', len(diff))
    # assert len(diff) == len(diff_rule_count)

    print(diff_prods_counter.most_common(10))
    print('There are', len(diff), 'distinct differing productions')
    print('Done')
    print('')
    return diff, diff_prods_counter, diff_heights
Example #23
def evaluate_parser_multiprocess(pcfg,
                                 test_trees,
                                 filepath="parser_output.txt",
                                 write=True):
    """
    Method to evaluate the parser using multiprocess
    :param pcfg: parser pcfg to evaluate
    """

    y_true = []
    y_pred = []

    y_true_non_chomsky = []
    y_pred_non_chomsky = []

    y_true_parsable = []
    y_pred_parsable = []

    y_true_parsable_non_chomsky = []
    y_pred_parsable_non_chomsky = []

    recall_list = []
    precision_list = []
    lines = []

    test_trees = test_trees[:5]
    if write:
        with open(filepath, 'w') as file:
            file.write("")
        with open("non-parsable", 'w') as file:
            file.write("")

    list_sentence = []
    for c, tree in enumerate(test_trees):
        list_sentence.append(list(tree.flatten()))

    # Parsing multi_process :
    n_job = multiprocessing.cpu_count()
    start = time.time()
    with Pool(n_job) as p:
        result_trees = p.map(pcfg.CYK, list_sentence)
    print(f"Parsing time is {time.time()-start}")

    # Analysis of the result
    nb_non_parsable = 0
    list_non_parsable = []
    for (c, tree) in enumerate(test_trees):
        test_sentence = list(tree.flatten())
        parsed_tree = result_trees[c]
        test_sentence_str = ' '.join(str(tree).split())

        # If the sentence is parsable
        if parsed_tree:

            y_true.extend(get_leaves(tree))
            y_pred.extend(get_leaves(parsed_tree))
            y_true_parsable.extend(get_leaves(tree))
            y_pred_parsable.extend(get_leaves(parsed_tree))

            tree.un_chomsky_normal_form(unaryChar="&")
            parsed_tree.un_chomsky_normal_form(unaryChar="&")
            y_true_non_chomsky.extend(get_leaves(tree))
            y_pred_non_chomsky.extend(get_leaves(parsed_tree))
            y_true_parsable_non_chomsky.extend(get_leaves(tree))
            y_pred_parsable_non_chomsky.extend(get_leaves(parsed_tree))
            lines.append('( ' + ' '.join(str(parsed_tree).split()) + ')')
            parsed_tree_str = ' '.join(str(parsed_tree).split())
            test_sentence_str = ' '.join(str(tree[0]).split())

            target_tree = parser.create_from_bracket_string(test_sentence_str)
            predicted_tree = parser.create_from_bracket_string(parsed_tree_str)
            s = scorer.Scorer()
            try:
                result = s.score_trees(target_tree, predicted_tree)
                recall_list.append(result.recall)
                precision_list.append(result.prec)
            except Exception:
                print("No Recall or precision")

            if write:
                with open(filepath, 'a') as file:
                    file.write(lines[-1] + "\n")

        # if the sentence is not parsable
        else:
            aux = get_leaves(tree)
            y_true.extend(aux)
            y_pred.extend(["None" for k in range(len(aux))])

            tree.un_chomsky_normal_form(unaryChar="&")
            y_true_non_chomsky.extend(get_leaves(tree))
            y_pred_non_chomsky.extend(
                ["None" for k in range(len(get_leaves(tree)))])

            nb_non_parsable += 1
            list_non_parsable.append(test_sentence)

            if write:
                with open(filepath, 'a') as file:
                    file.write("\n")
                with open("non-parsable", 'a') as file:
                    file.write('( ' + ' '.join(str(tree).split()) + ')' + "\n")

    print('Nb Non parsable {}'.format(nb_non_parsable))
    print('Accuracy total chomsky on dev set {}:'.format(
        accuracy(y_pred, y_true)))
    print("Accuracy total non chomsky on dev set {}:".format(
        accuracy(y_true_non_chomsky, y_pred_non_chomsky)))
    print('Accuracy parsable chomsky on dev set {}:'.format(
        accuracy(y_pred_parsable, y_true_parsable)))
    print("Accuracy parsable non chomsky on dev set {}:".format(
        accuracy(y_true_parsable_non_chomsky, y_pred_parsable_non_chomsky)))
    print("Recall moyen {} et précision moyenne {}".format(
        np.mean(recall_list), np.mean(precision_list)))
Example #24
    for k, sentence in enumerate(train_data):
        # sentence = train_data[4]

        s_input = extract_sentence(sentence)
        s_target = remove_functional_labels(sentence).strip()
        s_output = parser.parse(s_input)

        if not s_output:
            continue

        print('input --> ', s_input)
        print('input labels:', s_target)
        print('output -->', extract_sentence(s_output))
        print('output labels:', s_output)

        target_tree = evalb_parser.create_from_bracket_string(s_target[1:-1])
        output_tree = evalb_parser.create_from_bracket_string(s_output[1:-1])

        # print(target_tree)
        # print(output_tree)

        try:
            s = evalb_scorer.Scorer()
            result = s.score_trees(target_tree, output_tree)

            print(
                f'sentence {k}, precision={result.prec}, recall={result.recall}'
            )
            total_precision += result.prec
            total_recall += result.recall
            print(
Example #25
    tac = time.time()
    print("Done in " + str(round(tac - tic, 2)) + "sec\n")

    with open('results/evaluation_data.parser_output', 'a') as f:
        if my_parsing is None:
            f.write("Found no viable parsing." + "\n")
        else:
            f.write(my_parsing + "\n")

    if my_parsing is not None:
        # PYEVALB works if we remove the first and last brackets of the SEQUOIA
        # format and the extra spaces that come with them
        real_parsing = real_parsing[2:-1]
        my_parsing = my_parsing[2:-1]

        print("Score PYEVALB:")
        real_tree = parser.create_from_bracket_string(real_parsing)
        test_tree = parser.create_from_bracket_string(my_parsing)
        result = scorer.Scorer().score_trees(real_tree, test_tree)
        print('accuracy ' + str(result.tag_accracy))

        # for evaluation on the whole corpus, we save real_parsing
        # and_my_parsing in new files without first and last brackets
        with open('results/real_parsings_test_for_eval.txt', 'a') as f:
            f.write(real_parsing + "\n")

        with open('results/my_parsings_test_for_eval.txt', 'a') as f:
            f.write(my_parsing + "\n")

save_scores(
    'results/real_parsings_test_for_eval.txt',
    'results/my_parsings_test_for_eval.txt',