Beispiel #1
0
 def recover_oracle(self):
     '''Read back the oracle derivation stored implicitly in the forest.

     No search is performed: the edge list is taken directly from
     ``self.root.get_oracle_edgelist()``.

     Returns a 4-tuple ``(score, tree, fvector, edgelist)``:
       * score    -- whatever ``self.bleu.rescore(tree)`` returns
       * tree     -- ``Hyperedge.deriv2tree(edgelist)``
       * fvector  -- ``Hyperedge.deriv2fvector(edgelist)``
       * edgelist -- the oracle edges themselves
     '''
     oracle_edges = self.root.get_oracle_edgelist()
     # Derive the feature vector first, then the tree, then rescore the tree.
     fvector = Hyperedge.deriv2fvector(oracle_edges)
     oracle_tree = Hyperedge.deriv2tree(oracle_edges)
     return self.bleu.rescore(oracle_tree), oracle_tree, fvector, oracle_edges
Beispiel #2
0
def extract_oracle(forest):
    """Read back the oracle that is already stored implicitly in the forest.

    Sets the module-level flag ``implicit_oracle`` to True, then collects the
    oracle edges starting from ``forest.root`` via ``get_edgelist``.

    Returns a 4-tuple ``(best_score, best_parseval, best_tree, edgelist)``:
    the score is component 0 of the derivation's feature vector, and the
    parseval slot is a freshly constructed, argument-less ``Parseval()``.
    """
    global implicit_oracle
    implicit_oracle = True

    oracle_edges = get_edgelist(forest.root)
    fvector = Hyperedge.deriv2fvector(oracle_edges)
    oracle_tree = Hyperedge.deriv2tree(oracle_edges)
    return fvector[0], Parseval(), oracle_tree, oracle_edges
Beispiel #3
0
def forest_oracle(forest, goldtree, del_puncs=False, prune_results=False):
    """Find the oracle derivation of ``forest`` with respect to ``goldtree``.

    Non-recursive: nodes are visited in the order ``for node in newforest``
    yields them (the original note calls this topological-sort style), and
    every node's sub-derivations must already carry an ``oracles`` table.

    Args:
        forest: the forest to search. If ``forest.root`` already has an
            ``oracle_edge`` attribute the result is read back through
            ``extract_oracle`` instead of being recomputed.
        goldtree: the reference tree; ``all_label_spans()`` (and ``tag_seq``
            when ``del_puncs`` is set) are read from it.
        del_puncs: when true, ``check_puncs`` is applied first and supplies
            both the index mapping and the forest that is actually searched
            (per the original note, this modifies the forest as well).
        prune_results: when true, ``prune`` is called on each node's table
            before it is stored.

    Returns:
        ``(best_score, best_parseval, best_tree, best_edgelist)``.

    Raises:
        ValueError: if the root table contains no candidate at all.

    Side effects:
        Stores ``oracles`` on every visited node and ``oracle_edge`` on the
        head of every edge of the chosen derivation, so the next call can
        preload the oracle.
    """
    # Already annotated by an earlier call: just read the oracle back.
    if hasattr(forest.root, "oracle_edge"):
        return extract_oracle(forest)

    ## modifies forest also!!
    if del_puncs:
        idx_mapping, newforest = check_puncs(forest, goldtree.tag_seq)
    else:
        # Identity mapping: indices are used unchanged.
        idx_mapping, newforest = (lambda x: x), forest

    goldspans = merge_labels(goldtree.all_label_spans(), idx_mapping)
    goldbrs = set(goldspans)  ## including TOP

    for node in newforest:
        if node.is_terminal():
            results = Oracles.unit("(%s %s)" % (node.label, node.word))  ## multiplication unit
        else:
            # a is combined into the table key (read back below as num_test),
            # b into the first value slot (read back as num_matched).
            # Spurious nodes contribute to neither.
            if node.is_spurious():
                a, b = 0, 0
            elif merge_label((node.label, node.span), idx_mapping) in goldbrs:
                a, b = 1, 1
            else:
                a, b = 1, 0

            results = Oracles()  ## addition unit
            for edge in node.edges:
                edgeres = Oracles.unit()  ## multiplication unit

                for sub in edge.subs:
                    assert hasattr(sub, "oracles"), "%s ; %s ; %s" % (node, sub, edge)
                    edgeres = edgeres * sub.oracles

                assert 0 in edge.fvector, edge
                # Scores are stored negated here and negated back on return.
                nodehead = (a, RES((b, -edge.fvector[0], [edge])))
                results += nodehead * edgeres  ## mul

        if prune_results:
            prune(results)
        node.oracles = results
        if debug:
            # Written through logs.write so the line works under both the
            # Python 2 print statement era and Python 3 (where
            # ``print >> logs`` raises TypeError). The layout mirrors what
            # the old print statement emitted.
            logs.write("%s \n%s ----------\n" % (node.labelspan(), results))

    res = (-1, RES((-1, 0, []))) * newforest.root.oracles  ## scale, remove TOP match

    num_gold = len(goldspans) - 1  ## omit TOP.  N.B. goldspans, not brackets! (NP (NP ...))

    best_parseval = None
    best_score = None
    best_edgelist = None
    for num_test in res:
        num_matched, score, edgelist = res[num_test]
        this = Parseval.get_parseval(num_matched, num_test, num_gold)
        # Keep the candidate that compares smaller under Parseval's ordering.
        # NOTE(review): presumably Parseval sorts better results first -- confirm.
        if best_parseval is None or this < best_parseval:
            best_parseval = this
            best_score = score
            best_edgelist = edgelist

    if best_edgelist is None:
        raise ValueError("forest_oracle: root oracle table is empty, no derivation to return")

    best_tree = Hyperedge.deriv2tree(best_edgelist)

    ## annotate the forest for oracle so that next-time you can preload oracle
    for edge in best_edgelist:
        edge.head.oracle_edge = edge

    # NOTE: the tree is returned exactly as Hyperedge.deriv2tree built it;
    # no desymbol step is applied here.
    return -best_score, best_parseval, best_tree, best_edgelist