def recover_oracle(self):
    """Rebuild the oracle derivation that is already recorded in this forest.

    The oracle edge list is read from ``self.root.get_oracle_edgelist()``;
    no search is performed here.

    Returns a 4-tuple ``(bleu_p1, tree, fvector, edgelist)`` where

    * ``bleu_p1``  is ``self.bleu.rescore(tree)``,
    * ``tree``     is ``Hyperedge.deriv2tree(edgelist)``,
    * ``fvector``  is ``Hyperedge.deriv2fvector(edgelist)``,
    * ``edgelist`` is the oracle edge list itself.
    """
    oracle_edges = self.root.get_oracle_edgelist()
    fvector = Hyperedge.deriv2fvector(oracle_edges)
    tree = Hyperedge.deriv2tree(oracle_edges)
    # The rescoring call is evaluated first inside the tuple, i.e. after both
    # conversions above, matching the original statement order.
    return self.bleu.rescore(tree), tree, fvector, oracle_edges
def extract_oracle(forest):
    """Read back an oracle that is already stored implicitly in ``forest``.

    Side effect: sets the module-level flag ``implicit_oracle`` to ``True``.

    Returns a 4-tuple ``(best_score, best_parseval, best_tree, edgelist)``:
    the score is element ``0`` of the derivation's feature vector, the
    parseval slot is a freshly constructed ``Parseval()`` (it is not computed
    against any gold tree here), and the tree is built from the edge list.
    """
    global implicit_oracle
    implicit_oracle = True

    oracle_edges = get_edgelist(forest.root)
    fvector = Hyperedge.deriv2fvector(oracle_edges)
    oracle_tree = Hyperedge.deriv2tree(oracle_edges)

    score = fvector[0]
    return score, Parseval(), oracle_tree, oracle_edges
def forest_oracle(forest, goldtree, del_puncs=False, prune_results=False):
    """Search ``forest`` for the derivation that best matches ``goldtree``.

    Returns a 4-tuple ``(best_score, best_parseval, best_tree, edgelist)``.

    The computation is non-recursive: nodes are visited in the forest's own
    iteration order, which has to yield every sub-node before any node that
    is built on top of it (this is asserted for each edge).

    If ``forest.root`` already carries an ``oracle_edge`` attribute, the
    result of ``extract_oracle(forest)`` is returned right away and the
    remaining arguments are not consulted.

    Parameters:
        forest        -- the forest to search.
        goldtree      -- the gold tree; ``all_label_spans()`` is read from it,
                         and ``tag_seq`` too when ``del_puncs`` is set.
        del_puncs     -- when true, the forest and the index mapping come
                         from ``check_puncs(forest, goldtree.tag_seq)``.
        prune_results -- when true, ``prune`` is applied to each node's table.

    Side effects: every visited node gets an ``oracles`` attribute, and the
    head of every edge in the winning derivation gets ``oracle_edge`` set, so
    that a later call can take the preloaded path above.
    """
    # Fast path: the oracle was annotated on the forest by an earlier call.
    if hasattr(forest.root, "oracle_edge"):
        return extract_oracle(forest)

    if del_puncs:
        # NOTE: per the original author's remark, this step modifies the
        # forest as well.
        remap, work_forest = check_puncs(forest, goldtree.tag_seq)
    else:
        remap = lambda x: x  # identity index mapping
        work_forest = forest

    gold_spans = merge_labels(goldtree.all_label_spans(), remap)
    gold_brackets = set(gold_spans)  # TOP is still included here

    for node in work_forest:
        if node.is_terminal():
            # multiplication unit
            node_table = Oracles.unit("(%s %s)" % (node.label, node.word))
        else:
            # (a, b) is this node's own contribution; presumably
            # a = test-bracket increment and b = matched-bracket increment,
            # judging by how the root table is unpacked further down.
            if node.is_spurious():
                a, b = 0, 0
            elif merge_label((node.label, node.span), remap) in gold_brackets:
                a, b = 1, 1
            else:
                a, b = 1, 0

            # Not used below: an earlier RES layout also carried the label.
            label = "" if node.is_spurious() else node.label

            node_table = Oracles()  # addition unit
            for edge in node.edges:
                edge_table = Oracles.unit()  # multiplication unit
                for sub in edge.subs:
                    assert hasattr(sub, "oracles"), "%s ; %s ; %s" % (node, sub, edge)
                    edge_table = edge_table * sub.oracles

                assert 0 in edge.fvector, edge
                # The score is stored negated and flipped back on return.
                head = (a, RES((b, -edge.fvector[0], [edge])))
                node_table += head * edge_table

        # NOTE(review): the source's indentation was lost; pruning is placed
        # at node level (once per node, after its table is complete) --
        # confirm against the original layout.
        if prune_results:
            prune(node_table)

        node.oracles = node_table

        if debug:
            print >> logs, node.labelspan(), "\n", node_table, "----------"

    # Scale the root table by (-1, -1) to take the TOP match back out.
    root_table = (-1, RES((-1, 0, []))) * work_forest.root.oracles

    # Omit TOP.  Counted from the span list rather than the bracket set;
    # the original note points at nested cases such as (NP (NP ...)).
    num_gold = len(gold_spans) - 1

    best_parseval = None
    for num_test in root_table:
        num_matched, score, edgelist = root_table[num_test]
        candidate = Parseval.get_parseval(num_matched, num_test, num_gold)
        # Keep the incumbent unless the candidate compares strictly smaller.
        if best_parseval is not None and not (candidate < best_parseval):
            continue
        best_parseval = candidate
        best_score = score
        best_edgelist = edgelist

    best_tree = Hyperedge.deriv2tree(best_edgelist)

    # Annotate the forest so that the oracle can be preloaded next time.
    for edge in best_edgelist:
        edge.head.oracle_edge = edge

    return -best_score, best_parseval, best_tree, best_edgelist