def test_pruning():
    """Check that pruning preserves the best path and honours the threshold.

    For every hypergraph yielded by ``hypergraphs()`` the test runs two
    checks against randomly generated Viterbi potentials:

    1. Prune with a threshold of -0.99 and verify that every edge of the
       original best path is contained in the best path of the pruned
       hypergraph, that the pruned path is valid, and that both paths have
       (almost) the same score.
    2. Prune with a threshold of 0.001 and verify that the pruned hypergraph
       is non-empty, that each surviving edge keeps its original potential,
       and that its marginal is greater than the threshold.
    """
    for h in hypergraphs():
        w = utils.random_viterbi_potentials(h)

        # --- Part 1: the best path must survive pruning. ---
        original_path = ph.best_path(h, w)
        new_hyper, new_potentials = ph.prune_hypergraph(h, w, -0.99)
        prune_path = ph.best_path(new_hyper, new_potentials)
        assert len(original_path.edges) > 0
        for edge in original_path.edges:
            assert edge in prune_path
        valid_path(new_hyper, prune_path)

        original_score = w.dot(original_path)
        pruned_score = new_potentials.dot(prune_path)
        # Single-argument print(...) behaves the same on Python 2 and 3;
        # the output is only diagnostic and shows up when the test fails.
        print(original_score)
        print(pruned_score)
        nt.assert_almost_equal(original_score, pruned_score)

        # --- Part 2: test pruning amount. ---
        prune = 0.001
        max_marginals = ph.compute_marginals(h, w)
        new_hyper, new_potentials = ph.prune_hypergraph(h, w, prune)

        assert len(new_hyper.edges) > 0

        # Edges of the two hypergraphs are matched through their labels.
        original_edges = {edge.label: edge for edge in h.edges}
        new_edges = {edge.label: edge for edge in new_hyper.edges}

        # .items() (rather than the Python-2-only .iteritems()) works on
        # both Python 2 and Python 3.
        for name, edge in new_edges.items():
            orig = original_edges[name]
            nt.assert_almost_equal(w[orig], new_potentials[edge])
            nt.assert_greater(max_marginals[orig], prune)
if score >= thres: kept.add(edge.id) # In[ ]: potentials = ph.InsidePotentials(hypergraph).build(build_potentials) marginals = ph.compute_marginals(hypergraph, potentials) base = marginals[hypergraph.root] for edge in hypergraph.edges: print marginals[edge].value / base.value # In[ ]: phyper, ppotentials = ph.prune_hypergraph(hypergraph, potentials, 0.1) # In[ ]: import pydecode.lp as lp hyperlp = lp.HypergraphLP.make_lp(phyper, ppotentials) hyperlp.lp.writeLP("parse.lp") # In[ ]: class ParseFormat(display.HypergraphPathFormatter): def __init__(self, hypergraph, sentence, path): self.path = path self.hypergraph = hypergraph
# NOTE(review): the first two statements are the tail of a method whose `def`
# line (and enclosing class header) lie outside this excerpt; they are
# indented here as a method body -- confirm against the full file.
        # Look up the node's label and use its `tag` as the display label.
        label = self.hypergraph.node_label(node)
        return {"label": label.tag, "shape": ""}

    def hyperedge_node_attrs(self, edge):
        """Return the display attributes for an edge: a pink point."""
        return {"color": "pink", "shape": "point"}

    def hypernode_subgraph(self, node):
        """Return the subgraph(s) the node belongs to.

        The result is a one-element list holding the tuple
        ``("cluster_<position>", None)``, where ``<position>`` is the
        ``position`` attribute of the node's label.
        """
        label = self.hypergraph.node_label(node)
        return [("cluster_" + str(label.position), None)]

    def subgraph_format(self, subgraph):
        """Return the display attributes for a subgraph name.

        The integer following the first underscore in ``subgraph`` is used
        as an index into ``["ROOT"] + sentence.split() + ["END"]``; the
        selected token becomes the subgraph's label.
        """
        # NOTE(review): `sentence` is a free variable here (not `self.sentence`),
        # so it is resolved from the enclosing/global scope -- confirm intended.
        return {"label": (["ROOT"] + sentence.split() + ["END"])[int(subgraph.split("_")[1])]}

#HMMConstraintFormat(hypergraph, constraints).to_ipython()

# Pruning
#

# In[ ]:

# Prune the hypergraph, passing 0.8 as the threshold argument.
pruned_hypergraph, pruned_potentials = ph.prune_hypergraph(hypergraph, potentials, 0.8)

# In[ ]:

# Display the pruned hypergraph in the notebook.
HMMFormat(pruned_hypergraph, []).to_ipython()

# In[ ]:

# Prune again with a threshold argument of 0.9; the returned potentials
# are discarded.
very_pruned_hypergraph, _ = ph.prune_hypergraph(hypergraph, potentials, 0.9)