def test_max_marginals():
    """
    Test that max-marginals are correct.

    For every test hypergraph with random Viterbi potentials this checks
    that:

    * the best path score is non-zero,
    * no node or edge max-marginal exceeds the best path score
      (tolerance 1e-4),
    * every edge on the best path has a max-marginal equal to the best
      path score.

    The potentials, the best path's edges and the best score are printed
    as a debugging aid.
    """
    for h in hypergraphs():
        w = utils.random_viterbi_potentials(h)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(w.show(h))

        path = ph.best_path(h, w)
        best = w.dot(path)
        print("BEST")

        print("\n".join(["%20s : %s" % (edge.label, w[edge])
                         for edge in path.edges]))
        print(best)
        nt.assert_not_equal(best, 0.0)

        max_marginals = ph.compute_marginals(h, w)
        for node in h.nodes:
            nt.assert_less_equal(max_marginals[node], best + 1e-4)

        for edge in h.edges:
            edge_marginal = max_marginals[edge]
            nt.assert_less_equal(edge_marginal, best + 1e-4)
            if edge in path:
                # Edges on the best path must attain the best score exactly.
                nt.assert_almost_equal(edge_marginal, best)
Beispiel #2
0
    def inference(self, x, w, relaxed=False):
        """
        Decode a structure for input ``x`` under weights ``w``.

        A hypergraph and its potentials are built for ``x``.  If the model
        is not constrained, the best path is taken directly.  Otherwise a
        hypergraph LP is built, the model's constraints are added and the
        LP is solved with GUROBI or GLPK (chosen by ``self._use_gurobi``).

        Parameters
        ----------
        x : input passed to ``_build_hypergraph``, ``_build_potentials``
            and ``constraints``.
        w : weights passed to ``_build_potentials``.
        relaxed : if true (or if ``self._use_relaxed`` is true) the LP is
            built with ``integral=False``, solved with ``mip=0`` and the
            result is read through ``decode_fractional``; otherwise the
            solver runs with ``mip=1`` and ``hyperlp.path`` is used.

        Returns
        -------
        set
            The ``label`` of every element yielded by iterating the path.

        When ``self._debug`` is set, the elapsed wall-clock time of each
        stage is written to stderr and a blank line to stdout.
        """
        relaxed = relaxed or self._use_relaxed

        # Timing lines use sys.stderr.write rather than the Python 2
        # ``print >>stream`` form, which raises TypeError on Python 3.
        if self._debug:
            started = time.time()
        hypergraph = self._build_hypergraph(x)
        if self._debug:
            sys.stderr.write("BUILD HYPERGRAPH: %s\n" % (time.time() - started))

        if self._debug:
            started = time.time()
        potentials = self._build_potentials(hypergraph, x, w)
        if self._debug:
            sys.stderr.write("BUILD POTENTIALS: %s\n" % (time.time() - started))

        if not self._constrained:
            if self._debug:
                started = time.time()
            path = ph.best_path(hypergraph, potentials)
            if self._debug:
                sys.stderr.write("BEST PATH: %s\n" % (time.time() - started))
        else:
            if self._debug:
                started = time.time()
            constraints = self.constraints(x, hypergraph)
            hyperlp = lp.HypergraphLP.make_lp(hypergraph, potentials,
                                              integral=not relaxed)
            hyperlp.add_constraints(constraints)
            if self._debug:
                sys.stderr.write("BUILD LP: %s\n" % (time.time() - started))

            if self._debug:
                started = time.time()
            mip = 0 if relaxed else 1
            if self._use_gurobi:
                solver = pulp.solvers.GUROBI(mip=mip)
            else:
                solver = pulp.solvers.GLPK(mip=mip)
            hyperlp.solve(solver)
            if self._debug:
                sys.stderr.write("SOLVE LP: %s\n" % (time.time() - started))

            path = hyperlp.decode_fractional() if relaxed else hyperlp.path

        if self._debug:
            # Blank separator line after the timing output.
            sys.stdout.write("\n")
        return set(edge.label for edge in path)
Beispiel #3
0
 def fn(x):
     """
     Evaluate the objective at ``x``.

     Relies on ``potentials``, ``weight_potentials`` and ``graph`` from the
     enclosing scope (not visible in this excerpt).

     Returns a ``(score, subgrad, path)`` triple: the score of the best
     path under the combined weights, a dense vector of length ``len(x)``
     filled from ``potentials.dot(path)``, and the path itself.
     """
     reweighted = ph.pairwise_dot(potentials, x)
     dual_weights = weight_potentials.times(reweighted)
     best = ph.best_path(graph, dual_weights)
     score = dual_weights.dot(best)

     # potentials.dot(path) is iterated as (index, value) pairs; scatter
     # them into a dense vector, leaving unmentioned indices at zero.
     sparse = potentials.dot(best)
     subgrad = np.zeros(len(x))
     for index, value in sparse:
         subgrad[index] = value
     return score, subgrad, best
def test_subgradient():
    """
    Check constrained decoding against a random "have" constraint.

    For each test hypergraph: if the unconstrained best path misses the
    constraint's edge, the first entry reported by ``constraints.check``
    must be "have"; the constrained best path must always contain that
    edge.
    """
    for graph in hypergraphs():
        weights = utils.random_log_viterbi_potentials(graph)
        constraints, required_edge = random_have_constraint(graph)

        unconstrained = ph.best_path(graph, weights)
        reported = constraints.check(unconstrained)
        if required_edge not in unconstrained:
            nt.assert_equal(reported[0], "have")

        constrained = opt.best_constrained_path(graph, weights, constraints)
        assert required_edge in constrained
def test_pruning():
    """
    Test hypergraph pruning.

    Part 1 prunes with threshold -0.99 and expects the best path to be
    unchanged: every edge of the original best path is in the pruned best
    path, the pruned path is valid in the pruned hypergraph, and both
    paths have (almost) the same score.

    Part 2 prunes with threshold 0.001 and expects every surviving edge
    (matched to the original edge by label) to keep its potential and to
    have an original max-marginal greater than the threshold.
    """
    for h in hypergraphs():
        w = utils.random_viterbi_potentials(h)

        original_path = ph.best_path(h, w)
        new_hyper, new_potentials = ph.prune_hypergraph(h, w, -0.99)
        prune_path = ph.best_path(new_hyper, new_potentials)
        assert len(original_path.edges) > 0
        for edge in original_path.edges:
            assert edge in prune_path
        valid_path(new_hyper, prune_path)

        original_score = w.dot(original_path)
        pruned_score = new_potentials.dot(prune_path)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(original_score)
        print(pruned_score)
        nt.assert_almost_equal(original_score, pruned_score)

        # Test pruning amount.
        prune = 0.001
        max_marginals = ph.compute_marginals(h, w)
        new_hyper, new_potentials = ph.prune_hypergraph(h, w, prune)

        assert len(new_hyper.edges) > 0

        # Edges of the two hypergraphs are matched up through their labels.
        original_edges = {edge.label: edge for edge in h.edges}
        new_edges = {edge.label: edge for edge in new_hyper.edges}

        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for name, edge in new_edges.items():
            orig = original_edges[name]
            nt.assert_almost_equal(w[orig], new_potentials[edge])
            nt.assert_greater(max_marginals[orig], prune)
Beispiel #6
0
    def backtrace(self, item):
        """
        Finish the chart, score its hypergraph and decode the best path.

        Side effects: sets ``self.hypergraph``, ``self.scores``,
        ``self.skips`` (all zeros), ``self.pot`` and, when ``self.m`` is
        not None, ``self.counts``.

        When ``self.m`` is None the best path of the hypergraph is decoded
        directly.  Otherwise per-edge integer counts are built, the
        hypergraph is extended with ``ph.extend_hypergraph_by_count`` and
        the best path of the extended hypergraph is decoded under the
        up-projected potentials.

        Parameters
        ----------
        item : not used by this method.

        Returns
        -------
        list
            The labels of the nodes on the decoded path.
        """
        self.hypergraph = self.chart.finish()
        num_edges = len(self.hypergraph.edges)

        scores = np.zeros(num_edges)
        self.skips = np.zeros(num_edges)
        self.scores = scores

        for edge_num, label, tail_labels in self.hypergraph.node_labels():
            scores[edge_num] = self.score(label, len(tail_labels))
        self.pot = ph.LogViterbiPotentials(self.hypergraph) \
            .from_array(scores)

        if self.m is None:
            path = ph.best_path(self.hypergraph, self.pot)
            return [node.label for node in path.nodes]

        counts = np.zeros(num_edges, dtype=np.int32)
        if self.m > (self.n / 2):
            # Large m: count the span width (t - s) on Tri/Right edges with
            # a single tail and target n - m instead.
            # NOTE(review): presumably this counts the complement of what
            # the other branch counts -- confirm against the chart.
            target = self.n - self.m
            for edge_num, label, tail_labels in self.hypergraph.node_labels():
                typ, d, s, t, _ = label
                if (typ == interface.Tri and d == interface.Right
                        and len(tail_labels) == 1):
                    if t != s:
                        counts[edge_num] = (t - s)
        else:
            # Otherwise count one per edge whose head label is a Trap.
            target = self.m
            for edge_num, label in self.hypergraph.head_labels():
                counts[edge_num] = 1 if (label[0] == interface.Trap) else 0

        self.counts = ph.CountingPotentials(self.hypergraph) \
            .from_array(counts)
        # The same target value is passed as both of the last two arguments.
        hmap = ph.extend_hypergraph_by_count(self.hypergraph, self.counts,
                                             0, target, target)

        new_pot = self.pot.up_project(hmap.domain_hypergraph, hmap)
        path = ph.best_path(hmap.domain_hypergraph, new_pot)
        return [node.label for node in path.nodes]
def test_best_path():
    """
    Test viterbi path finding.

    The best path must have a non-zero score, be a valid path, score at
    least as high as every enumerated path, and itself appear among the
    enumerated paths.
    """
    for graph in hypergraphs():
        weights = utils.random_log_viterbi_potentials(graph)
        best = ph.best_path(graph, weights)
        nt.assert_not_equal(weights.dot(best), 0.0)
        valid_path(graph, best)

        found = False
        for candidate in utils.all_paths(graph):
            assert weights.dot(best) >= weights.dot(candidate)
            if best == candidate:
                found = True
        assert found
Beispiel #8
0
    def regen(self, penalty, counts):
        """
        Re-decode the current hypergraph with penalised scores.

        Rebuilds ``self.pot`` from ``self.scores + penalty * counts`` and
        returns the labels of the nodes on the best path under those
        potentials.

        NOTE(review): ``counts`` is presumably a per-edge array aligned
        with ``self.scores`` -- confirm against the caller.
        """
        # An earlier revision recomputed the per-edge arc/bigram scores
        # here; the scores already stored on ``self.scores`` are used.
        potentials = ph.LogViterbiPotentials(self.hypergraph)
        self.pot = potentials.from_array(self.scores + (penalty * counts))
        best = ph.best_path(self.hypergraph, self.pot)
        return [node.label for node in best.nodes]
def test_outside():
    """
    Test outside chart properties.

    For every node, inside * outside must not exceed the best path score
    (tolerance 1e-4).  For every terminal node in the tail of an edge on
    the best path, inside * outside must equal the best path score.
    """
    for h in hypergraphs():
        w = utils.random_viterbi_potentials(h)
        path = ph.best_path(h, w)
        chart = ph.inside_values(h, w)
        best = w.dot(path)
        nt.assert_not_equal(best, 0.0)
        out_chart = ph.outside_values(h, w, chart)
        for node in h.nodes:
            nt.assert_less_equal(chart[node] * out_chart[node], best + 1e-4)
        for edge in path.edges:
            for node in edge.tail:
                if node.is_terminal:
                    # Recompute the product for *this* node.  Previously the
                    # value left over from the last node of the loop above
                    # was asserted, so terminals were never really checked.
                    nt.assert_almost_equal(chart[node] * out_chart[node],
                                           best)
def test_variables():
    """
    Test variable constraint checking.

    A random constraint tied to one edge is checked against the best path.
    If the edge is not in the path, the check must report "have" and must
    not report "not"; if the edge is in the path, "have" must not be
    reported.  In both cases exactly one entry is reported.
    """
    for h in hypergraphs():
        w = utils.random_viterbi_potentials(h)
        variables, edge = random_constraint_trans(h)
        path = ph.best_path(h, w)
        match = list(variables.check(path))
        # Formatted single-argument print(...) emits the same text on
        # Python 2 and 3; the 1-tuple keeps %-formatting safe for any id.
        if edge not in path:
            print("Should not have %s" % (edge.id,))
            assert "have" in match
            assert "not" not in match
        else:
            print("Should have %s" % (edge.id,))
            assert "have" not in match

        nt.assert_equal(len(match), 1)
    def parse(self, sentence):
        """
        Build a parse hypergraph for ``sentence`` and print its best path.

        The sentence is stripped and split on single spaces.  Words that
        are missing from ``self.terminals`` or whose entry there is below
        5 are replaced by ``'_RARE_'``.

        The hypergraph is built bottom-up (CKY-like): one leaf node per
        word position, one node per unary rule whose right-hand side is
        that word, then for every span of increasing width one node per
        nonterminal that can be formed by a non-unary rule from two
        adjacent, already-built sub-spans.

        Finally the best path is decoded and each of its edge labels is
        printed together with ``self.build_potentials(label)``.

        Returns None.
        """
        words = sentence.strip().split(" ")
        n = len(words)
        nodes = {}
        for i, word in enumerate(words):
            if word not in self.terminals or self.terminals[word] < 5:
                words[i] = '_RARE_'

        sentence_graph = ph.Hypergraph()
        with sentence_graph.builder() as b:
            # Width-1 spans: the word itself plus every matching unary rule.
            for i, word in enumerate(words, start=1):
                relevant_rules = (rule for rule in self.get_unary_rules()
                                  if rule.rhs_first == word)
                r = RuleSpan(word, i, i)
                nodes[r] = b.add_node(label=r)
                for rule in relevant_rules:
                    nodes[RuleSpan(rule.lhs, i, i)] = b.add_node(
                        [([nodes[RuleSpan(rule.rhs_first, i, i)]], rule)],
                        label=RuleSpan(rule.lhs, i, i))

            # Wider spans [i, j], combined from [i, s] and [s+1, j].
            for width in range(1, n):
                for i in range(1, n - width + 1):
                    j = i + width
                    # "S" is always tried in addition to the known keys.
                    for nonterminal in list(self.multinomials) + ["S"]:
                        edgelist = []
                        for rule in self.multinomials[nonterminal]:
                            if rule.unary:
                                continue
                            for s in range(i, j):
                                left = RuleSpan(rule.rhs_first, i, s)
                                right = RuleSpan(rule.rhs_second, s + 1, j)
                                # Direct dict membership: a hash lookup
                                # instead of scanning a list of keys.
                                if left in nodes and right in nodes:
                                    edgelist.append((
                                        [nodes[left], nodes[right]],
                                        rule))
                        if edgelist:
                            rs = RuleSpan(nonterminal, i, j)
                            nodes[rs] = b.add_node(edgelist, label=rs)

        weights = ph.Potentials(sentence_graph).build(self.build_potentials)
        path = ph.best_path(sentence_graph, weights)
        for edge in path.edges:
            print("%s %s" % (edge.label, self.build_potentials(edge.label)))
def test_lp():
    """
    Compare the hypergraph LP solution with the dynamic-programming one.

    The LP path must score (almost) the same as ``ph.best_path`` and all
    of its edges must be in that path.  After adding a random "have"
    constraint, the LP path must contain the constrained edge.
    """
    import pydecode.lp as lp
    for graph in hypergraphs():
        weights = utils.random_log_viterbi_potentials(graph)

        # Unconstrained LP versus best_path.
        program = lp.HypergraphLP.make_lp(graph, weights)
        program.solve()
        lp_path = program.path
        dp_path = ph.best_path(graph, weights)

        nt.assert_almost_equal(weights.dot(lp_path), weights.dot(dp_path))
        for lp_edge in lp_path.edges:
            assert lp_edge in dp_path

        # LP with a constraint added.
        constraints, required_edge = random_have_constraint(graph)
        program = lp.HypergraphLP.make_lp(graph, weights)
        program.add_constraints(constraints)
        program.solve()
        assert required_edge in program.path
Beispiel #13
0
#     0.398276388645
#     0.686978042126
#     0.0620245561004
#     0.156915932894
#     0.227964177728
#     0.761591732502
#     0.0153166977689
#     0.402361214161
#     0.468028366566
#     0.351031720638
#     0.130741521716
# 

# In[55]:

# Decode a path through `hyper1` under `potentials`, then score it with
# `potentials.dot`.  The bare expression is the value the notebook cell
# displays; the recorded output follows below.
path = hyper.best_path(hyper1, potentials)
potentials.dot(path)


# Out[55]:

#     3.458035945892334

# In[56]:

# Decode again through pydecode.optimization, this time passing
# `constraints` (defined outside this excerpt) to the constrained search.
import pydecode.optimization as opt
cpath = opt.best_constrained_path(hyper1, potentials, constraints)


# In[57]:
Beispiel #14
0
 def best_path(self):
     """Return the best path of ``self.hypergraph`` under ``self.viterbi_potentials()``."""
     scores = self.viterbi_potentials()
     graph = self.hypergraph
     return ph.best_path(graph, scores)
def test_diff_potentials_fail():
    """
    Call ``ph.best_path`` with potentials built for a different hypergraph.

    NOTE(review): the name suggests this call is expected to fail; nothing
    in the visible code asserts that -- confirm an expected-exception
    decorator exists where this test is defined.
    """
    first_graph, _ = random_hypergraph()
    _, second_weights = random_hypergraph()
    ph.best_path(first_graph, second_weights)
# Print every edge's label next to its potential.  A formatted,
# single-argument print(...) emits the same text on Python 2 and 3.
for edge in hyper1.edges:
    print("%s %s" % (hyper1.label(edge), potentials[edge]))


# Out[]:

#     First Edge 1.0
#     Second Edge 5.0
#     Third Edge 5.0
# 

# We use the best path.

# In[ ]:

path = ph.best_path(hyper1, potentials)


# In[ ]:

# Score of the decoded path under the potentials.
print(potentials.dot(path))


# Out[]:

#     6.0
# 

# In[ ]:

display.HypergraphFormatter(hyper1).to_ipython()
Beispiel #17
0
     n3 = b.add_node(label = "c")
     n4 = b.add_node(label = "d")
     n5 = b.add_node((([n1, n2], "edge1"),), label = "e")
     b.add_node([([n5], "edge3"), ([n3, n4], "edge2")], label = "root")

def build_potentials(label):
    """
    Return the fixed potential for an edge label.

    "edge1" maps to 3, "edge2" and "edge3" map to 1.  Any other label
    raises KeyError.
    """
    edge_scores = {"edge1": 3, "edge2": 1, "edge3": 1}
    return edge_scores[label]
# Build the potentials for `hyp` by calling build_potentials on each label.
potentials = ph.Potentials(hyp).build(build_potentials)


# Draw the graph

# In[3]:

# Render the hypergraph together with its potentials as an IPython image.
display.HypergraphPotentialFormatter(hyp, potentials).to_ipython()


# Out[3]:

#     <IPython.core.display.Image at 0x2fb1410>

# In[4]:

# Decode the best path and render it; the formatter takes a list of paths.
path = ph.best_path(hyp, potentials)
display.HypergraphPathFormatter(hyp, [path]).to_ipython()


# Out[4]:

#     <IPython.core.display.Image at 0x33e2910>
Beispiel #18
0
    TypeError: not all arguments converted during string formatting


# In[ ]:

def build_potentials(arc):
    """
    Print ``arc`` and return a random potential for it.

    The value comes from ``random.random()`` and does not depend on
    ``arc``.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(arc)
    return random.random()
potentials = ph.Potentials(hypergraph).build(build_potentials)

# Disabled alternative:
# phyper, ppotentials = ph.prune_hypergraph(hypergraph, potentials, 0.5)


# In[ ]:

# Best path, its score, and the marginals of the hypergraph.
path = ph.best_path(hypergraph, potentials)
best = potentials.dot(path)
maxmarginals = ph.compute_marginals(hypergraph, potentials)

# Pair every edge with its marginal converted to a float.
edge_scores = [(edge, float(maxmarginals[edge])) for edge in hypergraph.edges]

# Mean marginal over all edges.
avg = 0.0
for edge, score in edge_scores:
    avg += score
avg = avg / float(len(hypergraph.edges))

# Threshold: 90% of the best score plus 10% of the mean marginal.
thres = 0.9 * best + 0.1 * avg

# Ids of the edges whose marginal reaches the threshold.
kept = set()
for edge, score in edge_scores:
    if score >= thres:
        kept.add(edge.id)

Beispiel #19
0
# Print every edge's label next to its weight.  A formatted,
# single-argument print(...) emits the same text on Python 2 and 3.
for edge in hyper1.edges:
    print("%s %s" % (hyper1.label(edge), weights[edge]))


# Out[14]:

#     First Edge 1.0
#     Second Edge 5.0
#     Third Edge 5.0
# 

# We use the best path.

# In[15]:

path = ph.best_path(hyper1, weights)


# In[16]:

# Score of the decoded path under the weights.
print(weights.dot(path))


# Out[16]:

#     6.0
# 

# In[17]:

display.HypergraphFormatter(hyper1).to_ipython()
Beispiel #20
0
 def decode_fractional(self):
     """
     Turn the solved LP values into a path.

     The solved value of every edge variable becomes that edge's
     potential, and the best path of ``self.hypergraph`` under those
     potentials is returned.
     """
     edge_values = []
     for edge in self.hypergraph.edges:
         edge_values.append(pulp.value(self.edge_vars[edge.id]))
     potentials = ph.LogViterbiPotentials(self.hypergraph).from_vector(edge_values)
     return ph.best_path(self.hypergraph, potentials)