Beispiel #1
0
    def expand_unary(self, i, j):
        """Complete bin (i,j) by applying unary productions to its items.

        Every (totalcost, item) pair currently in the bin seeds a priority
        queue keyed by the rank of the item's nonterminal, then by total
        cost.  Items are popped in that order; for each grammar whose
        filterspan accepts the span, every unary rule registered for the
        popped item's nonterminal is tried.  A rule is skipped (and counted
        in self.unary_pruned) unless the rank of its left-hand side is
        strictly greater than the rank of the trigger item.  Items that the
        bin accepts are queued so that unary rules can chain.
        """
        cell = self.bins[i][j]
        getrank = self.nonterminals.getrank

        queue = [(getrank(entry.x), score, entry) for (score, entry) in cell]
        heapq.heapify(queue)

        while queue:
            (trank, _, titem) = heapq.heappop(queue)
            if log.level >= 3:
                log.write("Applying unary rules to %s\n" % titem)

            # The trigger may have been defeated or pruned from the bin
            # while it was waiting in the queue.
            if titem not in cell.index:
                continue

            for (g, dotchart) in self.grammars:
                if not g.filterspan(i, j, self.n):
                    continue

                for (estcost, r) in g.unary_rules.get(titem.x, ()):
                    rank = getrank(r.lhs)

                    # Only build items of strictly higher rank than the
                    # trigger: with a unary cycle, anything else could
                    # corrupt the forest.
                    if rank <= trank:
                        self.unary_pruned += 1
                        continue

                    (totalcost, (cost, dcost, newstates)) = self.compute_item(r, (titem,), i, j)
                    ded = forest.Deduction((titem,), r, dcost, viterbi=cost)
                    item = forest.Item(r.lhs, i, j, deds=[ded], states=newstates, viterbi=cost)
                    if cell.add(totalcost, item):
                        heapq.heappush(queue, (rank, totalcost, item))
Beispiel #2
0
 def expand_goal(self, bin1):
     """Add a goal item for every item in bin1 rooted at the start symbol.

     For each qualifying item, the final-transition vectors of all models
     (each given the item's state for that model) are added up, starting
     from an empty svector.Vector.  The goal item's cost is the item's
     viterbi cost plus the weighted sum of that vector, and it is added to
     self.goal spanning (0, self.n).
     """
     for (_, candidate) in bin1:
         if not (candidate.x == self.start_nonterminal):
             continue

         if log.level >= 3:
             log.write("Considering: %s\n" % str(candidate))

         # Accumulate the per-model final transition costs.
         dcost = svector.Vector()
         for (m_i, m) in enumerate(self.models):
             dcost = dcost + m.finaltransition(candidate.states[m_i])

         cost = candidate.viterbi + self.weights.dot(dcost)
         ded = forest.Deduction((candidate,), None, dcost, viterbi=cost)
         goal_item = forest.Item(None, 0, self.n, deds=[ded], states=(), viterbi=cost)
         self.goal.add(cost, goal_item)
Beispiel #3
0
 def add_axiom(self, i, j, r):
     """Try to add the antecedent-free item built from rule r to bin (i,j).

     The item is scored with compute_item using an empty antecedent tuple.
     If its total cost is below the bin's cutoff it is added to the bin;
     otherwise it is dropped and self.prepruned is incremented.
     """
     cell = self.bins[i][j]
     (totalcost, (cost, dcost, newstates)) = self.compute_item(r, (), i, j)

     if totalcost < cell.cutoff:
         axiom = forest.Item(
             r.lhs, i, j,
             deds=[forest.Deduction((), r, dcost, viterbi=cost)],
             states=newstates,
             viterbi=cost)
         cell.add(totalcost, axiom)
         return

     # Outside the beam: count it and move on.
     if log.level >= 4:
         log.write("Prepruning: %s\n" % r)
     self.prepruned += 1
Beispiel #4
0
def make_forest(fieldss):
    """Build a forest from hypothesis records and return its goal item.

    fieldss is an iterable of mappings; the keys read here are 'hyp',
    'scores', 'back', 'src-phrase', 'tgt-phrase', 'cover-start',
    'cover-end' and 'forward'.  One PHRASE item is created per distinct
    'hyp' value.  Hypothesis 0 gets a deduction with an empty rule and no
    antecedents; every other record gets a deduction whose single
    antecedent is the item for its 'back' hypothesis (which must already
    have been seen) and whose rule prepends an indexed PHRASE symbol to
    both the source and target words.  Records with a negative 'forward'
    value are collected as goals, and the returned item has one deduction
    per goal hypothesis.
    """
    items_by_id = {}
    final_ids = set()

    for record in fieldss:
        hyp_id = record['hyp']
        if hyp_id not in items_by_id:
            items_by_id[hyp_id] = forest.Item(sym.fromtag('PHRASE'), 0, 0, [])
        current = items_by_id[hyp_id]

        if hyp_id == 0:
            # Root hypothesis: empty rule, no antecedents, empty cost vector.
            empty_rule = rule.Rule(sym.fromtag('PHRASE'), rule.Phrase([]), rule.Phrase([]))
            current.deds.append(forest.Deduction((), empty_rule, svector.Vector()))
            continue

        # Group 1 of the scores pattern is a comma-separated list of core
        # values; group 2 is handed to svector.Vector as-is.
        parsed = scores_re.match(record['scores'])
        core_values = [float(text) for text in parsed.group(1).split(',')]
        dcost = svector.Vector(parsed.group(2).encode('utf8'))
        for idx, value in enumerate(core_values):
            dcost["_core%d" % idx] = value

        predecessor = items_by_id[int(record['back'])]
        src_words = record['src-phrase'].encode('utf8').split()
        tgt_words = record['tgt-phrase'].encode('utf8').split()

        covered = int(record['cover-end']) - int(record['cover-start']) + 1
        if len(src_words) != covered:
            sys.stderr.write("warning: French phrase length didn't match covered length\n")

        # Both sides start with the same indexed PHRASE nonterminal,
        # followed by the words of this phrase pair.
        src_side = rule.Phrase([sym.setindex(sym.fromtag('PHRASE'), 1)] + src_words)
        tgt_side = rule.Phrase([sym.setindex(sym.fromtag('PHRASE'), 1)] + tgt_words)
        phrase_rule = rule.Rule(sym.fromtag('PHRASE'), src_side, tgt_side)

        current.deds.append(forest.Deduction((predecessor,), phrase_rule, dcost))

        if int(record['forward']) < 0:  # goal
            final_ids.add(hyp_id)

    goal = forest.Item(None, 0, 0, [])
    for hyp_id in final_ids:
        goal.deds.append(forest.Deduction((items_by_id[hyp_id],), None, svector.Vector()))
    return goal
Beispiel #5
0
    def expand_cell(self, i, j, bintuples):
        """Fill bin (i,j) exhaustively.

        bintuples is a list of (rule, bin, ...) tuples where rule matches
        the input span (i,j) and the bins are the bins of potential
        antecedents.  Every rule of arity 1 is combined with every item of
        the first antecedent bin; every rule of arity 2 with every pair
        drawn from the first and second antecedent bins.  Combinations
        whose total cost is below the target bin's cutoff are added to it;
        the rest are counted in self.prepruned.  Any other arity only
        produces a log message.
        """
        cell = self.bins[i][j]

        def consider(r, ants):
            # Score one rule/antecedent combination and either add the
            # resulting item to the target bin or count it as prepruned.
            (totalcost, (cost, dcost, newstates)) = self.compute_item(r, ants, i, j)
            if totalcost < cell.cutoff:
                ded = forest.Deduction(ants, r, dcost, viterbi=cost)
                cell.add(totalcost,
                         forest.Item(r.lhs, i, j, deds=[ded], states=newstates, viterbi=cost))
                return
            if log.level >= 4:
                log.write("Prepruning: %s (totalcost=%f, cutoff=%f)\n" % (r, totalcost, cell.cutoff))
            self.prepruned += 1

        for bins in bintuples:
            for (rscore, r) in bins[0]:
                if r.arity() == 1:
                    for (ant1score, ant1) in bins[1]:
                        consider(r, (ant1,))

                elif r.arity() == 2:
                    for (ant1score, ant1) in bins[1]:
                        for (ant2score, ant2) in bins[2]:
                            consider(r, (ant1, ant2))

                else:
                    log.write("this shouldn't happen")
Beispiel #6
0
    def expand_cell_cubeprune(self, i, j, cubes):
        """Fill bin (i,j) by best-first exploration of the given cubes.

        Each non-empty cube contributes its first grid point as a seed
        candidate.  Candidates are popped from a heap in order of total
        cost, at most self.pop_limit of them (no limit when it is None).
        A popped candidate whose total cost is below the bin's cutoff
        becomes an item in the bin; otherwise self.prepruned is
        incremented.  Either way its successors are considered, and a
        successor is pushed once it has been reached from all of its
        predecessors (as reported by cube.n_predecessors).

        Every candidate, seed or successor, is scored with the cube's
        latticev so that all costs on the heap are comparable.  (What
        latticev contains is defined by the cube class, which is not
        visible here.)

        On exit self.discarded grows by the number of candidates left on
        the heap and self.max_popped records the largest pop count seen.
        """
        # initialize candidate list
        cand = []
        # index[cube, ranks]: number of times this grid point was reached
        index = collections.defaultdict(int)
        for cube in cubes:
            if len(cube) > 0:
                ranks = cube.first()
                r, ants = cube[ranks]
                (totalcost, info) = self.compute_item(r, ants, i, j,
                                                      cube.latticev)
                cand.append((totalcost, info, cube, ranks))
                index[cube, ranks] += 1
        heapq.heapify(cand)

        bin = self.bins[i][j]

        popped = 0
        while cand and (self.pop_limit is None
                        or popped < self.pop_limit):
            # Get the best item on the heap
            (totalcost, (cost, dcost, newstates), cube,
             ranks) = heapq.heappop(cand)
            popped += 1
            r, ants = cube[ranks]

            if totalcost < bin.cutoff:
                # Turn it into a real Item
                ded = forest.Deduction(ants, r, dcost, viterbi=cost)
                item = forest.Item(r.lhs,
                                   i,
                                   j,
                                   deds=[ded],
                                   states=newstates,
                                   viterbi=cost)
                bin.add(totalcost, item)
            else:
                self.prepruned += 1

            # Put item's successors into the heap
            for nextranks in cube.successors(ranks):
                index[cube, nextranks] += 1
                if index[cube, nextranks] == cube.n_predecessors(nextranks):
                    r, ants = cube[nextranks]
                    # Score successors exactly like the seeds: the original
                    # omitted cube.latticev here, so successor costs were
                    # computed differently from seed costs.
                    (totalcost, info) = self.compute_item(r, ants, i, j,
                                                          cube.latticev)
                    heapq.heappush(cand, (totalcost, info, cube, nextranks))

        self.discarded += len(cand)
        self.max_popped = max(self.max_popped, popped)
Beispiel #7
0
    def expand_cell_cubeprune(self, i, j, bintuples):
        """Fill bin (i,j) using cube pruning.

        bintuples is a list of (rule, bin, ...) tuples where rule matches
        the input span (i,j) and the bins are the bins of potential
        antecedents.  Each tuple is treated as a cube: dimension 0 indexes
        the rules and each further dimension indexes one antecedent bin.
        Entries of every dimension are (score, object) pairs.

        The corner (all ranks 0) of every cube without an empty dimension
        seeds a heap.  Candidates are popped in order of total cost, at
        most self.pop_limit of them (no limit when it is None).  A popped
        candidate below the target bin's cutoff is added to the bin;
        otherwise self.prepruned is incremented.  In both cases its
        neighbours along every dimension are considered, and a neighbour
        is pushed once it has been reached from all of its predecessors.

        Antecedents are always handled as tuples, matching the seed
        candidates, so the Deduction stored in the bin never shares a
        mutable sequence with the successor-scoring code.

        On exit self.discarded grows by the number of candidates left on
        the heap and self.max_popped records the largest pop count seen.
        """
        # initialize candidate list
        cand = []
        # index[bins, ranks]: number of times this grid point was reached
        index = collections.defaultdict(int)
        for bins in bintuples:
            if log.level >= 3:
                log.write("Enqueueing cube %s\n" % ",".join(str(dim) for dim in bins))
            # A cube with an empty dimension has no grid points at all.
            if any(len(dim) == 0 for dim in bins):
                continue

            r = bins[0][0][1]
            ants = tuple(dim[0][1] for dim in bins[1:])
            (totalcost, info) = self.compute_item(r, ants, i, j)
            ranks = (0,) * len(bins)
            cand.append((totalcost, info, bins, ranks))
            index[bins, ranks] += 1
        heapq.heapify(cand)

        cell = self.bins[i][j]

        popped = 0
        while cand and (self.pop_limit is None or popped < self.pop_limit):

            (totalcost, (cost, dcost, newstates), bins, ranks) = heapq.heappop(cand)
            popped += 1

            if log.level >= 3:
                log.write("pop %d: totalcost=%s cutoff=%s\n" % (popped, totalcost, cell.cutoff))
            r = bins[0][ranks[0]][1]
            ants = tuple(bins[bj][ranks[bj]][1] for bj in xrange(1, len(bins)))

            if totalcost < cell.cutoff:
                ded = forest.Deduction(ants, r, dcost, viterbi=cost)
                item = forest.Item(r.lhs, i, j, deds=[ded], states=newstates, viterbi=cost)
                cell.add(totalcost, item)
            else:
                if log.level >= 4:
                    log.write("Prepruning: %s (totalcost=%f, cutoff=%f)\n" % (r, totalcost, cell.cutoff))
                self.prepruned += 1
                # Deliberately no early exit: successors of an out-of-beam
                # candidate are still visited.

            # Put item's successors into the heap
            for bi in xrange(len(bins)):
                nextrank = ranks[bi] + 1
                if nextrank >= len(bins[bi]):
                    continue
                nextranks = ranks[:bi] + (nextrank,) + ranks[bi + 1:]
                index[bins, nextranks] += 1

                # A grid point has one predecessor per nonzero coordinate;
                # push it only when the last of them has been popped.
                n_predecessors = sum(1 for rank in nextranks if rank > 0)
                if index[bins, nextranks] != n_predecessors:
                    continue

                # Replace exactly the component that moved; dimension 0 is
                # the rule, dimension bi > 0 is antecedent bi-1.
                if bi == 0:
                    next_r = bins[0][nextrank][1]
                    next_ants = ants
                else:
                    next_r = r
                    next_ants = ants[:bi - 1] + (bins[bi][nextrank][1],) + ants[bi:]

                (nextcost, info) = self.compute_item(next_r, next_ants, i, j)

                heapq.heappush(cand, (nextcost, info, bins, nextranks))
                if log.level >= 3:
                    log.write(" push: totalcost=%s\n" % nextcost)

        self.discarded += len(cand)
        self.max_popped = max(self.max_popped, popped)