def decode_earley(self):
    """Decode span by span and return the goal item.

    After ``self.initialize_earley()``, every span ``(i, j)`` yielded by
    ``cyk_spans(self.N)`` is processed in four steps:

    1. each dot chart is expanded on the span, and every filled dot item
       contributes one cube per rule bin (virtual left-hand sides go to a
       separate cube collection);
    2. the best candidates of both cube collections are popped (limited by
       ``FLAGS.bin_size``) and offered to ``self.chart``;
    3. unary rules are applied to the span via ``self.unary_expand``;
    4. each dot chart performs its own ``unary_expand`` on the span.

    Counters ``self.cubes_built`` and ``self.nonunary_edges_proposed`` are
    updated along the way.  Diagnostic output is written when
    ``logger.level >= 4``.

    Returns the result of ``self.get_goal(True)``; per the original note on
    this method, that is None if no goal item was found.
    """
    self.initialize_earley()

    for i, j in cyk_spans(self.N):
        if logger.level >= 4:
            logger.writeln()
            logger.writeln('---- span (%s %s) ----' % (i, j))

        # Step 1: finish the dot charts on this span and build the cubes.
        regular_cubes = Cube()
        virtual_cubes = Cube()
        for chart in self.dotcharts:
            if logger.level >= 4:
                logger.writeln()
                logger.writeln('dot chart for %s' % chart.grammar.name)
            chart.expand(i, j)
            for dot_item in chart.bins[i][j]:
                if not dot_item.node.filled:
                    continue
                for lhs, rule_bin in dot_item.node.iter_rulebins():
                    # A cube is spanned by the rule bin plus the dot item's
                    # antecedents.
                    cube_bins = (rule_bin,) + dot_item.ants
                    target = virtual_cubes if is_virtual(lhs) else regular_cubes
                    target.add_cube(cube_bins, self.get_cube_op(i, j))
                    self.cubes_built += 1

        if logger.level >= 4:
            logger.writeln(' -- cubes --')
            logger.writeln(regular_cubes)
            logger.writeln(' -- cubes for virtual items--')
            logger.writeln(virtual_cubes)

        # Step 2: pop new items from the cubes, regular ones first.
        for cube in (regular_cubes, virtual_cubes):
            for candidate in cube.iter_top_univar(FLAGS.bin_size):
                self.nonunary_edges_proposed += 1
                added = self.chart.add(candidate)
                if logger.level >= 4 and added:
                    logger.writeln('cube pop and add: %s' % candidate)
                    logger.writeln(candidate.incoming[0])

        # Step 3: apply unary rules.
        self.unary_expand(i, j)

        # Step 4: generate dot items like A->B.C (first nonterminal matched);
        # this has to wait until the unary derivations are all finished.
        for chart in self.dotcharts:
            if logger.level >= 4:
                logger.writeln()
                logger.writeln('unary expand for dot chart %s'
                               % chart.grammar.name)
            chart.unary_expand(i, j)

    return self.get_goal(True)
def score_rule(self, rule):
    """Score ``rule`` with the stateless features and store the result on it.

    Each ``(feature, weight)`` pair in ``self.stateless`` is evaluated as
    ``feature.weight(rule)``; the weighted sum of those values is the rule's
    total score.  The method then sets three attributes on ``rule``:

    * non-virtual left-hand side: ``cost`` is the total, ``fcosts`` is the
      list of per-feature values, ``hcost`` is 0;
    * virtual left-hand side: the total is used as a heuristic only, so it
      goes into ``hcost`` while ``cost`` is 0 and ``fcosts`` is all zeros
      (one entry per feature).

    Returns None.
    """
    total = 0
    per_feature = []
    for feature, weight in self.stateless:
        value = feature.weight(rule)
        total += value * weight
        per_feature.append(value)

    if not is_virtual(rule.lhs):
        rule.cost = total
        rule.fcosts = per_feature
        rule.hcost = 0
        return

    # the cost of a virtual rule only serves as a heuristic
    rule.cost = 0
    rule.hcost = total
    rule.fcosts = [0 for _ in per_feature]
def key(self, item):
    """Return the key of the bin that ``item`` belongs to.

    The checks are made in this order:

    * goal items              -> ``'GOAL'``
    * ``FLAGS.glue_var``      -> ``('GLUE', i, j)``
    * virtual variables       -> ``('VIRTUAL', i, j)``
    * ``FLAGS.straight_var``  -> ``('STRAIGHT', i, j)``
    * ``FLAGS.inverted_var``  -> ``('INVERTED', i, j)``
    * ``self.start_symbol``   -> ``('START', j)`` when ``i`` is 0,
      otherwise None
    * anything else           -> ``(i, j)``

    where ``i`` and ``j`` are ``item.i`` and ``item.j``.
    """
    if item.goal():
        return 'GOAL'

    var = item.var
    if var == FLAGS.glue_var:
        return ('GLUE', item.i, item.j)
    if is_virtual(var):
        return ('VIRTUAL', item.i, item.j)
    if var == FLAGS.straight_var:
        return ('STRAIGHT', item.i, item.j)
    if var == FLAGS.inverted_var:
        return ('INVERTED', item.i, item.j)
    if var == self.start_symbol:
        # start-symbol items get a key only when they begin at position 0
        return None if item.i != 0 else ('START', item.j)

    # TODO: filtering by span length here (e.g. returning None when
    # item.j - item.i > 10) doesn't work
    return (item.i, item.j)