def decode_earley(self):
    """Fill the chart span by span and return the goal item.

    For every span (i, j) produced by cyk_spans(self.N):
      1. each dot chart is expanded and, for every dot item whose node is
         filled, one cube per (lhs, rulebin) is built -- virtual left-hand
         sides go to a separate cube collection;
      2. items are popped from both collections (ordinary first, then
         virtual) and added to the chart;
      3. unary rules are applied on the chart, then on every dot chart.

    Returns None if no goal item found (the value comes from
    self.get_goal(True)).
    """
    self.initialize_earley()

    for i, j in cyk_spans(self.N):
        if logger.level >= 4:
            logger.writeln()
            logger.writeln('---- span (%s %s) ----' % (i, j))

        # finish the dot charts and collect cubes for this span
        regular_cubes = Cube()
        virtual_cubes = Cube()
        for dotchart in self.dotcharts:
            if logger.level >= 4:
                logger.writeln()
                logger.writeln('dot chart for %s' % dotchart.grammar.name)
            dotchart.expand(i, j)
            for dotitem in dotchart.bins[i][j]:
                if not dotitem.node.filled:
                    continue
                for lhs, rulebin in dotitem.node.iter_rulebins():
                    bins = (rulebin, ) + dotitem.ants
                    target = virtual_cubes if is_virtual(lhs) else regular_cubes
                    target.add_cube(bins, self.get_cube_op(i, j))
                    self.cubes_built += 1

        if logger.level >= 4:
            logger.writeln(' -- cubes --')
            logger.writeln(regular_cubes)
            logger.writeln(' -- cubes for virtual items--')
            logger.writeln(virtual_cubes)

        # pop new items from the cubes; ordinary items are handled before
        # virtual ones
        for cube in (regular_cubes, virtual_cubes):
            for candidate in cube.iter_top_univar(FLAGS.bin_size):
                self.nonunary_edges_proposed += 1
                added = self.chart.add(candidate)
                if logger.level >= 4 and added:
                    logger.writeln('cube pop and add: %s' % candidate)
                    logger.writeln(candidate.incoming[0])

        # apply unary rules
        self.unary_expand(i, j)

        # generate dot items like A->B.C (first nonterminal matched)
        # only after the unary derivations are all finished
        for dotchart in self.dotcharts:
            if logger.level >= 4:
                logger.writeln()
                logger.writeln('unary expand for dot chart %s'
                               % dotchart.grammar.name)
            dotchart.unary_expand(i, j)

    return self.get_goal(True)
def binary_expand(self, i, j):
    """Combine adjacent sub-spans of (i, j) with binary rules.

    For every split point k strictly between i and j, each pair of
    (nonterminal, bin) entries from chart spans (i, k) and (k, j) is looked
    up in every grammar's itg rule table; each non-empty rule bin yields one
    cube. Items drawn from the cubes via iter_top(FLAGS.bin_size) are then
    added to the chart.
    """
    if logger.level >= 4:
        logger.writeln('span %s %s' % (i, j))

    cubes = Cube()
    for split in range(i + 1, j):
        for left_nt, left_bin in self.chart.iter_items_by_nts(i, split):
            for right_nt, right_bin in self.chart.iter_items_by_nts(split, j):
                for grammar in self.grammars:
                    rules = grammar.itg.get_sorted_rules((left_nt, right_nt))
                    if not rules:
                        continue
                    cubes.add_cube((rules, left_bin, right_bin),
                                   self.get_cube_op(i, j))

    for item in cubes.iter_top(FLAGS.bin_size):
        if logger.level >= 4:
            logger.writeln(item)
        self.chart.add(item)
class Node(object):
    """A hypergraph node that keeps track of its incoming hyperedges.

    Edges are expected to expose `tail` (an iterable of nodes) and, for
    best_paths(), a `make_path` callable; `head` is set by add_incoming().
    """

    def __init__(self):
        self.incoming = []  # hyperedges whose head is this node
        self.nout = 0  # number of outgoing edges
        self.hg = None  # hypergraph this node belongs to
        self.best_paths_list = None  # built lazily by best_paths()

    def add_incoming(self, edge):
        """Register `edge` as an incoming edge and make this node its head."""
        self.incoming.append(edge)
        edge.head = self

    def best_paths(self):
        """returns a virtual list (class Cube) of best paths.
        node.best_paths()[i] is the i'th best path.

        The list is built on first call and cached in best_paths_list.
        Leaf nodes (no incoming edges) get the plain list [None].
        """
        # identity test: a Cube may define its own comparison operators
        if self.best_paths_list is None:
            if self.incoming:
                self.best_paths_list = Cube()
                for edge in self.incoming:
                    cube = [n.best_paths() for n in edge.tail]
                    self.best_paths_list.add_cube(cube, edge.make_path)
            else:
                # initialization for leaf nodes
                self.best_paths_list = [None]
        return self.best_paths_list

    def id_str(self):
        """The id of a node is assigned after topological sort in reversed
        topological order. (Root has id 0.) Use python object id if this
        node is not assigned an id."""
        if hasattr(self, 'id'):
            return str(self.id)
        return 'obj%s' % id(self)

    def dot_label(self, detailed=True):
        """Returns label used in dot representation."""
        if detailed:
            return '%s: %s' % (self.id_str(), escape_quote(str(self)))
        return '%s' % self.id_str()

    def dot(self, color='', detailed=True):
        """dot language representation of this node and its incoming edges"""
        node_id = self.id_str()
        parts = ['n%s [label="%s" style="filled" color="%s"];\n' %
                 (node_id, self.dot_label(detailed=detailed), color)]
        # write hyperedges
        for i, edge in enumerate(self.incoming):
            edgename = 'e%s_%s' % (node_id, i)
            # graph node for hyperedge
            parts.append('%s [shape="point"]\n' % edgename)
            # hyperedge head
            label = escape_quote(str(edge)) if detailed else ''
            parts.append('%s -> n%s [label="%s"]\n' %
                         (edgename, edge.head.id_str(), label))
            # hyperedge tails
            for tailnode in edge.tail:
                parts.append('n%s -> %s [dir="none"]\n' %
                             (tailnode.id_str(), edgename))
        return ''.join(parts)

    def neighbors(self, max_dist=3):
        """return a set of nodes who are within max_dist of self

        Tail nodes of incoming edges are always followed. Heads of outgoing
        edges are followed only for nodes that carry an `outgoing`
        attribute: outgoing edges are no longer saved by this class, so a
        missing attribute is treated as "no outgoing edges" instead of
        raising AttributeError.
        """
        # TODO: this may have problems because the set doesn't
        # compare object id but uses user defined comparison methods
        from collections import deque

        found = set()
        found.add(self)
        queue = deque([(self, 0)])
        while queue:
            node, dist = queue.popleft()
            if dist >= max_dist:
                continue
            for edge in getattr(node, 'outgoing', ()):
                if edge.head not in found:
                    found.add(edge.head)
                    queue.append((edge.head, dist + 1))
            for edge in node.incoming:
                for tailnode in edge.tail:
                    if tailnode not in found:
                        found.add(tailnode)
                        queue.append((tailnode, dist + 1))
        return found

    def serialize(self):
        """Return the node id wrapped in square brackets, e.g. '[3]'."""
        return '[%s]' % self.id

    def deserialize(self, s):
        """Set self.id from a string in the format written by serialize().

        Surrounding whitespace is ignored; the brackets are checked with an
        assert.
        """
        s = s.strip()
        assert s.startswith('[') and s.endswith(']')
        self.id = int(s[1:-1])

    def show_neighborhood(self, max_dist=3, detailed=True):
        """show the neighborhood of this node in a picture

        Writes the dot source to /tmp/dotty, renders it to /tmp/dotty.gif
        with the `dot` command and opens the result with `eog`.
        """
        parts = []
        for node in self.neighbors(max_dist):
            if node is self:
                # highlight the node the neighborhood is centered on
                parts.append(node.dot(color='dodgerblue', detailed=detailed))
            else:
                parts.append(node.dot(detailed=detailed))
        dotstr = 'digraph hypergraph {\nrankdir=BT\n%s}\n' % ''.join(parts)
        # context manager closes the file even if the write fails
        with open('/tmp/dotty', 'w') as f:
            f.write(dotstr)
        os.system('cat /tmp/dotty | dot -Tgif > /tmp/dotty.gif')
        os.system('eog /tmp/dotty.gif')