def extract_kbest_forest(self, forest, kbest = 1):
    """Extract the part of the forest used by the k-best final parses.

    Starting from the first ``kbest`` hypotheses cached for ``forest.root``,
    follow the ``last_edges`` back-pointers of every hypothesis reached and
    record the ``position_id`` of each edge encountered. The forest is then
    handed to ``general_prune`` with a node predicate that is always False
    and an edge predicate that is True for every edge whose ``position_id``
    was not recorded.

    Args:
        forest: forest object exposing ``root``; passed through to
            ``general_prune``.
        kbest: number of top hypotheses at the root to trace (slice
            ``[0:kbest]`` of the root's cached hypothesis list).

    Returns:
        Whatever ``general_prune(forest, node_pruning, edge_pruning)``
        returns.
    """
    root_items = self.hypothesis_cache[forest.root][0:kbest]
    # Each last_edges entry is unpacked below as (edge, vecj), where vecj
    # supplies one hypothesis index per sub-node of the edge.
    edge_stack = [entry for item in root_items for entry in item.last_edges]

    marked_edges = set()
    # Hypotheses already expanded, keyed by (node, index). The key must
    # include the index: different hypotheses of the same node carry their
    # own last_edges, so de-duplicating on the node alone would drop edges.
    expanded = set()

    while edge_stack:
        edge, vecj = edge_stack.pop()
        marked_edges.add(edge.position_id)
        for sub, j in izip(edge.subs, vecj):
            key = (sub, j)
            if key in expanded:
                # Re-expanding would only re-mark the same edges.
                continue
            expanded.add(key)
            edge_stack.extend(self.hypothesis_cache[sub][j].last_edges)

    def node_pruning(node):
        # Never flag a node by itself.
        return False

    def edge_pruning(edge):
        # Flag every edge that no traced derivation went through.
        return edge.position_id not in marked_edges

    return general_prune(forest, node_pruning, edge_pruning)
def extract_pruned_forest(self, forest, extract = 1):
    """Extract the forest created by cube pruning.

    Hands ``forest`` to ``general_prune`` with two predicates:

    * the node predicate is True for nodes that have no entry in
      ``self.hypothesis_cache``;
    * the edge predicate is False when the edge's ``position_id`` appears
      among the ``last_edges`` of one of the leading hypotheses cached for
      the edge's head node, and True otherwise.

    Args:
        forest: forest object passed through to ``general_prune``.
        extract: index of the last hypothesis inspected per node. The loop
            checks indices ``0..extract`` inclusive, i.e. up to
            ``extract + 1`` hypotheses.

    Returns:
        Whatever ``general_prune(forest, node_pruning, edge_pruning)``
        returns.
    """
    def node_pruning(node):
        # `in` replaces the deprecated dict.has_key().
        return node not in self.hypothesis_cache

    def edge_pruning(edge):
        target = edge.position_id
        hypvec = self.hypothesis_cache[edge.head]
        for i, item in enumerate(hypvec):
            # NOTE(review): extract_kbest_forest unpacks each last_edges
            # entry as (edge, vecj), while this reads .position_id directly
            # off the entry -- confirm what last_edges actually holds.
            if target in (e.position_id for e in item.last_edges):
                return False
            if i == extract:
                break
        return True

    return general_prune(forest, node_pruning, edge_pruning)