def __init__(self, top_node, edge, rule, sentence):
    """Bundle a forest node with one of its edges and that edge's rule.

    Args:
        top_node: stored as ``self.node`` and passed to ``LocalNodeContext``.
        edge: stored as ``self.edge``; its ``fvector`` pairs are copied
            into ``self.fields``.
        rule: stored as ``self.rule``; supplies ``rhs`` and the ``lhs``
            string that is parsed into ``self.treelet``.
        sentence: stored as ``self.sent`` and passed to
            ``LocalNodeContext``.
    """
    self.local_node_cxt = LocalNodeContext(top_node, sentence)
    self.node = top_node
    self.edge = edge
    self.rule = rule
    self.sent = sentence

    # New plain dict built from the (key, value) pairs of the feature vector.
    self.fields = dict(edge.fvector.iteritems())

    # NOTE(review): this is the very same object as rule.rhs, not a copy --
    # confirm nothing mutates cluster_rhs in place.
    self.cluster_rhs = self.rule.rhs
    self.treelet = RuleTree.from_lhs_string(self.rule.lhs)
    self.clustering = False
def extract_fsa(self, node):
    """Constructs the segment of the fsa associated with a node in the forest.

    For every edge of ``node`` a chain of states is threaded from the node's
    DOWN state to its UP state: first one DOWN/UP pair per internal label of
    the rule's left-hand-side treelet, then one step per right-hand-side
    symbol (a single state for a word, a DOWN/UP pair wrapped around the
    recursively built child segment for anything else).

    Args:
        node: forest node; must expose ``position_id`` and ``edges``.

    Returns:
        The ``(down_state, up_state)`` tuple for ``node``, memoized in
        ``self.memo`` under ``node.position_id``.
    """
    # memoization if we have done this node already
    if node.position_id in self.memo:
        return self.memo[node.position_id]

    def non_fringe(tree):
        "get the non terminals that are not part of the fringe"
        # A tree without children is on the fringe and contributes nothing;
        # otherwise collect labels in pre-order.
        if not tree.subs:
            return []
        labels = [tree.label]
        for sub in tree.subs:
            labels.extend(non_fringe(sub))
        return labels

    # Create the FSA state for this general node (non marked)
    # (These will go away during minimization)
    # Built directly with fsa.BasicState rather than self.create_state.
    down_state = fsa.BasicState(self.fsa, (node, DOWN))
    up_state = fsa.BasicState(self.fsa, (node, UP))

    # Register the pair before descending so that a lookup of this node
    # during the recursion below hits the memo instead of rebuilding it.
    self.memo[node.position_id] = (down_state, up_state)

    for edge in node.edges:
        # always start with the parent down state ( . P )
        previous_state = down_state

        # start experiment
        # Enumerate internal (non-local terminal) nodes on left hand side.
        # Only the children of the treelet root are walked, so the root
        # label itself is not included.
        lhs_treelet = RuleTree.from_lhs_string(edge.rule.lhs)
        lhs_internal = []
        for sub in lhs_treelet.subs:
            lhs_internal.extend(non_fringe(sub))

        # NOTE(review): debug output left over from the experiment; the
        # single-argument form prints the same text under Python 2 and 3.
        print("INTERNAL %s" % (lhs_internal,))

        for i, nt in enumerate(lhs_internal):
            # NOTE(review): the "i - 10" offset presumably keeps these
            # suffixes apart from the rhs suffixes below (which use i);
            # that only holds for fewer than ten internal labels -- confirm.
            extra = "+++" + str(edge.position_id) + "+++" + str(i - 10)
            fake_down_state = self.create_state((str(nt) + extra, DOWN), False)
            fake_up_state = self.create_state((str(nt) + extra, UP), False)
            previous_state.add_edge(fake_down_state, 0.0)
            fake_down_state.add_edge(fake_up_state, 0.0)
            previous_state = fake_up_state
        # end experiment

        rhs = edge.rule.rhs
        nts_num = 0  # index of the next entry of edge.subs to consume
        for i, sym in enumerate(rhs):
            extra = "+++" + str(edge.position_id) + "+++" + str(i)

            if is_lex(sym):
                # next is a word ( . lex )
                if self.unique_words:
                    new_state = self.create_state((sym + extra, DOWN), True)
                else:
                    new_state = self.create_state(sym, True, extra)
                previous_state.add_edge(new_state, 0.0)
                # Move the dot ( lex . )
                previous_state = new_state
            else:
                # it's a symbol
                # local symbol name (lagrangians!)
                to_node = edge.subs[nts_num]
                nts_num += 1

                # We are at (. N_id ) need to get to ( N_id .)
                # First, Create a unique named version of this state
                # (. N_id) and ( N_id . )
                # We need these so that we can assign lagrangians
                local_down_state = self.create_state(
                    (str(to_node) + extra, DOWN), False)
                local_up_state = self.create_state(
                    (str(to_node) + extra, UP), False)

                down_sym, up_sym = self.extract_fsa(to_node)

                previous_state.add_edge(local_down_state, 0.0)
                local_down_state.add_edge(down_sym, 0.0)
                up_sym.add_edge(local_up_state, 0.0)

                # move the dot
                previous_state = local_up_state

        # Finish by connecting back to parent up
        previous_state.add_edge(up_state, 0.0)

    return self.memo[node.position_id]