Esempio n. 1
0
  def __init__(self, top_node, edge, rule, sentence):
    """Capture the node, edge, rule and sentence this object is built around.

    Args:
      top_node: stored as `self.node` and handed to LocalNodeContext.
      edge: stored as `self.edge`; its `fvector` is copied into `self.fields`.
      rule: stored as `self.rule`; supplies `rhs` and the `lhs` string.
      sentence: stored as `self.sent` and handed to LocalNodeContext.
    """
    self.local_node_cxt = LocalNodeContext(top_node, sentence)

    self.node = top_node
    self.edge = edge
    self.rule = rule
    self.sent = sentence

    # Plain-dict snapshot of the (key, value) pairs in edge.fvector, so later
    # changes to self.fields do not write through to the edge.
    self.fields = dict(edge.fvector.iteritems())

    # Starts out as the rule's own right-hand side.
    self.cluster_rhs = self.rule.rhs

    # Tree form of the rule's left-hand side, parsed from its string encoding.
    self.treelet = RuleTree.from_lhs_string(self.rule.lhs)

    self.clustering = False
Esempio n. 2
0
  def extract_fsa(self, node):
    """Construct the segment of the FSA associated with a node in the forest.

    For every edge of `node` a chain of states is threaded from the node's
    down state to its up state: first a down/up pair for each internal label
    of the rule's left-hand-side treelet (root and fringe excluded), then one
    step per right-hand-side symbol, recursing into the matching child node
    for every non-lexical symbol. All connecting edges carry weight 0.0.

    Args:
      node: forest node exposing `position_id` and `edges`; each edge is
        expected to expose `rule.lhs`, `rule.rhs`, `position_id` and `subs`.

    Returns:
      The `(down_state, up_state)` pair for `node`. The pair is memoized in
      `self.memo` under `node.position_id`, so a repeated call returns the
      stored pair without building anything.
    """
    # memoization if we have done this node already
    if node.position_id in self.memo:
      return self.memo[node.position_id]

    def collect_internal(tree, labels):
      "append, in pre-order, the labels of subtrees that have children"
      if tree.subs:
        labels.append(tree.label)
        for child in tree.subs:
          collect_internal(child, labels)

    # Create the FSA state for this general node (non marked)
    # (These will go away during minimization)
    down_state = fsa.BasicState(self.fsa, (node, DOWN))
    up_state = fsa.BasicState(self.fsa, (node, UP))
    # Registered before walking the edges so that a recursive call reaching
    # this node again reuses these two states.
    self.memo[node.position_id] = (down_state, up_state)

    for edge in node.edges:
      # always start with the parent down state ( . P )
      previous_state = down_state

      # start experiment
      # Enumerate internal (non-local terminal) nodes on left hand side:
      # the treelet root itself is skipped, as are childless subtrees.
      lhs_treelet = RuleTree.from_lhs_string(edge.rule.lhs)
      lhs_internal = []
      for child in lhs_treelet.subs:
        collect_internal(child, lhs_internal)

      for i, nt in enumerate(lhs_internal):
        # The -10 offset keeps these ids apart from the rhs positions used
        # below. NOTE(review): that only holds for fewer than 10 internal
        # labels -- confirm this is acceptable.
        extra = "+++" + str(edge.position_id) + "+++" + str(i - 10)
        fake_down_state = self.create_state((str(nt) + extra, DOWN), False)
        fake_up_state = self.create_state((str(nt) + extra, UP), False)
        previous_state.add_edge(fake_down_state, 0.0)
        fake_down_state.add_edge(fake_up_state, 0.0)
        previous_state = fake_up_state
      # end experiment

      # Index of the next child in edge.subs; advanced once per non-lexical
      # right-hand-side symbol.
      nts_num = 0
      for i, sym in enumerate(edge.rule.rhs):
        extra = "+++" + str(edge.position_id) + "+++" + str(i)

        if is_lex(sym):
          # next is a word ( . lex )
          if self.unique_words:
            new_state = self.create_state((sym + extra, DOWN), True)
          else:
            new_state = self.create_state(sym, True, extra)

          previous_state.add_edge(new_state, 0.0)

          # Move the dot ( lex . )
          previous_state = new_state
        else:
          # it's a symbol

          # local symbol name (lagrangians!)
          to_node = edge.subs[nts_num]
          nts_num += 1

          # We are at (. N_id ) need to get to ( N_id .)

          # First, Create a unique named version of this state (. N_id) and ( N_id . )
          # We need these so that we can assign lagrangians
          local_down_state = self.create_state((str(to_node) + extra, DOWN), False)
          local_up_state = self.create_state((str(to_node) + extra, UP), False)

          down_sym, up_sym = self.extract_fsa(to_node)

          previous_state.add_edge(local_down_state, 0.0)
          local_down_state.add_edge(down_sym, 0.0)
          up_sym.add_edge(local_up_state, 0.0)

          # move the dot
          previous_state = local_up_state

      # Finish by connecting back to parent up
      previous_state.add_edge(up_state, 0.0)

    return self.memo[node.position_id]