Exemple #1
0
  def bottom_up_visit(self):
    """
    Traverse the tree from its (pre)terminals up to the root.

    The work list starts with the nodes returned by
    ``self.etree.getTerminals()`` and is consumed front to back.  Each time
    a node is taken off the list its parent is scheduled at the end of the
    list, unless the parent is already the most recently scheduled entry or
    is not exactly one level away from the node.

    Every visited node is dispatched on the shape of its first child:
      * first child has no children -> ``terminal_operation(eIndex, node)``
      * otherwise                   -> ``nonterminal_operation_cube(node)``

    When the root carries no data the tree is not traversed; instead an
    unscored ``PartialGridAlignment`` is stored in the root's
    ``partialAlignments`` and a fresh one is stored as its ``oracle``.
    """
    root = self.etree

    # Empty tree: leave a placeholder result on the root and stop.
    if root.data is None:
      placeholder = PartialGridAlignment()
      placeholder.score = None
      if root.partialAlignments is None:
        root.partialAlignments = [placeholder]
      else:
        root.partialAlignments.append(placeholder)
      root.oracle = PartialGridAlignment()
      return

    # Seed the work list with the first-level nodes.
    pending = list(root.getTerminals())

    while pending:
      node = pending.pop(0)

      # ``parent`` is stored as a callable that yields the parent node
      # (presumably a weak reference -- confirm against the node class).
      parentRef = node.parent
      if parentRef is not None:
        # A parent that is already queued sits at the very end of the list.
        alreadyQueued = len(pending) > 0 and pending[-1] is parentRef()
        if not alreadyQueued:
          # Only climb one level at a time so that all children of a node
          # are visited before the node itself.
          levelGap = abs(parentRef().depth() - node.depth())
          if levelGap == 1:
            pending.append(parentRef())

      # A node whose first child is a leaf is treated as a preterminal.
      firstChild = node.children[0]
      if len(firstChild.children) > 0:
        self.nonterminal_operation_cube(node)
      else:
        self.terminal_operation(node.eIndex, node)
    def bottom_up_visit(self):
        """
        Visit every node of the tree bottom-up, children before parents.

        The work queue is seeded with ``self.etree.getTerminals()``.  When a
        node is dequeued, the ``unprocessedChildNum`` counter of each of its
        parents (reached through the ``"parent"`` entry of every edge in
        ``node.parent``) is decremented; a parent is enqueued once its
        counter reaches zero, i.e. after all of its children were visited.
        The counters are modified in place and are not restored afterwards.

        During a visit:
          * ``terminal_operation(node)`` runs for every node whose
            ``data["pos"]`` is not ``"TOP"``;
          * ``nonterminal_operation_cube(node)`` runs for every node that
            has at least one entry in ``hyperEdges``.

        Empty input:
          * ``self.etree is None`` -- nothing to traverse and no node to
            attach a result to, so the method simply returns.  (Previously
            this path dereferenced the missing tree and raised.)
          * root with ``data is None`` -- an unscored ``PartialGridAlignment``
            is appended to ``partialAlignments["hyp"]`` and a fresh one is
            stored under ``partialAlignments["oracle"]``.
        """
        from collections import deque

        root = self.etree
        if root is None:
            return

        # NOTE(review): treating a data-less root as the empty tree mirrors
        # the sibling implementations of this method -- confirm.
        if root.data is None:
            empty = PartialGridAlignment()
            empty.score = None
            root.partialAlignments["hyp"].append(empty)
            root.partialAlignments["oracle"] = PartialGridAlignment()
            return

        # Add first-level nodes to the queue.
        pending = deque(root.getTerminals())

        while pending:
            node = pending.popleft()

            # Tell each parent that one more child is done; a parent becomes
            # ready only after all of its children have been visited.
            for edgeToParent in node.parent:
                parent = edgeToParent["parent"]
                parent.unprocessedChildNum -= 1
                if parent.unprocessedChildNum == 0:
                    pending.append(parent)

            # Visit node here.
            if node.data["pos"] != "TOP":
                self.terminal_operation(node)
            if len(node.hyperEdges) > 0:
                self.nonterminal_operation_cube(node)
Exemple #3
0
   def bottom_up_visit(self):
     """
     Visit each node in the tree, bottom up, and in level-order.

     The work queue is seeded with ``self.etree.getTreeTerminals()``.  When
     a node is dequeued its parent is appended to the queue unless the
     parent is already the last queued entry or is not exactly one level
     away from the node.  Every visited node is passed to
     ``terminal_operation``; nodes that have children are additionally
     passed to ``nonterminal_operation_cube``.

     Empty input:
       * ``self.etree is None`` -- there is no node to attach a result to,
         so the method simply returns.  (Previously this path dereferenced
         the missing tree and raised.)
       * root with ``data is None`` -- an unscored ``PartialGridAlignment``
         is appended to the root's ``partialAlignments`` and a fresh one is
         stored as its ``oracle``.
     """
     from collections import deque

     root = self.etree
     if root is None:
         return
     if root.data is None:
         empty = PartialGridAlignment()
         empty.score = None
         root.partialAlignments.append(empty)
         root.oracle = PartialGridAlignment()
         return

     # Add first-level nodes to the queue.
     pending = deque(root.getTreeTerminals())

     while pending:
         node = pending.popleft()

         # ``parent`` is stored as a callable that yields the parent node
         # (presumably a weak reference -- confirm against the node class).
         parentRef = node.parent
         if parentRef is not None:
             parent = parentRef()
             # A parent that is already queued is the last queue entry.
             if not pending or pending[-1] is not parent:
                 # Climb exactly one level at a time so that all children
                 # of a node are visited before the node itself.
                 if abs(parent.depth() - node.depth()) == 1:
                     pending.append(parent)

         # Visit node here.
         self.terminal_operation(node)
         if len(node.children) > 0:
             self.nonterminal_operation_cube(node)
Exemple #4
0
  def terminal_operation(self, index, currentNode = None):
    """
    Fire features at (pre)terminal nodes of the tree.

    Builds candidate partial alignments for the source word under
    ``currentNode`` and stores them on the node:

      1. the null candidate (no links, target word ``'*NULL*'``, index -1);
      2. one single-link candidate per word in ``self.f``;
      3. two-link candidates formed from pairs of the best-scoring single
         links (the top ``max(10, len(self.f) // 2)`` of them).

    Every candidate is offered to the model beam via
    ``self.addPartialAlignment``.  When ``COMPUTE_ORACLE`` or
    ``COMPUTE_FEAR`` is set, its f-score is computed with
    ``self.ff_fscore`` and it is additionally tracked as oracle / hope /
    fear candidate according to the corresponding flags.

    Parameters:
      index       -- accepted for interface compatibility; not read here.
      currentNode -- preterminal node; its first child supplies the source
                     word (``.data``) and source position (``.eIndex``).

    Results are written to ``currentNode.partialAlignments`` (best first)
    and, depending on the flags, to ``partialAlignments_fear``,
    ``partialAlignments_hope`` and ``oracle``.  Returns None.
    """
    ##################################################
    # Setup
    ##################################################
    srcLeaf = currentNode.children[0]
    srcWord = srcLeaf.data
    srcIndex = srcLeaf.eIndex
    span = (srcIndex, srcIndex)
    tgtWordList = self.f

    modelBeam = []
    hopeBeam = []
    fearBeam = []

    def fireFeatures(word, position, links):
      # Accumulate all non-zero feature values for one candidate.
      vector = svector.Vector()
      for func in self.featureTemplates:
        value_dict = func(self.info, word, srcWord, position, srcIndex,
                          links, self.diagValues, currentNode)
        for name, value in value_dict.iteritems():
          if value != 0:
            vector[name] += value
      return vector

    def buildCandidate(vector, links=None):
      # Wrap a feature vector into a scored PartialGridAlignment.
      # ``links`` stays at the class default when None (null candidate).
      candidate = PartialGridAlignment()
      candidate.score = vector.dot(self.weights)
      candidate.scoreVector = vector
      candidate.scoreVector_local = svector.Vector(vector)
      if links is not None:
        candidate.links = links
      return candidate

    def record(candidate, bestSoFar):
      # Offer a candidate to every active beam; return the updated oracle.
      self.addPartialAlignment(modelBeam, candidate, self.BEAM_SIZE)

      # NOTE(review): hope candidates are only collected when ORACLE or
      # FEAR is also enabled, because the f-score is computed under this
      # guard -- confirm that coupling is intended.
      if not (self.COMPUTE_ORACLE or self.COMPUTE_FEAR):
        return bestSoFar

      candidate.fscore = self.ff_fscore(candidate, span)

      if self.COMPUTE_ORACLE:
        # Oracle is tracked BEFORE the beam is applied; ties keep the
        # earlier candidate.
        if bestSoFar is None or candidate.fscore > bestSoFar.fscore:
          bestSoFar = candidate
      if self.COMPUTE_HOPE:
        candidate.hope = candidate.fscore + candidate.score
        self.addPartialAlignment_hope(hopeBeam, candidate, self.BEAM_SIZE)
      if self.COMPUTE_FEAR:
        candidate.fear = (1 - candidate.fscore) + candidate.score
        self.addPartialAlignment_fear(fearBeam, candidate, self.BEAM_SIZE)
      return bestSoFar

    def drainBestFirst(heap, unwrap):
      # Pop a heap until empty and return its items in reverse pop order.
      # Hope/fear heaps hold (key, alignment) pairs; ``unwrap`` keeps only
      # the alignment.
      ordered = []
      while len(heap) > 0:
        item = heappop(heap)
        ordered.append(item[1] if unwrap else item)
      ordered.reverse()
      return ordered

    ##################################################
    # null partial alignment ( assign no links )
    ##################################################
    tgtIndex = -1
    tgtWord = '*NULL*'
    oracleAlignment = record(
        buildCandidate(fireFeatures(tgtWord, tgtIndex, [])), None)

    ##################################################
    # Single-link alignment
    ##################################################
    # Keep track of scores for all 1-link partial alignments
    bestTgtWords = []
    for tgtIndex, tgtWord in enumerate(tgtWordList):
      currentLinks = [(tgtIndex, srcIndex)]
      candidate = buildCandidate(
          fireFeatures(tgtWord, tgtIndex, currentLinks), currentLinks)
      bestTgtWords.append((candidate.score, tgtIndex))
      oracleAlignment = record(candidate, oracleAlignment)

    ##################################################
    # Two link alignment
    ##################################################
    # Sort the fwords by score and pair up the best of them.
    bestTgtWords.sort(reverse=True)
    # Floor division keeps the slice bound an integer.
    LIMIT = max(10, len(bestTgtWords) // 2)
    shortlist = bestTgtWords[:LIMIT]

    for rank, (_, tgtIndex_a) in enumerate(shortlist):
      for _, tgtIndex_b in shortlist[rank + 1:]:
        # Don't consider a pair (tgtIndex_a, tgtIndex_b) if distance between
        # these indices > 1 (Arabic/English only).
        # Need to debug feature that is supposed to deal with this naturally.
        if self.LANG == "ar_en" and abs(tgtIndex_b - tgtIndex_a) > 1:
          continue

        currentLinks = [(tgtIndex_a, srcIndex), (tgtIndex_b, srcIndex)]
        # NOTE(review): tgtWord/tgtIndex still hold the LAST word of the
        # single-link loop here, not either word of this pair; preserved
        # as-is because feature templates may rely on currentLinks only --
        # confirm.
        candidate = buildCandidate(
            fireFeatures(tgtWord, tgtIndex, currentLinks), currentLinks)
        oracleAlignment = record(candidate, oracleAlignment)

    ########################################################################
    # Finalize. Sort model-score list and then hope/fear lists.
    ########################################################################
    currentNode.partialAlignments = drainBestFirst(modelBeam, False)
    if self.COMPUTE_FEAR:
      currentNode.partialAlignments_fear = drainBestFirst(fearBeam, True)
    if self.COMPUTE_HOPE:
      currentNode.partialAlignments_hope = drainBestFirst(hopeBeam, True)
    if self.COMPUTE_ORACLE:
      currentNode.oracle = oracleAlignment
Exemple #5
0
   def terminal_operation(self, currentNode = None):
       """
       Fire features at (pre)terminal nodes of the tree.

       Builds candidate partial alignments for the source word held by
       ``currentNode`` (``data["surface"]`` at position ``eIndex``) and
       stores them on the node:

         1. the null candidate (no links, target word ``'*NULL*'``, -1);
         2. one single-link candidate per word in ``self.f``;
         3. for N = 2 .. ``self.nto1``, N-link candidates obtained by
            extending the best (N-1)-link candidates with one of the best
            single links whose target index is strictly larger than the
            last index already used.

       Every candidate is offered to the model beam via
       ``self.addPartialAlignment``.  When ``COMPUTE_ORACLE`` or
       ``COMPUTE_FEAR`` is set, its f-score is computed with
       ``self.ff_fscore`` and it is additionally tracked as oracle / hope /
       fear candidate according to the corresponding flags.

       Results are written to ``currentNode.partialAlignments`` (best
       first) and, depending on the flags, to ``partialAlignments_fear``,
       ``partialAlignments_hope`` and ``oracle``.  Returns None.
       """
       ##################################################
       # Setup
       ##################################################
       srcWord = currentNode.data["surface"]
       srcIndex = currentNode.eIndex
       span = (srcIndex, srcIndex)
       tgtWordList = self.f

       modelBeam = []
       hopeBeam = []
       fearBeam = []

       def fireFeatures(word, position, links):
           # Accumulate all non-zero feature values for one candidate.
           vector = svector.Vector()
           for func in self.featureTemplates:
               value_dict = func(self.info, word, srcWord, position, srcIndex,
                                 links, self.diagValues, currentNode)
               for name, value in value_dict.iteritems():
                   if value != 0:
                       vector[name] += value
           return vector

       def buildCandidate(vector, links=None):
           # Wrap a feature vector into a scored PartialGridAlignment.
           # ``links`` stays at the class default when None (null candidate).
           candidate = PartialGridAlignment()
           candidate.score = vector.dot(self.weights)
           candidate.scoreVector = vector
           candidate.scoreVector_local = svector.Vector(vector)
           if links is not None:
               candidate.links = links
           return candidate

       def record(candidate, bestSoFar):
           # Offer a candidate to every active beam; return updated oracle.
           self.addPartialAlignment(modelBeam, candidate, self.BEAM_SIZE)

           # NOTE(review): hope candidates are only collected when ORACLE or
           # FEAR is also enabled, because the f-score is computed under
           # this guard -- confirm that coupling is intended.
           if not (self.COMPUTE_ORACLE or self.COMPUTE_FEAR):
               return bestSoFar

           candidate.fscore = self.ff_fscore(candidate, span)

           if self.COMPUTE_ORACLE:
               # Oracle is tracked BEFORE the beam is applied; ties keep the
               # earlier candidate.
               if bestSoFar is None or candidate.fscore > bestSoFar.fscore:
                   bestSoFar = candidate
           if self.COMPUTE_HOPE:
               candidate.hope = candidate.fscore + candidate.score
               self.addPartialAlignment_hope(hopeBeam, candidate, self.BEAM_SIZE)
           if self.COMPUTE_FEAR:
               candidate.fear = (1 - candidate.fscore) + candidate.score
               self.addPartialAlignment_fear(fearBeam, candidate, self.BEAM_SIZE)
           return bestSoFar

       def drainBestFirst(heap, unwrap):
           # Pop a heap until empty; return its items in reverse pop order.
           # Hope/fear heaps hold (key, alignment) pairs; ``unwrap`` keeps
           # only the alignment.
           ordered = []
           while len(heap) > 0:
               item = heappop(heap)
               ordered.append(item[1] if unwrap else item)
           ordered.reverse()
           return ordered

       ##################################################
       # null partial alignment ( assign no links )
       ##################################################
       tgtIndex = -1
       tgtWord = '*NULL*'
       oracleAlignment = record(
           buildCandidate(fireFeatures(tgtWord, tgtIndex, [])), None)

       ##################################################
       # Single-link alignment
       ##################################################
       # Each entry is (score, [tgtIndex]); the index list grows as links
       # are added in the N-link stage.
       singleBestAlignment = []
       for tgtIndex, tgtWord in enumerate(tgtWordList):
           currentLinks = [(tgtIndex, srcIndex)]
           candidate = buildCandidate(
               fireFeatures(tgtWord, tgtIndex, currentLinks), currentLinks)
           singleBestAlignment.append((candidate.score, [tgtIndex]))
           oracleAlignment = record(candidate, oracleAlignment)
       singleBestAlignment.sort(reverse=True)

       ##################################################
       # N link alignment(N>=2)
       ##################################################
       # The (N-1)-link candidates to extend; starts as the single links.
       alignmentList = singleBestAlignment
       for linkCount in range(2, self.nto1 + 1):
           # Sort the candidates to extend by score.
           alignmentList.sort(reverse=True)
           newAlignmentList = []
           # Floor division keeps the slice bounds integers.
           LIMIT_1 = max(10, self.lenF // 2)
           LIMIT_N = max(10, self.lenF // linkCount)
           bestSingles = singleBestAlignment[:LIMIT_1]

           for _, na in alignmentList[:LIMIT_N]:  # na: (N-1)-link indices
               for _, sa in bestSingles:          # sa: one-element index list
                   tgtIndex_a = na[-1]
                   tgtIndex_b = sa[0]
                   # Only extend with a strictly larger target index so each
                   # index set is generated once, in ascending order.
                   if tgtIndex_a >= tgtIndex_b:
                       continue
                   # Don't consider a pair (tgtIndex_a, tgtIndex_b) if
                   # distance between these indices > 1 (Arabic/English
                   # only).  Need to debug feature that is supposed to deal
                   # with this naturally.
                   if self.LANG == "ar_en" and abs(tgtIndex_b - tgtIndex_a) > 1:
                       continue

                   tgtIndices = na + sa
                   currentLinks = [(t, srcIndex) for t in tgtIndices]
                   # NOTE(review): tgtWord/tgtIndex still hold the LAST word
                   # of the single-link loop here, not a word of this
                   # candidate; preserved as-is because feature templates
                   # may rely on currentLinks only -- confirm.
                   candidate = buildCandidate(
                       fireFeatures(tgtWord, tgtIndex, currentLinks),
                       currentLinks)
                   newAlignmentList.append((candidate.score, tgtIndices))
                   oracleAlignment = record(candidate, oracleAlignment)
           alignmentList = newAlignmentList

       ########################################################################
       # Finalize. Sort model-score list and then hope/fear lists.
       ########################################################################
       currentNode.partialAlignments = drainBestFirst(modelBeam, False)
       if self.COMPUTE_FEAR:
           currentNode.partialAlignments_fear = drainBestFirst(fearBeam, True)
       if self.COMPUTE_HOPE:
           currentNode.partialAlignments_hope = drainBestFirst(hopeBeam, True)
       if self.COMPUTE_ORACLE:
           currentNode.oracle = oracleAlignment