Beispiel #1
0
  def createEdge(self, childEdges, currentNode, span):
    """
    Merge the given child edges into one new PartialGridAlignment.

    The new edge accumulates, in the order the children are given:
    (1) every child's links,
    (2) every child's local score vector,
    (3) every child's full score vector.
    A child whose boundingBox is still None gets one computed from its
    own links via self.boundingBox().  The merged edge is then handed to
    self.scoreEdge() together with the current node, the span and the
    children.

    Returns the pair (merged edge, bounding box returned by scoreEdge).
    """
    merged = PartialGridAlignment()
    merged.scoreVector_local = svector.Vector()
    merged.scoreVector = svector.Vector()

    for child in childEdges:
      # Fold this child's contribution into the merged edge.
      merged.links += child.links
      merged.scoreVector_local += child.scoreVector_local
      merged.scoreVector += child.scoreVector

      # Children that already carry a bounding box are left untouched.
      if child.boundingBox is not None:
        continue
      child.boundingBox = self.boundingBox(child.links)

    # The first element of scoreEdge's result is not needed here.
    _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
    return merged, mergedBox
    def createEdge(self, childEdges, currentNode, span, hyperEdge):
        """
        Merge the given child edges into one new PartialGridAlignment.

        The new edge's decoding path takes its data from currentNode and is
        marked as non-dummy; hyperEdge.score is recorded on the edge as
        hyperEdgeScore.  For every child, in order, the edge accumulates the
        child's depth-added links, local score vector and full score vector.
        A child's decoding path is attached as a child of the new decoding
        path only when its "word_id" differs from currentNode's.  A child
        whose boundingBox is still None gets one computed from its links.

        Finally the edge is passed to self.scoreEdge() and the pair
        (merged edge, bounding box returned by scoreEdge) is returned.
        """
        merged = PartialGridAlignment()
        path = merged.decodingPath
        path.data = currentNode.data
        path.isDummy = False
        merged.scoreVector_local = svector.Vector()
        merged.scoreVector = svector.Vector()
        merged.hyperEdgeScore = hyperEdge.score

        for child in childEdges:
            merged.links += child.getDepthAddedLink()
            merged.scoreVector_local += child.scoreVector_local

            # Original author's note: the TOP node does not have a local
            # hypothesis, so there is only one child edge.
            sameWord = currentNode.data["word_id"] == child.decodingPath.data["word_id"]
            if not sameWord:
                merged.decodingPath.addChild(child.decodingPath)

            merged.scoreVector += child.scoreVector

            # Children that already carry a bounding box are left untouched.
            if child.boundingBox is not None:
                continue
            child.boundingBox = self.boundingBox(child.links)

        # The first element of scoreEdge's result is not needed here.
        _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
        return merged, mergedBox
    def createDummyEdge(self, childEdges, currentNode, dummyCurrentNode, span, hyperEdge, isLastMerge = True):
        """
        Merge the given child edges into a new PartialGridAlignment whose
        decoding path takes its data from dummyCurrentNode.

        The decoding path is flagged as dummy unless isLastMerge is true.
        hyperEdge.score is recorded on the edge as hyperEdgeScore.  For every
        child, in order, the edge accumulates the child's links (the
        depth-added links when isLastMerge is true, the plain links
        otherwise), its local score vector and its full score vector.  When a
        child's "word_id" differs from currentNode's, its decoding path is
        attached under the new decoding path and its parent pointer is set to
        it.  A child whose boundingBox is still None gets one computed from
        its links.

        Finally the edge is passed to self.scoreEdge() (with currentNode, not
        dummyCurrentNode) and the pair (merged edge, bounding box returned by
        scoreEdge) is returned.
        """
        merged = PartialGridAlignment()
        path = merged.decodingPath
        path.data = dummyCurrentNode.data
        path.isDummy = not isLastMerge
        merged.scoreVector_local = svector.Vector()
        merged.scoreVector = svector.Vector()
        merged.hyperEdgeScore = hyperEdge.score

        for child in childEdges:
            merged.links += child.getDepthAddedLink() if isLastMerge else child.links

            merged.scoreVector_local += child.scoreVector_local

            # Original author's note: the TOP node does not have a local
            # hypothesis, so there is only one child edge.
            sameWord = currentNode.data["word_id"] == child.decodingPath.data["word_id"]
            if not sameWord:
                merged.decodingPath.addChild(child.decodingPath)
                child.decodingPath.parent = merged.decodingPath

            merged.scoreVector += child.scoreVector

            # Children that already carry a bounding box are left untouched.
            if child.boundingBox is not None:
                continue
            child.boundingBox = self.boundingBox(child.links)

        # The first element of scoreEdge's result is not needed here.
        _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
        return merged, mergedBox
Beispiel #4
0
   def createEdge(self, childEdges, currentNode, span):
     """
     Merge the given child edges into one new PartialGridAlignment.

     The new edge accumulates, in the order the children are given:
     (1) every child's links,
     (2) every child's local score vector,
     (3) every child's full score vector.
     A child whose boundingBox is still None gets one computed from its
     own links via self.boundingBox().  The merged edge is then handed to
     self.scoreEdge() together with the current node, the span and the
     children.

     Returns the pair (merged edge, bounding box returned by scoreEdge).
     """
     merged = PartialGridAlignment()
     merged.scoreVector_local = svector.Vector()
     merged.scoreVector = svector.Vector()

     for child in childEdges:
         # Fold this child's contribution into the merged edge.
         merged.links += child.links
         merged.scoreVector_local += child.scoreVector_local
         merged.scoreVector += child.scoreVector

         # Children that already carry a bounding box are left untouched.
         if child.boundingBox is not None:
             continue
         child.boundingBox = self.boundingBox(child.links)

     # The first element of scoreEdge's result is not needed here.
     _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
     return merged, mergedBox
Beispiel #5
0
  def terminal_operation(self, index, currentNode = None):
    """
    Fire features at (pre)terminal nodes of the tree.

    For the source word found at currentNode.children[0], this builds and
    scores three families of candidate PartialGridAlignment objects:
    (1) the null alignment (no links),
    (2) one single-link alignment per word of self.f,
    (3) two-link alignments formed from pairs of the best-scoring
        single-link target words.
    A candidate's score vector is the sum of all non-zero values returned
    by the functions in self.featureTemplates; its score is the dot
    product of that vector with self.weights.  Every candidate is offered
    to self.addPartialAlignment() with self.BEAM_SIZE, and to the
    hope/fear variants when the corresponding flags are set.

    Nothing is returned.  Results are stored on currentNode:
    partialAlignments always; partialAlignments_fear,
    partialAlignments_hope and oracle when COMPUTE_FEAR, COMPUTE_HOPE and
    COMPUTE_ORACLE are set respectively.

    Parameters:
    index       -- not referenced anywhere in this method.
    currentNode -- the (pre)terminal node; although it defaults to None
                   it is dereferenced unconditionally.

    The code uses dict.iteritems(), i.e. it targets Python 2.
    """
    ##################################################
    # Setup
    ##################################################

    partialAlignments = []
    partialAlignments_hope = []
    partialAlignments_fear = []
    oracleAlignment = None

    # partialAlignments is still empty at this point.
    heapify(partialAlignments)

    tgtWordList = self.f
    # srcWordList is not referenced again in this method.
    srcWordList = self.e
    tgtWord = None
    srcWord = currentNode.children[0].data
    # srcTag is not referenced again in this method.
    srcTag = currentNode.data
    tgtIndex = None
    srcIndex = currentNode.children[0].eIndex

    # The span covers exactly the one source position.
    span = (srcIndex, srcIndex)

    ##################################################
    # null partial alignment ( assign no links )
    ##################################################
    # The null alignment is represented by target index -1, the
    # placeholder word '*NULL*' and an empty link list.
    tgtIndex = -1
    tgtWord = '*NULL*'
    scoreVector = svector.Vector()
    # Compute feature score

    for k, func in enumerate(self.featureTemplates):
      value_dict = func(self.info, tgtWord, srcWord, tgtIndex, srcIndex, [], self.diagValues, currentNode)
      for name, value in value_dict.iteritems():
        # Only non-zero feature values are accumulated.
        if value != 0:
          scoreVector[name] += value

    nullPartialAlignment = PartialGridAlignment()
    nullPartialAlignment.score = score = scoreVector.dot(self.weights)
    nullPartialAlignment.scoreVector = scoreVector
    # The local vector is built from scoreVector via the svector.Vector
    # constructor rather than assigned directly.
    nullPartialAlignment.scoreVector_local = svector.Vector(scoreVector)

    self.addPartialAlignment(partialAlignments, nullPartialAlignment, self.BEAM_SIZE)

    # NOTE(review): the COMPUTE_HOPE branch below is nested inside this
    # guard, so the hope list is only filled when COMPUTE_ORACLE or
    # COMPUTE_FEAR is also set -- confirm this is intended.
    if self.COMPUTE_ORACLE or self.COMPUTE_FEAR:
      nullPartialAlignment.fscore = self.ff_fscore(nullPartialAlignment, span)

      if self.COMPUTE_ORACLE:
        # The null alignment is the initial oracle; later candidates
        # replace it only with a strictly greater fscore.
        oracleAlignment = nullPartialAlignment
      if self.COMPUTE_HOPE:
        # hope = fscore + model score
        nullPartialAlignment.hope = nullPartialAlignment.fscore + nullPartialAlignment.score
        self.addPartialAlignment_hope(partialAlignments_hope, nullPartialAlignment, self.BEAM_SIZE)
      if self.COMPUTE_FEAR:
        # fear = (1 - fscore) + model score
        nullPartialAlignment.fear = (1 - nullPartialAlignment.fscore) + nullPartialAlignment.score
        self.addPartialAlignment_fear(partialAlignments_fear, nullPartialAlignment, self.BEAM_SIZE)

    ##################################################
    # Single-link alignment
    ##################################################
    # Collects (score, tgtIndex) for every target word; used below to pick
    # the candidates for two-link alignments.
    bestTgtWords = []
    for tgtIndex, tgtWord in enumerate(tgtWordList):
      currentLinks = [(tgtIndex, srcIndex)]
      scoreVector = svector.Vector()

      for k, func in enumerate(self.featureTemplates):
        value_dict = func(self.info, tgtWord, srcWord, tgtIndex, srcIndex, currentLinks, self.diagValues, currentNode)
        for name, value in value_dict.iteritems():
          if value != 0:
            scoreVector[name] += value

      # Keep track of scores for all 1-link partial alignments
      score = scoreVector.dot(self.weights)
      bestTgtWords.append((score, tgtIndex))

      singleLinkPartialAlignment = PartialGridAlignment()
      singleLinkPartialAlignment.score = score
      singleLinkPartialAlignment.scoreVector = scoreVector
      singleLinkPartialAlignment.scoreVector_local = svector.Vector(scoreVector)
      singleLinkPartialAlignment.links = currentLinks

      self.addPartialAlignment(partialAlignments, singleLinkPartialAlignment, self.BEAM_SIZE)

      if self.COMPUTE_ORACLE or self.COMPUTE_FEAR:
        singleLinkPartialAlignment.fscore = self.ff_fscore(singleLinkPartialAlignment, span)

        if self.COMPUTE_ORACLE:
          # Strict comparison: on ties the earlier candidate stays oracle.
          if singleLinkPartialAlignment.fscore > oracleAlignment.fscore:
            oracleAlignment = singleLinkPartialAlignment

        if self.COMPUTE_HOPE:
          singleLinkPartialAlignment.hope = singleLinkPartialAlignment.fscore + singleLinkPartialAlignment.score
          self.addPartialAlignment_hope(partialAlignments_hope, singleLinkPartialAlignment, self.BEAM_SIZE)

        if self.COMPUTE_FEAR:
          singleLinkPartialAlignment.fear = (1-singleLinkPartialAlignment.fscore)+singleLinkPartialAlignment.score
          self.addPartialAlignment_fear(partialAlignments_fear, singleLinkPartialAlignment, self.BEAM_SIZE)

    ##################################################
    # Two link alignment
    ##################################################
    # Get ready for 2-link alignments

    # Sort the fwords by score
    bestTgtWords.sort(reverse=True)
    # Consider at least the top 10 target words, or the top half of them
    # if that is more.
    # NOTE(review): LIMIT is used as a slice bound, so `/` must be integer
    # division here (true under Python 2, which iteritems() implies).
    LIMIT = max(10, len(bestTgtWords)/2)

    # Enumerate unordered pairs among the top LIMIT entries: obj2 always
    # comes after obj1 in the sorted list.
    for index1, obj1 in enumerate(bestTgtWords[0:LIMIT]):
      for _, obj2 in enumerate(bestTgtWords[index1+1:LIMIT]):
        # clear contents of twoLinkPartialAlignment
        tgtIndex_a = obj1[1]
        tgtIndex_b = obj2[1]
        # Don't consider a pair (tgtIndex_a, tgtIndex_b) if distance between
        # these indices > 1 (Arabic/English only).
        # Need to debug feature that is supposed to deal with this naturally.
        if self.LANG == "ar_en":
          if (abs(tgtIndex_b - tgtIndex_a) > 1):
            continue

        # tgtWord_a and tgtWord_b are assigned but not used below.
        tgtWord_a = tgtWordList[tgtIndex_a]
        tgtWord_b = tgtWordList[tgtIndex_b]
        currentLinks = [(tgtIndex_a, srcIndex), (tgtIndex_b, srcIndex)]

        scoreVector = svector.Vector()
        # NOTE(review): tgtWord and tgtIndex passed here are whatever the
        # single-link loop left behind (its last iteration, or the
        # '*NULL*' / -1 values if self.f is empty), not the pair being
        # scored.  Presumably the feature functions rely on currentLinks
        # for multi-link candidates -- confirm.
        for k, func in enumerate(self.featureTemplates):
          value_dict = func(self.info, tgtWord, srcWord,
                            tgtIndex, srcIndex, currentLinks,
                            self.diagValues, currentNode)
          for name, value in value_dict.iteritems():
            if value != 0:
              scoreVector[name] += value

        score = scoreVector.dot(self.weights)

        twoLinkPartialAlignment = PartialGridAlignment()
        twoLinkPartialAlignment.score = score
        twoLinkPartialAlignment.scoreVector = scoreVector
        twoLinkPartialAlignment.scoreVector_local = svector.Vector(scoreVector)
        twoLinkPartialAlignment.links = currentLinks

        self.addPartialAlignment(partialAlignments, twoLinkPartialAlignment, self.BEAM_SIZE)
        if self.COMPUTE_ORACLE or self.COMPUTE_FEAR:
          twoLinkPartialAlignment.fscore = self.ff_fscore(twoLinkPartialAlignment, span)

          if self.COMPUTE_ORACLE:
            if twoLinkPartialAlignment.fscore > oracleAlignment.fscore:
              oracleAlignment = twoLinkPartialAlignment

          if self.COMPUTE_HOPE:
            twoLinkPartialAlignment.hope = twoLinkPartialAlignment.fscore + twoLinkPartialAlignment.score
            self.addPartialAlignment_hope(partialAlignments_hope, twoLinkPartialAlignment, self.BEAM_SIZE)

          if self.COMPUTE_FEAR:
            twoLinkPartialAlignment.fear = (1-twoLinkPartialAlignment.fscore)+twoLinkPartialAlignment.score
            self.addPartialAlignment_fear(partialAlignments_fear, twoLinkPartialAlignment, self.BEAM_SIZE)

    ########################################################################
    # Finalize. Sort model-score list and then hope list.
    ########################################################################
    # Sort model score list.
    # Each popped item is inserted at the front, so the final list is in
    # the reverse of pop order (last popped first).  Items from this heap
    # are stored as popped, without unpacking.
    sortedBestFirstPartialAlignments = []
    while len(partialAlignments) > 0:
      sortedBestFirstPartialAlignments.insert(0,heappop(partialAlignments))
    # Sort hope score list.
    # Hope and fear heap entries are 2-tuples; only the second element is
    # kept.
    if self.COMPUTE_HOPE:
      sortedBestFirstPartialAlignments_hope = []
      while len(partialAlignments_hope) > 0:
        (_, obj) = heappop(partialAlignments_hope)
        sortedBestFirstPartialAlignments_hope.insert(0,obj)
    # Sort fear score list.
    if self.COMPUTE_FEAR:
      sortedBestFirstPartialAlignments_fear = []
      while len(partialAlignments_fear) > 0:
        (_, obj) = heappop(partialAlignments_fear)
        sortedBestFirstPartialAlignments_fear.insert(0, obj)

    # Publish the results on the node.
    currentNode.partialAlignments = sortedBestFirstPartialAlignments
    if self.COMPUTE_FEAR:
      currentNode.partialAlignments_fear = sortedBestFirstPartialAlignments_fear
    if self.COMPUTE_HOPE:
      currentNode.partialAlignments_hope = sortedBestFirstPartialAlignments_hope
    if self.COMPUTE_ORACLE:
      # The None assignment is immediately overwritten below.
      currentNode.oracle = None
      # Oracle BEFORE beam is applied
      currentNode.oracle = oracleAlignment
Beispiel #6
0
   def terminal_operation(self, currentNode = None):
       """
       Fire features at (pre)terminal nodes of the tree.

       For the source word stored on currentNode (data["surface"], position
       currentNode.eIndex), this builds and scores candidate
       PartialGridAlignment objects:
       (1) the null alignment (no links),
       (2) one single-link alignment per word of self.f,
       (3) for N = 2 .. self.nto1, N-link alignments obtained by extending
           the best (N-1)-link candidates with one of the best single-link
           targets.
       A candidate's score vector is the sum of all non-zero values returned
       by the functions in self.featureTemplates; its score is the dot
       product of that vector with self.weights.  Every candidate is offered
       to self.addPartialAlignment() with self.BEAM_SIZE, and to the
       hope/fear variants when the corresponding flags are set.

       Nothing is returned.  Results are stored on currentNode:
       partialAlignments always; partialAlignments_fear,
       partialAlignments_hope and oracle when COMPUTE_FEAR, COMPUTE_HOPE and
       COMPUTE_ORACLE are set respectively.

       Parameters:
       currentNode -- the (pre)terminal node; although it defaults to None
                      it is dereferenced unconditionally.

       The code uses dict.iteritems() and xrange(), i.e. it targets Python 2.
       """
       ##################################################
       # Setup
       ##################################################

       partialAlignments = []
       partialAlignments_hope = []
       partialAlignments_fear = []
       oracleAlignment = None

       # partialAlignments is still empty at this point.
       heapify(partialAlignments)

       tgtWordList = self.f
       # srcWordList is not referenced again in this method.
       srcWordList = self.e
       tgtWord = None
       srcWord = currentNode.data["surface"]
       # srcTag is not referenced again in this method.
       srcTag = currentNode.data["pos"]
       tgtIndex = None
       srcIndex = currentNode.eIndex

       # The span covers exactly the one source position.
       span = (srcIndex, srcIndex)

       ##################################################
       # null partial alignment ( assign no links )
       ##################################################
       # The null alignment is represented by target index -1, the
       # placeholder word '*NULL*' and an empty link list.
       tgtIndex = -1
       tgtWord = '*NULL*'
       scoreVector = svector.Vector()
       # Compute feature score

       for k, func in enumerate(self.featureTemplates):
           value_dict = func(self.info, tgtWord, srcWord, tgtIndex, srcIndex, [], self.diagValues, currentNode)
           for name, value in value_dict.iteritems():
               # Only non-zero feature values are accumulated.
               if value != 0:
                   scoreVector[name] += value

       nullPartialAlignment = PartialGridAlignment()
       nullPartialAlignment.score = score = scoreVector.dot(self.weights)
       nullPartialAlignment.scoreVector = scoreVector
       # The local vector is built from scoreVector via the svector.Vector
       # constructor rather than assigned directly.
       nullPartialAlignment.scoreVector_local = svector.Vector(scoreVector)
       self.addPartialAlignment(partialAlignments, nullPartialAlignment, self.BEAM_SIZE)

       # NOTE(review): the COMPUTE_HOPE branch below is nested inside this
       # guard, so the hope list is only filled when COMPUTE_ORACLE or
       # COMPUTE_FEAR is also set -- confirm this is intended.
       if self.COMPUTE_ORACLE or self.COMPUTE_FEAR:
           nullPartialAlignment.fscore = self.ff_fscore(nullPartialAlignment, span)
           if self.COMPUTE_ORACLE:
               # The null alignment is the initial oracle; later candidates
               # replace it only with a strictly greater fscore.
               oracleAlignment = nullPartialAlignment
           if self.COMPUTE_HOPE:
               # hope = fscore + model score
               nullPartialAlignment.hope = nullPartialAlignment.fscore + nullPartialAlignment.score
               self.addPartialAlignment_hope(partialAlignments_hope, nullPartialAlignment, self.BEAM_SIZE)
           if self.COMPUTE_FEAR:
               # fear = (1 - fscore) + model score
               nullPartialAlignment.fear = (1 - nullPartialAlignment.fscore) + nullPartialAlignment.score
               self.addPartialAlignment_fear(partialAlignments_fear, nullPartialAlignment, self.BEAM_SIZE)

       ##################################################
       # Single-link alignment
       ##################################################
       # Entries of singleBestAlignment are (score, [tgtIndex]); the index
       # is wrapped in a list so it can be concatenated onto N-link index
       # lists further down.
       singleBestAlignment = []
       # This initial empty list is replaced after the loop below.
       alignmentList = []
       for tgtIndex, tgtWord in enumerate(tgtWordList):
         currentLinks = [(tgtIndex, srcIndex)]
         scoreVector = svector.Vector()

         for k, func in enumerate(self.featureTemplates):
           value_dict = func(self.info, tgtWord, srcWord, tgtIndex, srcIndex, currentLinks, self.diagValues, currentNode)
           for name, value in value_dict.iteritems():
             if value != 0:
               scoreVector[name] += value

         # Keep track of scores for all 1-link partial alignments
         score = scoreVector.dot(self.weights)
         singleBestAlignment.append((score, [tgtIndex]))

         singleLinkPartialAlignment = PartialGridAlignment()
         singleLinkPartialAlignment.score = score
         singleLinkPartialAlignment.scoreVector = scoreVector
         singleLinkPartialAlignment.scoreVector_local = svector.Vector(scoreVector)
         singleLinkPartialAlignment.links = currentLinks

         self.addPartialAlignment(partialAlignments, singleLinkPartialAlignment, self.BEAM_SIZE)

         if self.COMPUTE_ORACLE or self.COMPUTE_FEAR:
           singleLinkPartialAlignment.fscore = self.ff_fscore(singleLinkPartialAlignment, span)

           if self.COMPUTE_ORACLE:
             # Strict comparison: on ties the earlier candidate stays oracle.
             if singleLinkPartialAlignment.fscore > oracleAlignment.fscore:
               oracleAlignment = singleLinkPartialAlignment

           if self.COMPUTE_HOPE:
             singleLinkPartialAlignment.hope = singleLinkPartialAlignment.fscore + singleLinkPartialAlignment.score
             self.addPartialAlignment_hope(partialAlignments_hope, singleLinkPartialAlignment, self.BEAM_SIZE)

           if self.COMPUTE_FEAR:
             singleLinkPartialAlignment.fear = (1-singleLinkPartialAlignment.fscore)+singleLinkPartialAlignment.score
             self.addPartialAlignment_fear(partialAlignments_fear, singleLinkPartialAlignment, self.BEAM_SIZE)
       # alignmentList now refers to the same list object as
       # singleBestAlignment (no copy), so the sort below orders both names.
       alignmentList = singleBestAlignment
       singleBestAlignment.sort(reverse=True)
       ##################################################
       # N link alignment(N>=2)
       ##################################################
       # Get ready for N-link alignments(N>=2)
       # On pass i, alignmentList holds the (i-1)-link candidates (the
       # single-link list on the first pass) and newAlignmentList collects
       # the i-link candidates built from them.
       for i in xrange(2,self.nto1+1):
         # Sort the fwords by score
         alignmentList.sort(reverse=True)
         newAlignmentList = []
         # At least 10 candidates are considered from each list; the upper
         # bounds scale with self.lenF (presumably the target sentence
         # length -- verify).
         # NOTE(review): both limits are used as slice bounds, so `/` must
         # be integer division here (true under Python 2).
         LIMIT_1 = max(10, self.lenF/2)
         LIMIT_N = max(10, self.lenF/i)
         for (_,na) in alignmentList[0:LIMIT_N]:# na means n link alignment
           for (_, sa) in singleBestAlignment[0:LIMIT_1]:#sa means single-link alignment
             # Only extend with a target index strictly greater than the
             # last index already in na.
             if(na[-1]>=sa[0]):#sa actually always have only one element
               continue
             # clear contents of twoLinkPartialAlignment
             tgtIndex_a = na[-1]
             tgtIndex_b = sa[0]
             # Don't consider a pair (tgtIndex_a, tgtIndex_b) if distance between
             # these indices > 1 (Arabic/English only).
             # Need to debug feature that is supposed to deal with this naturally.
             if self.LANG == "ar_en":
               if (abs(tgtIndex_b - tgtIndex_a) > 1):
                 continue

             # One (tgtIndex, srcIndex) link per target index in na + sa.
             currentLinks = list(map(lambda x: (x,srcIndex),na+sa))

             scoreVector = svector.Vector()
             # NOTE(review): tgtWord and tgtIndex passed here are whatever
             # the single-link loop left behind (its last iteration, or the
             # '*NULL*' / -1 values if self.f is empty), not the words being
             # linked.  Presumably the feature functions rely on
             # currentLinks for multi-link candidates -- confirm.
             for k, func in enumerate(self.featureTemplates):
               value_dict = func(self.info, tgtWord, srcWord,
                                 tgtIndex, srcIndex, currentLinks,
                                 self.diagValues, currentNode)
               for name, value in value_dict.iteritems():
                 if value != 0:
                   scoreVector[name] += value

             score = scoreVector.dot(self.weights)
             newAlignmentList.append((score, na+sa))

             NLinkPartialAlignment = PartialGridAlignment()
             NLinkPartialAlignment.score = score
             NLinkPartialAlignment.scoreVector = scoreVector
             NLinkPartialAlignment.scoreVector_local = svector.Vector(scoreVector)
             NLinkPartialAlignment.links = currentLinks
             self.addPartialAlignment(partialAlignments, NLinkPartialAlignment, self.BEAM_SIZE)
             if self.COMPUTE_ORACLE or self.COMPUTE_FEAR:
               NLinkPartialAlignment.fscore = self.ff_fscore(NLinkPartialAlignment, span)

               if self.COMPUTE_ORACLE:
                 if NLinkPartialAlignment.fscore > oracleAlignment.fscore:
                   oracleAlignment = NLinkPartialAlignment

               if self.COMPUTE_HOPE:
                 NLinkPartialAlignment.hope = NLinkPartialAlignment.fscore + NLinkPartialAlignment.score
                 self.addPartialAlignment_hope(partialAlignments_hope, NLinkPartialAlignment, self.BEAM_SIZE)

               if self.COMPUTE_FEAR:
                 NLinkPartialAlignment.fear = (1-NLinkPartialAlignment.fscore)+NLinkPartialAlignment.score
                 self.addPartialAlignment_fear(partialAlignments_fear, NLinkPartialAlignment, self.BEAM_SIZE)
         # The i-link candidates become the base for the next pass.
         alignmentList = newAlignmentList

       ########################################################################
       # Finalize. Sort model-score list and then hope list.
       ########################################################################
       # Sort model score list.
       # Each popped item is inserted at the front, so the final list is in
       # the reverse of pop order (last popped first).  Items from this heap
       # are stored as popped, without unpacking.
       sortedBestFirstPartialAlignments = []
       while len(partialAlignments) > 0:
         sortedBestFirstPartialAlignments.insert(0,heappop(partialAlignments))
       # Sort hope score list.
       # Hope and fear heap entries are 2-tuples; only the second element is
       # kept.
       if self.COMPUTE_HOPE:
         sortedBestFirstPartialAlignments_hope = []
         while len(partialAlignments_hope) > 0:
           (_, obj) = heappop(partialAlignments_hope)
           sortedBestFirstPartialAlignments_hope.insert(0,obj)
       # Sort fear score list.
       if self.COMPUTE_FEAR:
         sortedBestFirstPartialAlignments_fear = []
         while len(partialAlignments_fear) > 0:
           (_, obj) = heappop(partialAlignments_fear)
           sortedBestFirstPartialAlignments_fear.insert(0, obj)

       # Publish the results on the node.
       currentNode.partialAlignments = sortedBestFirstPartialAlignments
       if self.COMPUTE_FEAR:
         currentNode.partialAlignments_fear = sortedBestFirstPartialAlignments_fear
       if self.COMPUTE_HOPE:
         currentNode.partialAlignments_hope = sortedBestFirstPartialAlignments_hope
       if self.COMPUTE_ORACLE:
         # The None assignment is immediately overwritten below.
         currentNode.oracle = None
         # Oracle BEFORE beam is applied
         currentNode.oracle = oracleAlignment