def createEdge(self, childEdges, currentNode, span):
    """
    Combine the edges in 'childEdges' into a single new edge.

    Steps:
    (1) Create an empty PartialGridAlignment with fresh score vectors.
    (2) For every child edge, append its links to the new edge and add its
        scoreVector_local / scoreVector into the new edge's vectors.
        A child that has no bounding box yet gets one computed from its
        own links via self.boundingBox().
    (3) Pass the new edge to scoreEdge(newEdge, ...); per the original
        notes this is the call that sets the score of the new edge.

    Returns the pair (new edge, bounding box reported by scoreEdge).
    """
    merged = PartialGridAlignment()
    merged.scoreVector_local = svector.Vector()
    merged.scoreVector = svector.Vector()

    for child in childEdges:
        merged.links += child.links
        merged.scoreVector_local += child.scoreVector_local
        merged.scoreVector += child.scoreVector

        # Lazily fill in the child's bounding box if nobody has done so yet.
        if child.boundingBox is None:
            child.boundingBox = self.boundingBox(child.links)

    # Only the bounding box of scoreEdge()'s result is used here.
    _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
    return merged, mergedBox
def createEdge(self, childEdges, currentNode, span, hyperEdge):
    """
    Combine the edges in 'childEdges' into a single new edge for
    'currentNode'.

    Steps:
    (1) Create an empty PartialGridAlignment; its decodingPath takes
        currentNode.data, is marked as non-dummy, and the edge records
        hyperEdge.score as hyperEdgeScore.
    (2) For every child edge, append the links returned by its
        getDepthAddedLink() and add its scoreVector_local / scoreVector
        into the new edge's vectors. A child whose decodingPath carries a
        different "word_id" than currentNode is attached as a child of the
        new decodingPath. A child without a bounding box gets one computed
        from its own links via self.boundingBox().
    (3) Pass the new edge to scoreEdge(newEdge, ...); per the original
        notes this is the call that sets the score of the new edge.

    Returns the pair (new edge, bounding box reported by scoreEdge).
    """
    merged = PartialGridAlignment()
    merged.decodingPath.data = currentNode.data
    merged.decodingPath.isDummy = False
    merged.scoreVector_local = svector.Vector()
    merged.scoreVector = svector.Vector()
    merged.hyperEdgeScore = hyperEdge.score

    for child in childEdges:
        merged.links += child.getDepthAddedLink()
        merged.scoreVector_local += child.scoreVector_local

        # TOP node does not have local hypothesis so there is only one childedge
        if currentNode.data["word_id"] != child.decodingPath.data["word_id"]:
            merged.decodingPath.addChild(child.decodingPath)

        merged.scoreVector += child.scoreVector

        # Lazily fill in the child's bounding box if nobody has done so yet.
        if child.boundingBox is None:
            child.boundingBox = self.boundingBox(child.links)

    # Only the bounding box of scoreEdge()'s result is used here.
    _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
    return merged, mergedBox
def createDummyEdge(self, childEdges, currentNode, dummyCurrentNode, span, hyperEdge, isLastMerge = True):
    """
    Combine the edges in 'childEdges' into a new edge whose decoding path
    is labelled with 'dummyCurrentNode'.

    The new edge's decodingPath takes dummyCurrentNode.data and is flagged
    as dummy unless isLastMerge is true; hyperEdge.score is stored as
    hyperEdgeScore. For each child edge:
      - its links are appended: the result of getDepthAddedLink() when
        isLastMerge is true, the raw child.links otherwise;
      - its scoreVector_local / scoreVector are added to the new edge;
      - if its decodingPath has a different "word_id" than currentNode, it
        is attached under the new decodingPath and its parent pointer is
        set to it;
      - a missing bounding box is computed from its own links.
    Finally the edge is passed to scoreEdge(), scored against currentNode
    (not the dummy node).

    Returns the pair (new edge, bounding box reported by scoreEdge).
    """
    merged = PartialGridAlignment()
    merged.decodingPath.data = dummyCurrentNode.data
    merged.decodingPath.isDummy = not isLastMerge
    merged.scoreVector_local = svector.Vector()
    merged.scoreVector = svector.Vector()
    merged.hyperEdgeScore = hyperEdge.score

    for child in childEdges:
        merged.links += child.getDepthAddedLink() if isLastMerge else child.links
        merged.scoreVector_local += child.scoreVector_local

        # TOP node does not have local hypothesis so there is only one childedge
        if currentNode.data["word_id"] != child.decodingPath.data["word_id"]:
            merged.decodingPath.addChild(child.decodingPath)
            child.decodingPath.parent = merged.decodingPath

        merged.scoreVector += child.scoreVector

        # Lazily fill in the child's bounding box if nobody has done so yet.
        if child.boundingBox is None:
            child.boundingBox = self.boundingBox(child.links)

    # Only the bounding box of scoreEdge()'s result is used here.
    _, mergedBox = self.scoreEdge(merged, currentNode, span, childEdges)
    return merged, mergedBox
def createEdge(self, childEdges, currentNode, span):
    """
    Build one new edge out of the edges listed in 'childEdges'.

    The new PartialGridAlignment starts with empty score vectors. Each
    child edge contributes its links and both of its score vectors
    (scoreVector_local and scoreVector). While walking the children, any
    child whose boundingBox is still None gets it computed from its own
    links with self.boundingBox(). The finished edge is then handed to
    scoreEdge(newEdge, ...), which according to the original notes sets
    the score of the new edge.

    Returns (new edge, bounding box), the bounding box being the second
    value returned by scoreEdge().
    """
    combined = PartialGridAlignment()
    combined.scoreVector_local = svector.Vector()
    combined.scoreVector = svector.Vector()

    for part in childEdges:
        combined.links += part.links
        combined.scoreVector_local += part.scoreVector_local
        combined.scoreVector += part.scoreVector

        if part.boundingBox is None:
            part.boundingBox = self.boundingBox(part.links)

    # The first value returned by scoreEdge() is not needed by callers.
    _, combinedBox = self.scoreEdge(combined, currentNode, span, childEdges)
    return combined, combinedBox
def terminal_operation(self, index, currentNode = None):
    """
    Fire features at (pre)terminal nodes of the tree.

    Builds the candidate partial alignments for the source word found at
    currentNode.children[0]:
      - the null alignment (no links),
      - one single-link alignment per target word in self.f,
      - two-link alignments formed from the best-scoring single links.
    Every candidate is scored with self.featureTemplates / self.weights and
    offered to the model-score beam; when requested it is also offered to
    the hope and fear beams and compared against the running oracle.

    Parameters:
      index       -- not used by this method; kept in the signature so
                     existing callers keep working.
      currentNode -- the (pre)terminal node. Its first child supplies the
                     source word (.data) and source position (.eIndex).

    Results are stored on currentNode, best first:
      partialAlignments        always
      partialAlignments_fear   if self.COMPUTE_FEAR
      partialAlignments_hope   if self.COMPUTE_HOPE
      oracle                   if self.COMPUTE_ORACLE (chosen before any
                               beam pruning)

    The f-score is computed whenever oracle, hope or fear is requested, so
    that the hope score never reads an f-score that was not computed.
    """
    ##################################################
    # Setup
    ##################################################
    tgtWordList = self.f
    srcWord = currentNode.children[0].data
    srcIndex = currentNode.children[0].eIndex
    span = (srcIndex, srcIndex)

    partialAlignments = []
    partialAlignments_hope = []
    partialAlignments_fear = []
    oracleAlignment = None

    # hope = fscore + score and fear = (1 - fscore) + score both need fscore.
    needFscore = self.COMPUTE_ORACLE or self.COMPUTE_HOPE or self.COMPUTE_FEAR

    def addHypothesis(featWord, featIndex, links):
        """Score 'links', file the hypothesis in the beams; return (score, hypothesis)."""
        scoreVector = svector.Vector()
        for func in self.featureTemplates:
            value_dict = func(self.info, featWord, srcWord,
                              featIndex, srcIndex, links,
                              self.diagValues, currentNode)
            for name, value in value_dict.iteritems():
                if value != 0:
                    scoreVector[name] += value
        score = scoreVector.dot(self.weights)

        hypothesis = PartialGridAlignment()
        hypothesis.score = score
        hypothesis.scoreVector = scoreVector
        hypothesis.scoreVector_local = svector.Vector(scoreVector)
        if links:
            # The null alignment keeps PartialGridAlignment's default links.
            hypothesis.links = links
        self.addPartialAlignment(partialAlignments, hypothesis, self.BEAM_SIZE)

        if needFscore:
            hypothesis.fscore = self.ff_fscore(hypothesis, span)
        if self.COMPUTE_HOPE:
            hypothesis.hope = hypothesis.fscore + hypothesis.score
            self.addPartialAlignment_hope(partialAlignments_hope, hypothesis, self.BEAM_SIZE)
        if self.COMPUTE_FEAR:
            hypothesis.fear = (1 - hypothesis.fscore) + hypothesis.score
            self.addPartialAlignment_fear(partialAlignments_fear, hypothesis, self.BEAM_SIZE)
        return score, hypothesis

    def drainBestFirst(heap, keyed):
        """Empty 'heap' with heappop and return its items in reverse pop order."""
        ordered = []
        while heap:
            item = heappop(heap)
            if keyed:
                # hope/fear heaps hold (key, alignment) pairs
                (_, item) = item
            ordered.append(item)
        ordered.reverse()
        return ordered

    ##################################################
    # null partial alignment ( assign no links )
    ##################################################
    tgtIndex = -1
    tgtWord = '*NULL*'
    _, nullPartialAlignment = addHypothesis(tgtWord, tgtIndex, [])
    if self.COMPUTE_ORACLE:
        oracleAlignment = nullPartialAlignment

    ##################################################
    # Single-link alignment
    ##################################################
    # Keep track of scores for all 1-link partial alignments
    bestTgtWords = []
    for tgtIndex, tgtWord in enumerate(tgtWordList):
        score, singleLinkPartialAlignment = addHypothesis(
            tgtWord, tgtIndex, [(tgtIndex, srcIndex)])
        bestTgtWords.append((score, tgtIndex))
        if self.COMPUTE_ORACLE and singleLinkPartialAlignment.fscore > oracleAlignment.fscore:
            oracleAlignment = singleLinkPartialAlignment

    ##################################################
    # Two link alignment
    ##################################################
    # Sort the fwords by score and only pair up the best of them.
    bestTgtWords.sort(reverse=True)
    LIMIT = max(10, len(bestTgtWords) // 2)
    shortlist = bestTgtWords[0:LIMIT]
    for index1, (_, tgtIndex_a) in enumerate(shortlist):
        for (_, tgtIndex_b) in shortlist[index1 + 1:]:
            # Don't consider a pair (tgtIndex_a, tgtIndex_b) if distance between
            # these indices > 1 (Arabic/English only).
            # Need to debug feature that is supposed to deal with this naturally.
            if self.LANG == "ar_en" and abs(tgtIndex_b - tgtIndex_a) > 1:
                continue

            currentLinks = [(tgtIndex_a, srcIndex), (tgtIndex_b, srcIndex)]
            # NOTE(review): tgtWord / tgtIndex still hold the values left by
            # the single-link loop (the last target word), not either word of
            # this pair. Kept as-is because changing it would change the
            # feature values; confirm the templates rely on currentLinks only.
            _, twoLinkPartialAlignment = addHypothesis(tgtWord, tgtIndex, currentLinks)
            if self.COMPUTE_ORACLE and twoLinkPartialAlignment.fscore > oracleAlignment.fscore:
                oracleAlignment = twoLinkPartialAlignment

    ########################################################################
    # Finalize. Sort model-score list and then hope list.
    ########################################################################
    currentNode.partialAlignments = drainBestFirst(partialAlignments, False)
    if self.COMPUTE_FEAR:
        currentNode.partialAlignments_fear = drainBestFirst(partialAlignments_fear, True)
    if self.COMPUTE_HOPE:
        currentNode.partialAlignments_hope = drainBestFirst(partialAlignments_hope, True)
    if self.COMPUTE_ORACLE:
        # Oracle BEFORE beam is applied
        currentNode.oracle = oracleAlignment
def terminal_operation(self, currentNode = None):
    """
    Fire features at (pre)terminal nodes of the tree.

    Builds the candidate partial alignments for the source word stored in
    currentNode.data["surface"] at position currentNode.eIndex:
      - the null alignment (no links),
      - one single-link alignment per target word in self.f,
      - N-link alignments for N = 2 .. self.nto1, each obtained by
        extending a best-scoring (N-1)-link candidate with one of the
        best-scoring single links whose target index is strictly larger
        than the candidate's last index.
    Every candidate is scored with self.featureTemplates / self.weights and
    offered to the model-score beam; when requested it is also offered to
    the hope and fear beams and compared against the running oracle.

    Parameters:
      currentNode -- the (pre)terminal node to process.

    Results are stored on currentNode, best first:
      partialAlignments        always
      partialAlignments_fear   if self.COMPUTE_FEAR
      partialAlignments_hope   if self.COMPUTE_HOPE
      oracle                   if self.COMPUTE_ORACLE (chosen before any
                               beam pruning)

    The f-score is computed whenever oracle, hope or fear is requested, so
    that the hope score never reads an f-score that was not computed.
    """
    ##################################################
    # Setup
    ##################################################
    tgtWordList = self.f
    srcWord = currentNode.data["surface"]
    srcIndex = currentNode.eIndex
    span = (srcIndex, srcIndex)

    partialAlignments = []
    partialAlignments_hope = []
    partialAlignments_fear = []
    oracleAlignment = None

    # hope = fscore + score and fear = (1 - fscore) + score both need fscore.
    needFscore = self.COMPUTE_ORACLE or self.COMPUTE_HOPE or self.COMPUTE_FEAR

    def addHypothesis(featWord, featIndex, links):
        """Score 'links', file the hypothesis in the beams; return (score, hypothesis)."""
        scoreVector = svector.Vector()
        for func in self.featureTemplates:
            value_dict = func(self.info, featWord, srcWord,
                              featIndex, srcIndex, links,
                              self.diagValues, currentNode)
            for name, value in value_dict.iteritems():
                if value != 0:
                    scoreVector[name] += value
        score = scoreVector.dot(self.weights)

        hypothesis = PartialGridAlignment()
        hypothesis.score = score
        hypothesis.scoreVector = scoreVector
        hypothesis.scoreVector_local = svector.Vector(scoreVector)
        if links:
            # The null alignment keeps PartialGridAlignment's default links.
            hypothesis.links = links
        self.addPartialAlignment(partialAlignments, hypothesis, self.BEAM_SIZE)

        if needFscore:
            hypothesis.fscore = self.ff_fscore(hypothesis, span)
        if self.COMPUTE_HOPE:
            hypothesis.hope = hypothesis.fscore + hypothesis.score
            self.addPartialAlignment_hope(partialAlignments_hope, hypothesis, self.BEAM_SIZE)
        if self.COMPUTE_FEAR:
            hypothesis.fear = (1 - hypothesis.fscore) + hypothesis.score
            self.addPartialAlignment_fear(partialAlignments_fear, hypothesis, self.BEAM_SIZE)
        return score, hypothesis

    def drainBestFirst(heap, keyed):
        """Empty 'heap' with heappop and return its items in reverse pop order."""
        ordered = []
        while heap:
            item = heappop(heap)
            if keyed:
                # hope/fear heaps hold (key, alignment) pairs
                (_, item) = item
            ordered.append(item)
        ordered.reverse()
        return ordered

    ##################################################
    # null partial alignment ( assign no links )
    ##################################################
    tgtIndex = -1
    tgtWord = '*NULL*'
    _, nullPartialAlignment = addHypothesis(tgtWord, tgtIndex, [])
    if self.COMPUTE_ORACLE:
        oracleAlignment = nullPartialAlignment

    ##################################################
    # Single-link alignment
    ##################################################
    # Keep track of scores for all 1-link partial alignments; each entry is
    # (score, [tgtIndex]) so it can be extended into N-link candidates.
    singleBestAlignment = []
    for tgtIndex, tgtWord in enumerate(tgtWordList):
        score, singleLinkPartialAlignment = addHypothesis(
            tgtWord, tgtIndex, [(tgtIndex, srcIndex)])
        singleBestAlignment.append((score, [tgtIndex]))
        if self.COMPUTE_ORACLE and singleLinkPartialAlignment.fscore > oracleAlignment.fscore:
            oracleAlignment = singleLinkPartialAlignment

    singleBestAlignment.sort(reverse=True)
    alignmentList = singleBestAlignment

    ##################################################
    # N link alignment(N>=2)
    ##################################################
    for i in xrange(2, self.nto1 + 1):
        # Sort the (N-1)-link candidates by score
        alignmentList.sort(reverse=True)
        newAlignmentList = []
        LIMIT_1 = max(10, self.lenF // 2)
        LIMIT_N = max(10, self.lenF // i)
        topSingles = singleBestAlignment[0:LIMIT_1]

        for (_, na) in alignmentList[0:LIMIT_N]:  # na means n link alignment
            for (_, sa) in topSingles:  # sa means single-link alignment
                tgtIndex_a = na[-1]
                tgtIndex_b = sa[0]  # sa always has exactly one element
                # Only extend with a strictly larger target index.
                if tgtIndex_a >= tgtIndex_b:
                    continue

                # Don't consider a pair (tgtIndex_a, tgtIndex_b) if distance between
                # these indices > 1 (Arabic/English only).
                # Need to debug feature that is supposed to deal with this naturally.
                if self.LANG == "ar_en" and abs(tgtIndex_b - tgtIndex_a) > 1:
                    continue

                extended = na + sa
                currentLinks = [(x, srcIndex) for x in extended]
                # NOTE(review): tgtWord / tgtIndex still hold the values left
                # by the single-link loop (the last target word), not a word
                # of this candidate. Kept as-is because changing it would
                # change the feature values; confirm the templates rely on
                # currentLinks only.
                score, NLinkPartialAlignment = addHypothesis(tgtWord, tgtIndex, currentLinks)
                newAlignmentList.append((score, extended))
                if self.COMPUTE_ORACLE and NLinkPartialAlignment.fscore > oracleAlignment.fscore:
                    oracleAlignment = NLinkPartialAlignment

        alignmentList = newAlignmentList

    ########################################################################
    # Finalize. Sort model-score list and then hope list.
    ########################################################################
    currentNode.partialAlignments = drainBestFirst(partialAlignments, False)
    if self.COMPUTE_FEAR:
        currentNode.partialAlignments_fear = drainBestFirst(partialAlignments_fear, True)
    if self.COMPUTE_HOPE:
        currentNode.partialAlignments_hope = drainBestFirst(partialAlignments_hope, True)
    if self.COMPUTE_ORACLE:
        # Oracle BEFORE beam is applied
        currentNode.oracle = oracleAlignment