Example #1
0
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations):
        
        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)
        
        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (len(sourceLines), len(targetLines))

        print u.now(), "Initializing"
        self.__Initialize(sourceLines, targetLines)

        for s in xrange(0, iterations):
            
            start1 = u.now()
            print start1, "Computing Counts for iteration", s+1
            self.__ComputeCounts(sourceLines, targetLines)
            
            start2 = u.now()
            print start2, "Computing t values for iteration", s+1
            self.__ComputeTValues(sourceLines, targetLines)
                        
            end = u.now()
            
            print u.now(), "Iteration", s+1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Finished at %s" % end

            
        return self.__tMap
Example #2
0
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations):

        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)

        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (
                len(sourceLines), len(targetLines))

        print u.now(), "Initializing"
        self.__Initialize(sourceLines, targetLines)

        for s in xrange(0, iterations):

            start1 = u.now()
            print start1, "Computing Counts for iteration", s + 1
            self.__ComputeCounts(sourceLines, targetLines)

            start2 = u.now()
            print start2, "Computing t values for iteration", s + 1
            self.__ComputeTValues(sourceLines, targetLines)

            end = u.now()

            print u.now(), "Iteration", s + 1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Finished at %s" % end

        return self.__tMap
Example #3
0
    def Align(self, sourceFile, targetFile):

        sourceLines = u.readFromFile(sourceFile)
        targetLines = u.readFromFile(targetFile)

        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (
                len(sourceLines), len(targetLines))

        lineCount = min(len(sourceLines), len(targetLines))

        # SentenceIndex, EnglishIndex, ForeignIndex

        alignments = []

        for k in xrange(0, lineCount):

            sourceWords = sourceLines[k].split()
            sourceWords.insert(0, self.NULLTAG)

            targetWords = targetLines[k].split()
            targetWords.insert(0, self.NULLTAG)

            mk = len(sourceWords) - 1
            lk = len(targetWords) - 1

            for i in xrange(1, mk + 1):

                bestAlignment = a3p1.Alignment(0, 0, "", 0, "", 0)

                for j in xrange(0, lk + 1):

                    fi = sourceWords[i]
                    ej = targetWords[j]

                    tValue = self._IBM1__tMap[a3p1.WordMap(fi, ej)]
                    #qValue = self.__qMap[ AlignmentByPosition(j, i, lk, mk) ]
                    qValue = self.GetQValue(j, i, lk, mk)
                    score = tValue * qValue

                    if (bestAlignment.Score < score):
                        bestAlignment = a3p1.Alignment(score, k + 1, fi, i, ej,
                                                       j)

                alignments.append(bestAlignment)

        return alignments
    def Align(self, sourceFile, targetFile):

        sourceLines = u.readFromFile(sourceFile)
        targetLines = u.readFromFile(targetFile)
        
        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (len(sourceLines), len(targetLines))
            
        lineCount = min(len(sourceLines), len(targetLines))
            
        # SentenceIndex, EnglishIndex, ForeignIndex

        alignments = []
        
        for k in xrange(0, lineCount):
        
            sourceWords = sourceLines[k].split()
            sourceWords.insert(0, self.NULLTAG)

            targetWords = targetLines[k].split()
            targetWords.insert(0,self.NULLTAG)

            mk = len(sourceWords) - 1
            lk = len(targetWords) - 1

            for i in xrange(1, mk+1):
                
                bestAlignment = a3p1.Alignment(0, 0, "", 0, "", 0)
                                
                qList = self.__qMap.GetQValues(lk, mk)
                
                for j in xrange(0, lk+1):

                    fi = sourceWords[i]
                    ej = targetWords[j]

                    tValue = self._IBM1__tMap[ a3p1.WordMap(fi, ej) ]
                    qValue = qList[(j, i)]
                    score = tValue * qValue

                    if (bestAlignment.Score < score):
                        bestAlignment = a3p1.Alignment(score, k+1, fi, i, ej, j)

                alignments.append(bestAlignment)

        return alignments
Example #5
0
    def Align(self, sourceFile, targetFile):

        sourceLines = u.readFromFile(sourceFile)
        targetLines = u.readFromFile(targetFile)

        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (
                len(sourceLines), len(targetLines))

        lineCount = min(len(sourceLines), len(targetLines))

        # SentenceIndex, EnglishIndex, ForeignIndex

        alignments = []

        for k in xrange(0, lineCount):
            sourceLine = sourceLines[k]
            targetLine = targetLines[k]

            sourceWords = sourceLine.split()
            sourceWords.insert(0, self.NULLTAG)

            targetWords = targetLine.split()
            targetWords.insert(0, self.NULLTAG)

            mk = len(sourceWords)
            lk = len(targetWords)

            for i in xrange(1, mk):

                bestAlignment = Alignment(0, 0, "", 0, "", 0)

                for j in xrange(0, lk):

                    fi = sourceWords[i]
                    ej = targetWords[j]

                    tValue = self.__tMap[WordMap(fi, ej)]

                    if (bestAlignment.Score < tValue):
                        bestAlignment = Alignment(tValue, k + 1, fi, i, ej, j)

                alignments.append(bestAlignment)

        return alignments
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations, tFile):
        
        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)
        
        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (len(sourceLines), len(targetLines))

        if (tFile == ""):
            print u.now(), "Initializing"
            self._IBM1__Initialize(sourceLines, targetLines)
        else:
            print u.now(), "Loading Initial T Values"
            self.LoadTValues(tFile)

        print u.now(), "Initializing Q Values"
        self.__qMap.Initialize(sourceLines, targetLines)
        
        for s in xrange(0, iterations):
            
#            for x in xrange(0,9):
#                print "Iteration", s, ":", x, "2 8 8", self.__qMap.GetQValue(x, 2, 8, 8)

            start1 = u.now()
            print start1, "Computing Counts for iteration", s+1
            self._IBM1__ComputeCounts(sourceLines, targetLines)
            
            start2 = u.now()
            print start2, "Computing t values for iteration", s+1
            self._IBM1__ComputeTValues(sourceLines, targetLines)
            
            start3 = u.now()
            print start3, "Computing q values for iteration", s+1
            self.__ComputeQValues(sourceLines, targetLines)
                        
            end = u.now()
            
            print u.now(), "Iteration", s+1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Started q value computations at %s." % start3
            print u.now(), "Iteration", s+1, "finished at %s" % end

            
        return self._IBM1__tMap
Example #7
0
    def Align(self, sourceFile, targetFile):

        sourceLines = u.readFromFile(sourceFile)
        targetLines = u.readFromFile(targetFile)
        
        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (len(sourceLines), len(targetLines))
            
        lineCount = min(len(sourceLines), len(targetLines))
            
        # SentenceIndex, EnglishIndex, ForeignIndex

        alignments = []
        
        for k in xrange(0, lineCount):
            sourceLine = sourceLines[ k ]
            targetLine = targetLines[ k ]

            sourceWords = sourceLine.split()
            sourceWords.insert(0, self.NULLTAG)
            
            targetWords = targetLine.split()
            targetWords.insert(0, self.NULLTAG)

            mk = len(sourceWords)
            lk = len(targetWords)

            for i in xrange(1, mk):
                
                bestAlignment = Alignment(0, 0, "", 0, "", 0)
                                
                for j in xrange(0, lk):
                    
                    fi = sourceWords[i]
                    ej = targetWords[j]
                    
                    tValue = self.__tMap[ WordMap(fi, ej) ]
                    
                    if (bestAlignment.Score < tValue):
                        bestAlignment = Alignment(tValue, k+1, fi, i, ej, j)
            
                alignments.append(bestAlignment)
        
        return alignments
Example #8
0
def LoadAlignments(alignmentFile, sourceFile, targetFile):
    lines = u.readFromFile(alignmentFile)
    sourceLines = u.readFromFile(sourceFile)
    targetLines = u.readFromFile(targetFile)
    
    lineIndex = 0

    alignments = dict()
    
    for line in lines:
        lineIndex += 1
        
        alignmentLine = line.split()
        
        if len(alignmentLine) != 3:
            print "Invalid line %s: %s" % ( lineIndex, line )  
        
        k = int(alignmentLine[0])
        i = int(alignmentLine[2])
        j = int(alignmentLine[1])
        
        sourceLine = sourceLines[k-1].split()
        sourceLine.insert(0, SentenceAlignment.NULLTAG)

        targetLine = targetLines[k-1].split()
        targetLine.insert(0, SentenceAlignment.NULLTAG)

        if (alignments.has_key( k )):
            alignment = alignments[ k ]
        else:
            alignment = SentenceAlignment(sourceLine, targetLine)

        alignment.Align(i, j)
        
        alignments[ k ] = alignment
        
    return alignments
Example #9
0
def LoadAlignments(alignmentFile, sourceFile, targetFile):
    lines = u.readFromFile(alignmentFile)
    sourceLines = u.readFromFile(sourceFile)
    targetLines = u.readFromFile(targetFile)

    lineIndex = 0

    alignments = dict()

    for line in lines:
        lineIndex += 1

        alignmentLine = line.split()

        if len(alignmentLine) != 3:
            print "Invalid line %s: %s" % (lineIndex, line)

        k = int(alignmentLine[0])
        i = int(alignmentLine[2])
        j = int(alignmentLine[1])

        sourceLine = sourceLines[k - 1].split()
        sourceLine.insert(0, SentenceAlignment.NULLTAG)

        targetLine = targetLines[k - 1].split()
        targetLine.insert(0, SentenceAlignment.NULLTAG)

        if (alignments.has_key(k)):
            alignment = alignments[k]
        else:
            alignment = SentenceAlignment(sourceLine, targetLine)

        alignment.Align(i, j)

        alignments[k] = alignment

    return alignments
Example #10
0
    def LoadTValues(self, tFile):
        lines = u.readFromFile(tFile)
        i = 0

        for line in lines:
            i += 1

            tMapLine = line.split()

            if len(tMapLine) != 3:
                print "Invalid line %s: %s" % (i, line)

            key = WordMap(tMapLine[0], tMapLine[1])
            val = float(tMapLine[2])
            self.__tMap[key] = val

        return self.__tMap
Example #11
0
 def LoadQValues(self, qFile):
     lines = u.readFromFile(qFile)
     i = 0
     
     for line in lines:
         i += 1
         
         qMapLine = line.split()
         
         if len(qMapLine) != 5:
             print "Invalid line %s: %s" % ( i, line )  
         
         key = AlignmentByPosition(int(qMapLine[0]), int(qMapLine[1]), int(qMapLine[2]), int(qMapLine[3]))
         val = float(qMapLine[4])
         self.__qMap[key] = val 
         
     return self.__qMap
Example #12
0
 def LoadTValues(self, tFile):
     lines = u.readFromFile(tFile)
     i = 0
     
     for line in lines:
         i += 1
         
         tMapLine = line.split()
         
         if len(tMapLine) != 3:
             print "Invalid line %s: %s" % ( i, line )  
         
         key = WordMap(tMapLine[0], tMapLine[1])
         val = float(tMapLine[2])
         self.__tMap[key] = val 
         
     return self.__tMap
Example #13
0
    def LoadQValues(self, qFile):
        lines = u.readFromFile(qFile)
        i = 0

        for line in lines:
            i += 1

            qMapLine = line.split()

            if len(qMapLine) != 5:
                print "Invalid line %s: %s" % (i, line)

            key = AlignmentByPosition(int(qMapLine[0]), int(qMapLine[1]),
                                      int(qMapLine[2]), int(qMapLine[3]))
            val = float(qMapLine[4])
            self.__qMap[key] = val

        return self.__qMap
 def Load(self, qFile):
     lines = u.readFromFile(qFile)
     i = 0
     
     for line in lines:
         i += 1
         
         qMapLine = line.split()
         
         if len(qMapLine) != 5:
             print "Invalid line %s: %s" % ( i, line )  
         
         key = AlignmentByPosition(int(qMapLine[0]), int(qMapLine[1]), int(qMapLine[2]), int(qMapLine[3]))
         val = float(qMapLine[4])
         
         self.SetQValue(key.j, key.iCondition, key.lCondition, key.mCondition, val)
         
     return self.__qMap
Example #15
0
 def LoadQValues2(self, qFile):
     lines = u.readFromFile(qFile)
     i = 0
     
     qMap = dict()
     
     for line in lines:
         i += 1
         
         qMapLine = line.split()
         
         if len(qMapLine) != 5:
             print "Invalid line %s: %s" % ( i, line )  
         
         key = AlignmentByPosition(qMapLine[0], qMapLine[1], qMapLine[2], qMapLine[3])
         val = float(qMapLine[4])
         qMap[key] = val 
         
     return qMap
Example #16
0
    def LoadQValues2(self, qFile):
        lines = u.readFromFile(qFile)
        i = 0

        qMap = dict()

        for line in lines:
            i += 1

            qMapLine = line.split()

            if len(qMapLine) != 5:
                print "Invalid line %s: %s" % (i, line)

            key = AlignmentByPosition(qMapLine[0], qMapLine[1], qMapLine[2],
                                      qMapLine[3])
            val = float(qMapLine[4])
            qMap[key] = val

        return qMap