Example #1
0
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations):

        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)

        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (
                len(sourceLines), len(targetLines))

        print u.now(), "Initializing"
        self.__Initialize(sourceLines, targetLines)

        for s in xrange(0, iterations):

            start1 = u.now()
            print start1, "Computing Counts for iteration", s + 1
            self.__ComputeCounts(sourceLines, targetLines)

            start2 = u.now()
            print start2, "Computing t values for iteration", s + 1
            self.__ComputeTValues(sourceLines, targetLines)

            end = u.now()

            print u.now(), "Iteration", s + 1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Finished at %s" % end

        return self.__tMap
Example #2
0
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations, tFile):
        
        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)
        
        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (len(sourceLines), len(targetLines))

        if (tFile == ""):
            print u.now(), "Initializing"
            self._IBM1__Initialize(sourceLines, targetLines)
        else:
            print u.now(), "Loading Initial T Values"
            self.LoadTValues(tFile)

        for s in xrange(0, iterations):
            
            for x in xrange(0,9):
                print "Iteration", s, ":", x, "2 8 8", self.GetQValue(x, 2, 8, 8)

            start1 = u.now()
            print start1, "Computing Counts for iteration", s+1
            self._IBM1__ComputeCounts(sourceLines, targetLines)
            
            start2 = u.now()
            print start2, "Computing t values for iteration", s+1
            self._IBM1__ComputeTValues(sourceLines, targetLines)
            
            start3 = u.now()
            print start3, "Computing q values for iteration", s+1
            self.__ComputeQValues(sourceLines, targetLines)
                        
            end = u.now()
            
            print u.now(), "Iteration", s+1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Started q value computations at %s." % start3
            print u.now(), "Iteration", s+1, "finished at %s" % end

            
        return self._IBM1__tMap
Example #3
0
    # File that the trained t values are written to further down.
    tFile = "tValues.txt"

    # Disabled overrides.  With these commented out, sourceAlignmentFile,
    # targetAlignmentFile and aFile (like sourceCorpusFile and
    # targetCorpusFile) have to be defined before this point -- those
    # definitions are not visible in this excerpt.
    #    sourceAlignmentFile = "test.es"
    #    targetAlignmentFile = "test.en"
    #    aFile = "alignment_test.p1.out"

    model = IBM1()

    # Train from the two corpus files; 5 is Train's third argument
    # (presumably the iteration count -- confirm against IBM1.Train).
    # The returned map is kept only for the size report at the end.
    tMap = model.Train(sourceCorpusFile, targetCorpusFile, 5)

    # Persist the freshly trained t values.
    model.SaveTValues(tFile)
    #
    # Disabled alternative: load t values from tFile instead of training.
    #    print u.now(), "Loading t values"
    #    tMap = model.LoadTValues(tFile)

    print u.now(), "Aligning words"
    alignments = model.Align(sourceAlignmentFile, targetAlignmentFile)

    print u.now(), "Saving alignments"
    SaveAlignments(aFile, alignments)

    print u.now(), "Found %s alignments" % len(alignments)
    print u.now(), "Done"

    # Disabled debugging aid: dump the tMap entries whose key has
    # eCondition == "cyprus".
    #    for key in tMap.keys():
    #        if key.eCondition == "cyprus":
    #            print key.f, tMap[key]

    # Number of entries in the map returned by Train.
    print u.now(), "Found %s possible alignment pairings" % len(tMap)

    #for mapCount in tMap.Items():
Example #4
0
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations):
        
        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)
        
        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (len(sourceLines), len(targetLines))

        print u.now(), "Initializing"
        self.__Initialize(sourceLines, targetLines)

        for s in xrange(0, iterations):
            
            start1 = u.now()
            print start1, "Computing Counts for iteration", s+1
            self.__ComputeCounts(sourceLines, targetLines)
            
            start2 = u.now()
            print start2, "Computing t values for iteration", s+1
            self.__ComputeTValues(sourceLines, targetLines)
                        
            end = u.now()
            
            print u.now(), "Iteration", s+1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Finished at %s" % end

            
        return self.__tMap
Example #5
0
    # Sentence files handed to Align below, and the file the resulting
    # alignments are saved to.
    sourceAlignmentFile = "test.es"
    targetAlignmentFile = "test.en"
    aFile = "alignment_test.p2.out" 
    
    model = IBM2()

# Disabled training path: train, then save the t and q values.  The names it
# uses (sourceCorpusFile, targetCorpusFile, tFile, newTFile, qFile) are not
# defined in the visible lines.
#    print u.now(), "Training start"
#    tMap = model.Train(sourceCorpusFile, targetCorpusFile, 5, tFile)
#
#    print u.now(), "Saving t values"
#    model.SaveTValues(newTFile)
#
#    print u.now(), "Saving q values"
#    model.SaveQValues(qFile)
 
    # Active path: load t and q values from newTFile / qFile instead of
    # training (presumably files written by an earlier run of the disabled
    # block above -- confirm).  tMap and qMap are not read again in the
    # visible lines.
    print u.now(), "Loading t values"
    tMap = model.LoadTValues(newTFile)

    print u.now(), "Loading q values"
    #qMap0 = model.QMap()
    #qMap1 = model.LoadQValues2(qFile)
    qMap = model.LoadQValues(qFile)


    print u.now(), "Aligning words"
    alignments = model.Align(sourceAlignmentFile, targetAlignmentFile)
    
    
    # Saving goes through the a3p1 module's SaveAlignments.
    print u.now(), "Saving alignments to", aFile
    a3p1.SaveAlignments(aFile, alignments)
    
Example #6
0
    # File that the trained t values are written to further down.
    tFile = "tValues.txt"

    # Disabled overrides.  With these commented out, sourceAlignmentFile,
    # targetAlignmentFile and aFile (like sourceCorpusFile and
    # targetCorpusFile) have to be defined before this point -- those
    # definitions are not visible in this excerpt.
#    sourceAlignmentFile = "test.es"
#    targetAlignmentFile = "test.en"
#    aFile = "alignment_test.p1.out" 
    
    model = IBM1()

    # Train from the two corpus files; 5 is Train's third argument
    # (presumably the iteration count -- confirm against IBM1.Train).
    # The returned map is kept only for the size report at the end.
    tMap = model.Train(sourceCorpusFile, targetCorpusFile,5)

    # Persist the freshly trained t values.
    model.SaveTValues(tFile)
#
# Disabled alternative: load t values from tFile instead of training.
#    print u.now(), "Loading t values"
#    tMap = model.LoadTValues(tFile)
    
    print u.now(), "Aligning words"
    alignments = model.Align(sourceAlignmentFile, targetAlignmentFile)
    
    print u.now(), "Saving alignments"
    SaveAlignments(aFile, alignments)
    
    print u.now(), "Found %s alignments" % len(alignments)
    print u.now(), "Done"

    
# Disabled debugging aid: dump the tMap entries whose key has
# eCondition == "cyprus".
#    for key in tMap.keys():
#        if key.eCondition == "cyprus":
#            print key.f, tMap[key]
    
    # Number of entries in the map returned by Train.
    print u.now(), "Found %s possible alignment pairings" % len(tMap)
Example #7
0
    def Train(self, sourceCorpusFile, targetCorpusFile, iterations, tFile):

        sourceLines = u.readFromFile(sourceCorpusFile)
        targetLines = u.readFromFile(targetCorpusFile)

        if (len(sourceLines) != len(targetLines)):
            print "Source(%s) and target(%s) corpus lengths differ." % (
                len(sourceLines), len(targetLines))

        if (tFile == ""):
            print u.now(), "Initializing"
            self._IBM1__Initialize(sourceLines, targetLines)
        else:
            print u.now(), "Loading Initial T Values"
            self.LoadTValues(tFile)

        for s in xrange(0, iterations):

            for x in xrange(0, 9):
                print "Iteration", s, ":", x, "2 8 8", self.GetQValue(
                    x, 2, 8, 8)

            start1 = u.now()
            print start1, "Computing Counts for iteration", s + 1
            self._IBM1__ComputeCounts(sourceLines, targetLines)

            start2 = u.now()
            print start2, "Computing t values for iteration", s + 1
            self._IBM1__ComputeTValues(sourceLines, targetLines)

            start3 = u.now()
            print start3, "Computing q values for iteration", s + 1
            self.__ComputeQValues(sourceLines, targetLines)

            end = u.now()

            print u.now(), "Iteration", s + 1, "complete."
            print u.now(), "Started count computations at %s." % start1
            print u.now(), "Started t value computations at %s." % start2
            print u.now(), "Started q value computations at %s." % start3
            print u.now(), "Iteration", s + 1, "finished at %s" % end

        return self._IBM1__tMap
Example #8
0
    # Sentence files handed to Align below, and the file the resulting
    # alignments are saved to.
    sourceAlignmentFile = "test.es"
    targetAlignmentFile = "test.en"
    aFile = "alignment_test.p2.out"

    model = IBM2()

    # Disabled training path: train, then save the t and q values.  The names
    # it uses (sourceCorpusFile, targetCorpusFile, tFile, newTFile, qFile) are
    # not defined in the visible lines.
    #    print u.now(), "Training start"
    #    tMap = model.Train(sourceCorpusFile, targetCorpusFile, 5, tFile)
    #
    #    print u.now(), "Saving t values"
    #    model.SaveTValues(newTFile)
    #
    #    print u.now(), "Saving q values"
    #    model.SaveQValues(qFile)

    # Active path: load t and q values from newTFile / qFile instead of
    # training (presumably files written by an earlier run of the disabled
    # block above -- confirm).  tMap and qMap are not read again in the
    # visible lines.
    print u.now(), "Loading t values"
    tMap = model.LoadTValues(newTFile)

    print u.now(), "Loading q values"
    #qMap0 = model.QMap()
    #qMap1 = model.LoadQValues2(qFile)
    qMap = model.LoadQValues(qFile)

    print u.now(), "Aligning words"
    alignments = model.Align(sourceAlignmentFile, targetAlignmentFile)

    # Saving goes through the a3p1 module's SaveAlignments.
    print u.now(), "Saving alignments to", aFile
    a3p1.SaveAlignments(aFile, alignments)

    #    print u.now(), "Found %s alignments" % len(alignments)
    print u.now(), "Done"
Example #9
0
#    esTargetModel = a3p2_opt.IBM2A()
#    esTargetModel.LoadTValues(enSourceM2TFile)
#    esTargetModel.LoadQValues(enSourceM2QFile)
#    
#    print u.now(), "Getting first alignments"
#    alignments_enTarget = enTargetModel.Align(esSentencesFile, enSentencesFile)
#    alignments_esTarget = esTargetModel.Align(enSentencesFile, esSentencesFile)
#    
#    a3p1.SaveAlignments(enTargetAlignmentsFile, alignments_enTarget)
#    a3p1.SaveAlignments(esTargetAlignmentsFile, alignments_esTarget)
    
###########################################################
    

    
    print u.now(), "Loading..."

# Disabled variant that read from the *SourceAlignmentsFile names instead.
#    alignments_enTarget = LoadAlignments(esSourceAlignmentsFile, esSentencesFile, enSentencesFile)
#    alignments_esTarget = LoadAlignments(enSourceAlignmentsFile, enSentencesFile, esSentencesFile)

    # Load both sets of alignments.  Note the two sentence-file arguments are
    # passed in opposite order for the two calls.  All file-name variables are
    # defined outside the visible lines.
    alignments_enTarget = LoadAlignments(enTargetAlignmentsFile, esSentencesFile, enSentencesFile)
    alignments_esTarget = LoadAlignments(esTargetAlignmentsFile, enSentencesFile, esSentencesFile)
    

    # A length mismatch is only reported; execution continues.  The message
    # labels the enTarget list as the p(f|e) model and the esTarget list as
    # the p(e|f) model.
    if len(alignments_enTarget) != len(alignments_esTarget):
        print "Sentence counts do not match: %s for p(f|e) model, %s for p(e|f) model" % \
            ( len(alignments_enTarget), len(alignments_esTarget) )
            
    # The sentence count is taken from the esTarget list, even if the two
    # lengths differ.
    sentenceCount = len(alignments_esTarget)
    
    # Starts empty; how it is filled is not visible in this excerpt.
    fullAlignments = []
Example #10
0
    #
    #    print u.now(), "Load Model p(e|f)"
    #    esTargetModel = a3p2_opt.IBM2A()
    #    esTargetModel.LoadTValues(enSourceM2TFile)
    #    esTargetModel.LoadQValues(enSourceM2QFile)
    #
    #    print u.now(), "Getting first alignments"
    #    alignments_enTarget = enTargetModel.Align(esSentencesFile, enSentencesFile)
    #    alignments_esTarget = esTargetModel.Align(enSentencesFile, esSentencesFile)
    #
    #    a3p1.SaveAlignments(enTargetAlignmentsFile, alignments_enTarget)
    #    a3p1.SaveAlignments(esTargetAlignmentsFile, alignments_esTarget)

    ###########################################################

    print u.now(), "Loading..."

    # Disabled variant that read from the *SourceAlignmentsFile names instead.
    #    alignments_enTarget = LoadAlignments(esSourceAlignmentsFile, esSentencesFile, enSentencesFile)
    #    alignments_esTarget = LoadAlignments(enSourceAlignmentsFile, enSentencesFile, esSentencesFile)

    # Load both sets of alignments.  Note the two sentence-file arguments are
    # passed in opposite order for the two calls.  All file-name variables are
    # defined outside the visible lines.
    alignments_enTarget = LoadAlignments(enTargetAlignmentsFile,
                                         esSentencesFile, enSentencesFile)
    alignments_esTarget = LoadAlignments(esTargetAlignmentsFile,
                                         enSentencesFile, esSentencesFile)

    # A length mismatch is only reported; execution continues.  The message
    # labels the enTarget list as the p(f|e) model and the esTarget list as
    # the p(e|f) model.
    if len(alignments_enTarget) != len(alignments_esTarget):
        print "Sentence counts do not match: %s for p(f|e) model, %s for p(e|f) model" % \
            ( len(alignments_enTarget), len(alignments_esTarget) )

    # The sentence count is taken from the esTarget list, even if the two
    # lengths differ.
    sentenceCount = len(alignments_esTarget)