Example #1
0
    def procureModel(self):
        """Load, resume or train a model as directed by self.options.

        The starting model is taken from a checkpoint
        (options.resume_from_checkpoint), unpickled from options.modelFile,
        or absent; in the last case a new Sequitur() is created and
        training has to supply the model.  Afterwards the model is
        optionally ramped up, trained, transposed, stripped and written to
        options.newModelFile, and self-tested on self.develSample.

        Returns:
            The resulting model, or None when training was requested but
            did not yield a model.

        Raises:
            UsageError: if no model is available where one is required.
        """
        if self.options.resume_from_checkpoint:
            model = ModelTemplate.resume(self.options.resume_from_checkpoint)
            self.sequitur = model.sequitur
        elif self.options.modelFile:
            # NOTE(review): unpickling can execute arbitrary code; only
            # load model files from trusted sources.
            with open(self.options.modelFile, 'rb') as modelFile:
                if sys.version_info[:2] >= (3, 0):
                    # presumably latin1 lets Python 3 read models pickled
                    # under Python 2 -- confirm against the model format
                    model = pickle.load(modelFile, encoding='latin1')
                else:
                    try:
                        model = pickle.load(modelFile)
                    except ValueError:
                        print('This error most likely occured because the loaded model was created in python3.\n', file=sys.stderr)
                        raise
            self.sequitur = model.sequitur
        else:
            self.sequitur = Sequitur()
            model = None

        if self.options.shouldRampUp:
            if model is None:
                # Ramping up needs an existing model; without this guard
                # the call below fails with AttributeError on None.
                raise UsageError
            model.rampUp()

        if self.options.trainSample:
            model = self.trainModel(model)
            if not model:
                print('failed to estimate or load model', file=self.log)
                return None

        if not model:
            raise UsageError

        # Debugging aid:
        # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

        if self.options.shouldTranspose:
            model.transpose()

        if self.options.newModelFile:
            oldSize, newSize = model.strip()
            print('stripped number of multigrams from %d to %d' % (oldSize, newSize), file=self.log)
            # The context manager closes the file even if pickling fails.
            with open(self.options.newModelFile, 'wb') as newModelFile:
                pickle.dump(model, newModelFile, pickle.HIGHEST_PROTOCOL)

        if self.options.shouldSelfTest:
            print('warning: --self-test does not treat pronunciation variants correctly', file=self.log)
            if not self.develSample:
                print('error: cannot do --self-test without --devel sample', file=self.log)
            else:
                translator = Translator(model)
                evaluator = Evaluator()
                evaluator.setSample(self.develSample)
                evaluator.verboseLog = self.log
                result = evaluator.evaluate(translator)
                print(result, file=self.log)

        return model
Example #2
0
    def procureModel(self):
        """Load, resume or train a model as directed by self.options.

        The starting model is taken from a checkpoint
        (options.resume_from_checkpoint), unpickled from options.modelFile,
        or absent; in the last case a new Sequitur() is created and
        training has to supply the model.  Afterwards the model is
        optionally ramped up, trained, transposed, stripped and written to
        options.newModelFile, and self-tested on self.develSample.

        Returns:
            The resulting model, or None when training was requested but
            did not yield a model.

        Raises:
            UsageError: if no model is available where one is required.
        """
        if self.options.resume_from_checkpoint:
            model = ModelTemplate.resume(self.options.resume_from_checkpoint)
            self.sequitur = model.sequitur
        elif self.options.modelFile:
            # NOTE(review): unpickling can execute arbitrary code; only
            # load model files from trusted sources.
            with open(self.options.modelFile, "rb") as modelFile:
                model = pickle.load(modelFile)
            self.sequitur = model.sequitur
        else:
            self.sequitur = Sequitur()
            model = None

        if self.options.shouldRampUp:
            if model is None:
                # Ramping up needs an existing model; without this guard
                # the call below fails with AttributeError on None.
                raise UsageError
            model.rampUp()

        if self.options.trainSample:
            model = self.trainModel(model)
            if not model:
                print('failed to estimate or load model', file=self.log)
                return None

        if not model:
            raise UsageError

        # Debugging aid:
        # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

        if self.options.shouldTranspose:
            model.transpose()

        if self.options.newModelFile:
            oldSize, newSize = model.strip()
            print('stripped number of multigrams from %d to %d' %
                  (oldSize, newSize),
                  file=self.log)
            # The context manager closes the file even if pickling fails.
            with open(self.options.newModelFile, 'wb') as newModelFile:
                pickle.dump(model, newModelFile)

        if self.options.shouldSelfTest:
            print(
                'warning: --self-test does not treat pronunciation variants correctly',
                file=self.log)
            if not self.develSample:
                print('error: cannot do --self-test without --devel sample',
                      file=self.log)
            else:
                translator = Translator(model)
                evaluator = Evaluator()
                evaluator.setSample(self.develSample)
                evaluator.verboseLog = self.log
                result = evaluator.evaluate(translator)
                print(result, file=self.log)

        return model
Example #3
0
    def procureModel(self):
        """Load, resume or train a model as directed by self.options.

        The starting model is taken from a checkpoint
        (options.resume_from_checkpoint), unpickled from options.modelFile,
        or absent; in the last case a new Sequitur() is created and
        training has to supply the model.  Afterwards the model is
        optionally ramped up, trained, transposed, stripped and written to
        options.newModelFile, and self-tested on self.develSample.

        Returns the resulting model, or None when training was requested
        but did not yield a model.  Raises UsageError if no model is
        available where one is required.
        """
        if self.options.resume_from_checkpoint:
            model = ModelTemplate.resume(self.options.resume_from_checkpoint)
            self.sequitur = model.sequitur
        elif self.options.modelFile:
            # Pickles are binary data: open in 'rb' so the stream is not
            # subject to text-mode newline translation.
            # NOTE(review): unpickling can execute arbitrary code; only
            # load model files from trusted sources.
            modelFile = open(self.options.modelFile, 'rb')
            try:
                model = pickle.load(modelFile)
            finally:
                modelFile.close()
            self.sequitur = model.sequitur
        else:
            self.sequitur = Sequitur()
            model = None

        if self.options.shouldRampUp:
            if model is None:
                # Ramping up needs an existing model; without this guard
                # the call below fails with AttributeError on None.
                raise UsageError
            model.rampUp()

        if self.options.trainSample:
            model = self.trainModel(model)
            if not model:
                print >> self.log, 'failed to estimate or load model'
                return None

        if not model:
            raise UsageError

        # Debugging aid:
        # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

        if self.options.shouldTranspose:
            model.transpose()

        if self.options.newModelFile:
            oldSize, newSize = model.strip()
            print >> self.log, 'stripped number of multigrams from %d to %d' % (oldSize, newSize)
            # HIGHEST_PROTOCOL is a binary format, so write in 'wb'.
            newModelFile = open(self.options.newModelFile, 'wb')
            try:
                pickle.dump(model, newModelFile, pickle.HIGHEST_PROTOCOL)
            finally:
                newModelFile.close()

        if self.options.shouldSelfTest:
            print >> self.log, 'warning: --self-test does not treat pronunciation variants correctly'
            if not self.develSample:
                print >> self.log, 'error: cannot do --self-test without --devel sample'
            else:
                translator = Translator(model)
                evaluator = Evaluator()
                evaluator.setSample(self.develSample)
                evaluator.verboseLog = self.log
                result = evaluator.evaluate(translator)
                print >> self.log, result

        return model
Example #4
0
    def trainModel(self, initialModel):
        """Configure a ModelTemplate from self.options and run estimation.

        Calls self.loadSamples(), compiles self.trainSample and
        self.develSample with self.sequitur, then removes the
        self.trainSample attribute.

        Args:
            initialModel: model handed to template.makeContext() as the
                starting point; callers may pass None.  It must not be
                None when options.shouldWipeModel is set, because
                wipeOut() is called on it.

        Returns:
            estimationContext.bestModel after template.run(), or None
            when options.minIterations exceeds options.maxIterations.
        """
        self.loadSamples()
        compiledTrainSample = self.sequitur.compileSample(self.trainSample)
        compiledDevelSample = self.sequitur.compileSample(self.develSample)
        del self.trainSample

        discount = None
        if self.options.fixed_discount:
            # collections.Sequence was removed in Python 3.10; the ABC
            # lives in collections.abc (fallback keeps Python 2 working).
            try:
                from collections.abc import Sequence
            except ImportError:
                from collections import Sequence
            # NOTE(review): eval() executes arbitrary code from the option
            # string; acceptable only while the option comes from a trusted
            # command line.
            discount = eval(self.options.fixed_discount)
            if not isinstance(discount, Sequence):
                # a single value is wrapped so num.array() gets a sequence
                discount = [discount]
            discount = num.array(discount)

        template = ModelTemplate(self.sequitur)
        if self.options.fixed_discount:
            template.DiscountAdjustmentStrategy = FixedDiscounts(discount)
        elif self.develSample:
            if self.options.eager_discount_adjustment:
                template.DiscountAdjustmentStrategy = EagerDiscountAdjuster
            else:
                template.DiscountAdjustmentStrategy = DefaultDiscountAdjuster
        else:
            template.DiscountAdjustmentStrategy = StaticDiscounts

        if self.options.lengthConstraints:
            spec = self.options.lengthConstraints.strip()
            if spec.startswith('['):
                # bracketed form: "[l:r,l:r,...]" -> list of (int, int)
                assert spec.endswith(']')
                st = spec[1:-1].split(',')
                st = [t.split(':') for t in st]
                st = [(int(l), int(r)) for l, r in st]
                template.setSizeTemplates(st)
            else:
                # plain form: comma-separated integers passed positionally
                lc = tuple(map(int, spec.split(',')))
                template.setLengthConstraints(*lc)
        template.allowEmergenceOfNewMultigrams(
            not bool(self.options.shouldSuppressNewMultigrams))
        template.useMaximumApproximation(bool(self.options.viterbi))

        if self.options.minIterations > self.options.maxIterations:
            print('invalid limits on number of iterations %d > %d' % \
                  (self.options.minIterations,self.options.maxIterations), file=self.log)
            return None
        template.minIterations = self.options.minIterations
        template.maxIterations = self.options.maxIterations
        if self.options.checkpoint and self.options.newModelFile:
            # presumably seconds, i.e. every 8 hours -- TODO confirm
            template.checkpointInterval = 8 * 60 * 60
            base, ext = os.path.splitext(self.options.newModelFile)
            # '%d' is left in the name as a placeholder for later formatting
            template.checkpointFile = base + '-cp%d' + ext

        if self.options.shouldWipeModel:
            initialModel.wipeOut(template.nPossibleMultigrams())

        if self.options.shouldTestContinuously:
            if self.develSample:
                template.observers.append(
                    OnlineTester('devel', self.develSample))
            if self.options.testSample:
                template.observers.append(
                    OnlineTester('test',
                                 self.loadSample(self.options.testSample)))

        estimationContext = template.makeContext(compiledTrainSample,
                                                 compiledDevelSample,
                                                 initialModel)
        # drop the local reference; the context now holds the model
        del initialModel

        estimationContext.log = self.log
        if self.options.shouldInitializeWithCounts:
            template.initializeWithOverlappingCounts(estimationContext)
        template.run(estimationContext)
        return estimationContext.bestModel
Example #5
0
    def trainModel(self, initialModel):
        """Configure a ModelTemplate from self.options and run estimation.

        Calls self.loadSamples(), compiles self.trainSample and
        self.develSample with self.sequitur, then removes the
        self.trainSample attribute.

        initialModel is handed to template.makeContext() as the starting
        point; callers may pass None.  It must not be None when
        options.shouldWipeModel is set, because wipeOut() is called on it.

        Returns estimationContext.bestModel after template.run(), or None
        when options.minIterations exceeds options.maxIterations.
        """
        self.loadSamples()
        compiledTrainSample = self.sequitur.compileSample(self.trainSample)
        compiledDevelSample = self.sequitur.compileSample(self.develSample)
        del self.trainSample

        if self.options.fixed_discount:
            # NOTE(review): eval() executes arbitrary code from the option
            # string; acceptable only while the option comes from a trusted
            # command line.
            discount = eval(self.options.fixed_discount)
            if not operator.isSequenceType(discount):
                # a single value is wrapped so num.array() gets a sequence
                discount = [discount]
            discount = num.array(discount)
        else:
            discount = None

        template = ModelTemplate(self.sequitur)
        if self.options.fixed_discount:
            template.DiscountAdjustmentStrategy = FixedDiscounts(discount)
        elif self.develSample:
            if self.options.eager_discount_adjustment:
                template.DiscountAdjustmentStrategy = EagerDiscountAdjuster
            else:
                template.DiscountAdjustmentStrategy = DefaultDiscountAdjuster
        else:
            template.DiscountAdjustmentStrategy = StaticDiscounts

        if self.options.lengthConstraints:
            spec = self.options.lengthConstraints.strip()
            if spec.startswith('['):
                # bracketed form: "[l:r,l:r,...]" -> list of (int, int)
                assert spec.endswith(']')
                st = spec[1:-1].split(',')
                st = [ t.split(':') for t in st ]
                st = [ (int(l), int(r)) for l, r in st ]
                template.setSizeTemplates(st)
            else:
                # plain form: comma-separated integers passed positionally
                lc = tuple(map(int, spec.split(',')))
                template.setLengthConstraints(*lc)
        template.allowEmergenceOfNewMultigrams(not bool(self.options.shouldSuppressNewMultigrams))
        template.useMaximumApproximation(bool(self.options.viterbi))

        if self.options.minIterations > self.options.maxIterations:
            print >> self.log, 'invalid limits on number of iterations %d > %d' % \
                  (self.options.minIterations,self.options.maxIterations)
            return None
        template.minIterations = self.options.minIterations
        template.maxIterations = self.options.maxIterations
        if self.options.checkpoint and self.options.newModelFile:
            # presumably seconds, i.e. every 8 hours -- TODO confirm
            template.checkpointInterval = 8 * 60*60
            base, ext = os.path.splitext(self.options.newModelFile)
            # '%d' is left in the name as a placeholder for later formatting
            template.checkpointFile = base + '-cp%d' + ext

        if self.options.shouldWipeModel:
            initialModel.wipeOut(template.nPossibleMultigrams())

        if self.options.shouldTestContinuously:
            if self.develSample:
                template.observers.append(
                    OnlineTester('devel', self.develSample))
            if self.options.testSample:
                template.observers.append(
                    OnlineTester('test', self.loadSample(self.options.testSample)))

        estimationContext = template.makeContext(
            compiledTrainSample, compiledDevelSample, initialModel)
        # drop the local reference; the context now holds the model
        del initialModel

        estimationContext.log = self.log
        if self.options.shouldInitializeWithCounts:
            template.initializeWithOverlappingCounts(estimationContext)
        template.run(estimationContext)
        return estimationContext.bestModel
Example #6
0
    def procureModel(self):
        """Load, resume or train a model as directed by self.options.

        The starting model is taken from a checkpoint
        (options.resume_from_checkpoint), unpickled from options.modelFile,
        or absent; in the last case a new Sequitur() is created and
        training has to supply the model.  Afterwards the model is
        optionally ramped up, trained, transposed, stripped and written to
        options.newModelFile, and self-tested on self.develSample.

        Returns the resulting model, or None when training was requested
        but did not yield a model.  Raises UsageError if no model is
        available where one is required.
        """
        if self.options.resume_from_checkpoint:
            model = ModelTemplate.resume(self.options.resume_from_checkpoint)
            self.sequitur = model.sequitur
        elif self.options.modelFile:
            # Pickles are binary data: open in 'rb' so the stream is not
            # subject to text-mode newline translation.
            # NOTE(review): unpickling can execute arbitrary code; only
            # load model files from trusted sources.
            modelFile = open(self.options.modelFile, 'rb')
            try:
                model = pickle.load(modelFile)
            finally:
                modelFile.close()
            self.sequitur = model.sequitur
        else:
            self.sequitur = Sequitur()
            model = None

        if self.options.shouldRampUp:
            if model is None:
                # Ramping up needs an existing model; without this guard
                # the call below fails with AttributeError on None.
                raise UsageError
            model.rampUp()

        if self.options.trainSample:
            model = self.trainModel(model)
            if not model:
                print >> self.log, 'failed to estimate or load model'
                return None

        if not model:
            raise UsageError

        # Debugging aid:
        # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

        if self.options.shouldTranspose:
            model.transpose()

        if self.options.newModelFile:
            oldSize, newSize = model.strip()
            print >> self.log, 'stripped number of multigrams from %d to %d' % (
                oldSize, newSize)
            # HIGHEST_PROTOCOL is a binary format, so write in 'wb'.
            newModelFile = open(self.options.newModelFile, 'wb')
            try:
                pickle.dump(model, newModelFile, pickle.HIGHEST_PROTOCOL)
            finally:
                newModelFile.close()

        if self.options.shouldSelfTest:
            print >> self.log, 'warning: --self-test does not treat pronunciation variants correctly'
            if not self.develSample:
                print >> self.log, 'error: cannot do --self-test without --devel sample'
            else:
                translator = Translator(model)
                evaluator = Evaluator()
                evaluator.setSample(self.develSample)
                evaluator.verboseLog = self.log
                result = evaluator.evaluate(translator)
                print >> self.log, result

        return model