Example #1
 def initData(self, X, T, split=True):
     VX = None
     VT = None
     X, T = vt.shuffleData(X, T, self.random)
     if split:
         X, T, VX, VT = \
             vt.splitData(X, T,
                         self.trainOpt['heldOutRatio'],
                         self.trainOpt['xvalidNo'])
     return X, T, VX, VT
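
For context, here is a minimal sketch of what the vt.shuffleData and vt.splitData helpers might look like, inferred only from the call sites above; the real vt module is not shown, so the signatures and the fold arithmetic below are assumptions:

    import numpy as np

    def shuffleData(X, T, random):
        # Permute examples and targets together, using the supplied RNG
        # (assumed to be a numpy.random.RandomState).
        perm = random.permutation(X.shape[0])
        return X[perm], T[perm]

    def splitData(X, T, heldOutRatio, xvalidNo):
        # Hold out one fold of roughly N * heldOutRatio examples for
        # validation; xvalidNo picks which fold is held out.
        N = X.shape[0]
        foldSize = int(np.floor(N * heldOutRatio))
        start = xvalidNo * foldSize
        end = start + foldSize
        VX, VT = X[start:end], T[start:end]
        X = np.concatenate((X[:start], X[end:]), axis=0)
        T = np.concatenate((T[:start], T[end:]), axis=0)
        return X, T, VX, VT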
Example #2
 def initData(self, X, T, split=True):
     VX = None
     VT = None
     X, T = vt.shuffleData(X, T, self.random)
     if split:
         X, T, VX, VT = \
             vt.splitData(X, T,
                         self.trainOpt['heldOutRatio'],
                         self.trainOpt['xvalidNo'])
     return X, T, VX, VT
Example #3
    def train(self, trainInput, trainTarget, validInput=None, validTarget=None):
        self.initFolder()
        trainOpt = self.trainOpt
        if validInput is None and validTarget is None:
            X, T, VX, VT = self.initData(\
                trainInput, trainTarget, \
                split=self.trainOpt['needValid'])
        else:
            X = trainInput
            T = trainTarget
            VX = validInput
            VT = validTarget
        N = X.shape[0]
        numEpoch = trainOpt['numEpoch']
        calcError = trainOpt['calcError']
        numExPerBat = trainOpt['batchSize']
        progressWriter = ProgressWriter(N, width=80)
        logger = Logger(self, csv=trainOpt['writeRecord'])
        logger.logMsg('Trainer ' + self.name)
        plotter = Plotter(self)
        bestVscore = None
        bestTscore = None
        bestEpoch = 0
        nAfterBest = 0
        stop = False

        # Train loop through epochs
        for epoch in range(0, numEpoch):
            E = 0
            correct = 0
            total = 0

            if trainOpt['shuffle']:
                X, T = vt.shuffleData(X, T, self.random)

            batchStart = 0
            while batchStart < N:
                # Batch info
                batchEnd = min(N, batchStart + numExPerBat)
                numExThisBat = batchEnd - batchStart

                # Write progress bar
                if trainOpt['progress']:
                    progressWriter.increment(amount=numExThisBat)

                # Forward
                Y_bat = self.model.forward(X[batchStart:batchEnd], dropout=True)
                T_bat = T[batchStart:batchEnd]

                # Loss
                Etmp, dEdY = self.model.getCost(Y_bat, T_bat)
                E += Etmp * numExThisBat / float(N)

                # Backward
                self.model.backward(dEdY)

                # Update
                self.model.updateWeights()

                # Prediction error
                if calcError:
                    rate_, correct_, total_ = \
                        tester.calcRate(self.model, Y_bat, T_bat)
                    correct += correct_
                    total += total_

                batchStart += numExPerBat

            # Store train statistics
            if calcError:
                rate = correct / float(total)
                self.rate[epoch] = rate
            self.loss[epoch] = E

            if not trainOpt.has_key('criterion'):
                Tscore = E
            else:
                if trainOpt['criterion'] == 'loss':
                    Tscore = E
                elif trainOpt['criterion'] == 'rate':
                    Tscore = 1 - rate
                else:
                    raise Exception('Unknown stopping criterion "%s"' % \
                        trainOpt['criterion'])

            # Run validation
            if trainOpt['needValid']:
                VY = tester.test(self.model, VX)
                VE, dVE = self.model.getCost(VY, VT)
                self.validLoss[epoch] = VE
                if calcError:
                    Vrate, correct, total = tester.calcRate(self.model, VY, VT)
                    self.validRate[epoch] = Vrate

                # Check stopping criterion
                if not trainOpt.has_key('criterion'):
                    Vscore = VE
                else:
                    if trainOpt['criterion'] == 'loss':
                        Vscore = VE
                    elif trainOpt['criterion'] == 'rate':
                        Vscore = 1 - Vrate
                    else:
                        raise Exception('Unknown stopping criterion "%s"' % \
                            trainOpt['criterion'])
                if (bestVscore is None) or (Vscore < bestVscore):
                    bestVscore = Vscore
                    bestTscore = Tscore
                    nAfterBest = 0
                    bestEpoch = epoch
                    # Save trainer if VE is best
                    if trainOpt['saveModel']:
                        self.save()
                else:
                    nAfterBest += 1
                    # Stop training if above patience level
                    if nAfterBest > trainOpt['patience']:
                        print 'Patience level reached, early stop.'
                        print 'Will stop at score ', bestTscore
                        stop = True
            else:
                if trainOpt['saveModel']:
                    self.save()
                if trainOpt.has_key('stopScore') and \
                    Tscore < trainOpt['stopScore']:
                    print 'Training score is lower than %.4f, early stop.' % \
                        trainOpt['stopScore']
                    stop = True

            # Anneal learning rate
            self.model.updateLearningParams(epoch)

            # Print statistics
            logger.logTrainStats()
            if trainOpt['needValid']:
                print 'BT: %.4f' % bestTscore
            
            # Plot train curves
            if trainOpt['plotFigs']:
                plotter.plot()

            # Terminate
            if stop:       
                break

        # Record the best train score and the epoch at which training stopped
        self.stoppedTrainScore = bestTscore
        self.stoppedEpoch = bestEpoch if trainOpt['needValid'] else epoch
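
The loop above is driven entirely by keys in self.trainOpt. The dictionary below lists the keys this train() actually reads; the values are illustrative assumptions, not the project's defaults:

    trainOpt = {
        'numEpoch': 30,          # number of epochs
        'batchSize': 64,         # examples per mini-batch
        'heldOutRatio': 0.1,     # used by initData via vt.splitData
        'xvalidNo': 0,           # which validation fold to hold out
        'needValid': True,       # run validation and early stopping
        'shuffle': True,         # reshuffle training data every epoch
        'calcError': True,       # track the prediction error rate
        'progress': True,        # draw the progress bar
        'writeRecord': True,     # let Logger write a CSV record
        'plotFigs': False,       # plot train curves each epoch
        'saveModel': True,       # save the trainer on a new best score
        'patience': 5,           # epochs to wait after the best validation score
        'criterion': 'loss',     # optional: 'loss' or 'rate'
        # 'stopScore': 0.01,     # optional, only checked when needValid is False
    }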
Example #4
    def train(self,
              trainInput,
              trainTarget,
              validInput=None,
              validTarget=None):
        self.initFolder()
        trainOpt = self.trainOpt
        if validInput is None and validTarget is None:
            X, T, VX, VT = self.initData(\
                trainInput, trainTarget, \
                split=self.trainOpt['needValid'])
        else:
            X = trainInput
            T = trainTarget
            VX = validInput
            VT = validTarget
        N = X.shape[0]
        numEpoch = trainOpt['numEpoch']
        calcError = trainOpt['calcError']
        numExPerBat = trainOpt['batchSize']
        progressWriter = ProgressWriter(N, width=80)
        logger = Logger(self, csv=trainOpt['writeRecord'])
        logger.logMsg('Trainer ' + self.name)
        plotter = Plotter(self)
        bestVscore = None
        bestTscore = None
        bestEpoch = 0
        nAfterBest = 0
        stop = False

        # Train loop through epochs
        for epoch in range(0, numEpoch):
            E = 0
            correct = 0
            total = 0

            if trainOpt['shuffle']:
                X, T = vt.shuffleData(X, T, self.random)

            batchStart = 0
            while batchStart < N:
                # Batch info
                batchEnd = min(N, batchStart + numExPerBat)
                numExThisBat = batchEnd - batchStart

                # Write progress bar
                if trainOpt['progress']:
                    progressWriter.increment(amount=numExThisBat)

                # Forward
                Y_bat = self.model.forward(X[batchStart:batchEnd],
                                           dropout=True)
                T_bat = T[batchStart:batchEnd]

                # Loss
                Etmp, dEdY = self.model.getCost(Y_bat, T_bat)
                E += Etmp * numExThisBat / float(N)

                # Backward
                self.model.backward(dEdY)

                # Update
                self.model.updateWeights()

                # Prediction error
                if calcError:
                    rate_, correct_, total_ = \
                        tester.calcRate(self.model, Y_bat, T_bat)
                    correct += correct_
                    total += total_

                batchStart += numExPerBat

            # Store train statistics
            if calcError:
                rate = correct / float(total)
                self.rate[epoch] = rate
            self.loss[epoch] = E

            if not trainOpt.has_key('criterion'):
                Tscore = E
            else:
                if trainOpt['criterion'] == 'loss':
                    Tscore = E
                elif trainOpt['criterion'] == 'rate':
                    Tscore = 1 - rate
                else:
                    raise Exception('Unknown stopping criterion "%s"' % \
                        trainOpt['criterion'])

            # Run validation
            if trainOpt['needValid']:
                VY = tester.test(self.model, VX)
                VE, dVE = self.model.getCost(VY, VT)
                self.validLoss[epoch] = VE
                if calcError:
                    Vrate, correct, total = tester.calcRate(self.model, VY, VT)
                    self.validRate[epoch] = Vrate

                # Check stopping criterion
                if not trainOpt.has_key('criterion'):
                    Vscore = VE
                else:
                    if trainOpt['criterion'] == 'loss':
                        Vscore = VE
                    elif trainOpt['criterion'] == 'rate':
                        Vscore = 1 - Vrate
                    else:
                        raise Exception('Unknown stopping criterion "%s"' % \
                            trainOpt['criterion'])
                if (bestVscore is None) or (Vscore < bestVscore):
                    bestVscore = Vscore
                    bestTscore = Tscore
                    nAfterBest = 0
                    bestEpoch = epoch
                    # Save trainer if VE is best
                    if trainOpt['saveModel']:
                        self.save()
                else:
                    nAfterBest += 1
                    # Stop training if above patience level
                    if nAfterBest > trainOpt['patience']:
                        print 'Patience level reached, early stop.'
                        print 'Will stop at score ', bestTscore
                        stop = True
            else:
                if trainOpt['saveModel']:
                    self.save()
                if trainOpt.has_key('stopScore') and \
                    Tscore < trainOpt['stopScore']:
                    print 'Training score is lower than %.4f, early stop.' % \
                        trainOpt['stopScore']
                    stop = True

            # Anneal learning rate
            self.model.updateLearningParams(epoch)

            # Print statistics
            logger.logTrainStats()
            if trainOpt['needValid']:
                print 'BT: %.4f' % bestTscore

            # Plot train curves
            if trainOpt['plotFigs']:
                plotter.plot()

            # Terminate
            if stop:
                break

        # Record the best train score and the epoch at which training stopped
        self.stoppedTrainScore = bestTscore
        self.stoppedEpoch = bestEpoch if trainOpt['needValid'] else epoch
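
The criterion check is written out twice in the loop, once for the training score and once for the validation score. A small helper along these lines could express the same mapping in one place; this is only a sketch and is not part of the original code:

    def selectScore(trainOpt, loss, rate):
        # Map the configured stopping criterion onto a score where lower is
        # better, falling back to the loss when no criterion is configured.
        criterion = trainOpt.get('criterion', 'loss')
        if criterion == 'loss':
            return loss
        elif criterion == 'rate':
            return 1 - rate
        raise Exception('Unknown stopping criterion "%s"' % criterion)

    # Usage inside the epoch loop:
    #     Tscore = selectScore(trainOpt, E, rate)
    #     Vscore = selectScore(trainOpt, VE, Vrate)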
Example #5
    def train(self,
              trainInput,
              trainTarget,
              trainInputWeights=None,
              validInput=None,
              validTarget=None,
              validInputWeights=None):
        self.initFolder()
        trainOpt = self.trainOpt
        if validInput is None and validTarget is None:
            X, T, VX, VT = self.initData(\
                trainInput, trainTarget, \
                split=self.trainOpt['needValid'])
        else:
            X = trainInput
            T = trainTarget
            VX = validInput
            VT = validTarget
        N = X.shape[0]
        print 'Epoch size:', N
        numEpoch = trainOpt['numEpoch']
        calcError = trainOpt['calcError']
        numExPerBat = trainOpt['batchSize']
        print 'Batch size:', numExPerBat
        numBatPerStep = trainOpt['stepSize'] \
            if trainOpt.has_key('stepSize') \
            else int(np.ceil(N / float(numExPerBat)))
        print 'Step size:', numBatPerStep
        numExPerStep = numExPerBat * numBatPerStep \
            if trainOpt.has_key('stepSize') \
            else N
        print 'Examples per step:', numExPerStep
        numStepPerEpoch = int(np.ceil(
            N / float(numExPerStep))) \
            if trainOpt.has_key('stepSize') \
            else 1
        print 'Steps per epoch:', numStepPerEpoch
        progressWriter = ProgressWriter(numExPerStep, width=80)
        logger = Logger(self, csv=trainOpt['writeRecord'])
        logger.logMsg('Trainer ' + self.name)
        plotter = Plotter(self)
        bestVscore = None
        bestTscore = None
        bestStep = 0
        totalBat = 0
        step = 0
        totalStep = 0
        nAfterBest = 0
        stop = False
        self.loss = np.zeros((numStepPerEpoch * numEpoch))
        self.validLoss = np.zeros((numStepPerEpoch * numEpoch))
        self.rate = np.zeros((numStepPerEpoch * numEpoch))
        self.validRate = np.zeros((numStepPerEpoch * numEpoch))

        # Train loop through epochs
        for epoch in range(0, numEpoch):
            self.epoch = epoch
            epochE = 0
            epochCorrect = 0
            epochTotal = 0

            # Shuffle data
            if trainOpt['shuffle']:
                X, T = vt.shuffleData(X, T, self.random)

            # Every step, validate
            for step in range(0, numStepPerEpoch):
                stepStart = step * numExPerStep
                stepEnd = min((step + 1) * numExPerStep, N)
                numExThisStep = stepEnd - stepStart
                E = 0
                correct = 0
                total = 0
                self.totalStep = totalStep

                # Every batch forward-backward
                for batch in range(0, numBatPerStep):
                    batchStart = stepStart + batch * numExPerBat
                    if batchStart >= stepEnd:  # no examples left in this step
                        break
                    batchEnd = min(stepStart + (batch + 1) * numExPerBat,
                                   stepEnd)
                    numExThisBat = batchEnd - batchStart
                    self.totalBatch = totalBat

                    if trainOpt['progress']:
                        progressWriter.increment(amount=numExThisBat)

                    # Forward
                    Y_bat = self.model.forward(X[batchStart:batchEnd],
                                               dropout=True)
                    T_bat = T[batchStart:batchEnd]

                    # Loss
                    Etmp, dEdY = self.model.getCost(Y_bat,
                                                    T_bat,
                                                    weights=trainInputWeights)
                    E += Etmp * numExThisBat / float(numExThisStep)
                    epochE += Etmp * numExThisBat / float(N)

                    # Backward
                    self.model.backward(dEdY)

                    # Update
                    self.model.updateWeights()

                    # Prediction error
                    if calcError:
                        rate_, correct_, total_ = \
                            tester.calcRate(self.model, Y_bat, T_bat)
                        correct += correct_
                        total += total_
                        epochCorrect += correct_
                        epochTotal += total_

                    totalBat += 1

                # Store train statistics
                if calcError:
                    rate = correct / float(total)
                    self.rate[totalStep] = rate
                self.loss[totalStep] = E

                # Early stop
                if not trainOpt.has_key('criterion'):
                    Tscore = E
                else:
                    if trainOpt['criterion'] == 'loss':
                        Tscore = E
                    elif trainOpt['criterion'] == 'rate':
                        Tscore = 1 - rate
                    else:
                        raise Exception('Unknown stopping criterion "%s"' % \
                            trainOpt['criterion'])

                # Run validation
                if trainOpt['needValid']:
                    VY = tester.test(self.model, VX)
                    VE, dVE = self.model.getCost(VY,
                                                 VT,
                                                 weights=validInputWeights)
                    self.validLoss[totalStep] = VE
                    if calcError:
                        Vrate, correct, total = tester.calcRate(
                            self.model, VY, VT)
                        self.validRate[totalStep] = Vrate

                    # Check stopping criterion
                    if not trainOpt.has_key('criterion'):
                        Vscore = VE
                    else:
                        if trainOpt['criterion'] == 'loss':
                            Vscore = VE
                        elif trainOpt['criterion'] == 'rate':
                            Vscore = 1 - Vrate
                        else:
                            raise Exception(
                                'Unknown stopping criterion "%s"' % \
                                trainOpt['criterion'])
                    if (bestVscore is None) or (Vscore < bestVscore):
                        bestVscore = Vscore
                        bestTscore = Tscore
                        nAfterBest = 0
                        bestStep = totalStep

                        # Save trainer if VE is best
                        if trainOpt['saveModel']:
                            self.save()
                    else:
                        nAfterBest += 1
                        # Stop training if above patience level
                        if nAfterBest > trainOpt['patience']:
                            print 'Patience level reached, early stop.'
                            print 'Will stop at score ', bestTscore
                            stop = True
                else:
                    if trainOpt['saveModel']:
                        self.save()
                    if trainOpt.has_key('stopScore') and \
                        Tscore < trainOpt['stopScore']:
                        print \
                            'Training score is lower than %.4f, early stop.' % \
                            trainOpt['stopScore']
                        stop = True

                logger.logTrainStats()
                if trainOpt['needValid']:
                    print 'P: %d' % nAfterBest,
                print self.name

                if stop:
                    break

            # Store train statistics
            if calcError:
                epochRate = epochCorrect / float(epochTotal)
                print 'Epoch Final: %d TE: %.4f TR: %.4f' % \
                    (epoch, epochE, epochRate)
            else:
                print 'Epoch Final: %d TE: %.4f' % (epoch, epochE)

            # Anneal learning rate
            self.model.updateLearningParams(epoch)

            # Plot train curves
            if trainOpt['plotFigs']:
                plotter.plot()

            # Terminate
            if stop:
                break

        # Report best train score
        self.stoppedTrainScore = bestTscore
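
The step bookkeeping at the top of this variant is easier to follow with concrete numbers. A short worked example under assumed values (1000 training examples, a batch size of 50, and a stepSize of 4 batches):

    import numpy as np

    N = 1000           # examples per epoch (assumed)
    numExPerBat = 50   # trainOpt['batchSize'] (assumed)
    stepSize = 4       # trainOpt['stepSize'] (assumed to be set)

    numBatPerStep = stepSize                                  # 4 batches per step
    numExPerStep = numExPerBat * numBatPerStep                # 200 examples per step
    numStepPerEpoch = int(np.ceil(N / float(numExPerStep)))   # 5 steps per epoch

    # Validation, logging and the early-stopping check run once per step,
    # so with these numbers the model is evaluated 5 times per epoch
    # instead of once at the end of the epoch.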
Example #6
    def train(
                self, 
                trainInput, 
                trainTarget, 
                trainInputWeights=None,
                validInput=None, 
                validTarget=None,
                validInputWeights=None):
        self.initFolder()
        trainOpt = self.trainOpt
        if validInput is None and validTarget is None:
            X, T, VX, VT = self.initData(\
                trainInput, trainTarget, \
                split=self.trainOpt['needValid'])
        else:
            X = trainInput
            T = trainTarget
            VX = validInput
            VT = validTarget
        N = X.shape[0]
        print 'Epoch size:', N
        numEpoch = trainOpt['numEpoch']
        calcError = trainOpt['calcError']
        numExPerBat = trainOpt['batchSize']
        print 'Batch size:', numExPerBat
        numBatPerStep = trainOpt['stepSize'] \
            if trainOpt.has_key('stepSize') \
            else int(np.ceil(N / float(numExPerBat)))
        print 'Step size:', numBatPerStep
        numExPerStep = numExPerBat * numBatPerStep \
            if trainOpt.has_key('stepSize') \
            else N
        print 'Examples per step:', numExPerStep
        numStepPerEpoch = int(np.ceil(
            N / float(numExPerStep))) \
            if trainOpt.has_key('stepSize') \
            else 1
        print 'Steps per epoch:', numStepPerEpoch
        progressWriter = ProgressWriter(numExPerStep, width=80)
        logger = Logger(self, csv=trainOpt['writeRecord'])
        logger.logMsg('Trainer ' + self.name)
        plotter = Plotter(self)
        bestVscore = None
        bestTscore = None
        bestStep = 0
        totalBat = 0
        step = 0
        totalStep = 0
        nAfterBest = 0
        stop = False
        self.loss = np.zeros((numStepPerEpoch * numEpoch))
        self.validLoss = np.zeros((numStepPerEpoch * numEpoch))
        self.rate = np.zeros((numStepPerEpoch * numEpoch))
        self.validRate = np.zeros((numStepPerEpoch * numEpoch))
        
        # Train loop through epochs
        for epoch in range(0, numEpoch):
            self.epoch = epoch
            epochE = 0
            epochCorrect = 0
            epochTotal = 0
            
            # Shuffle data
            if trainOpt['shuffle']:
                X, T = vt.shuffleData(X, T, self.random)
            
            # Every step, validate
            for step in range(0, numStepPerEpoch):
                stepStart = step * numExPerStep
                stepEnd = min((step + 1) * numExPerStep, N)
                numExThisStep = stepEnd - stepStart
                E = 0
                correct = 0
                total = 0
                self.totalStep = totalStep
                
                # Every batch forward-backward
                for batch in range(0, numBatPerStep):
                    batchStart = stepStart + batch * numExPerBat
                    if batchStart >= stepEnd:  # no examples left in this step
                        break
                    batchEnd = min(
                        stepStart + (batch + 1) * numExPerBat, stepEnd)
                    numExThisBat = batchEnd - batchStart
                    self.totalBatch = totalBat
                    
                    if trainOpt['progress']:
                        progressWriter.increment(amount=numExThisBat)
                    
                    # Forward
                    Y_bat = self.model.forward(
                        X[batchStart:batchEnd], dropout=True)
                    T_bat = T[batchStart:batchEnd]
                    
                    # Loss
                    Etmp, dEdY = self.model.getCost(
                        Y_bat, T_bat, weights=trainInputWeights)
                    E += Etmp * numExThisBat / float(numExThisStep)
                    epochE += Etmp * numExThisBat / float(N)
                    
                    # Backward
                    self.model.backward(dEdY)
                    
                    # Update
                    self.model.updateWeights()
                    
                    # Prediction error
                    if calcError:
                        rate_, correct_, total_ = \
                            tester.calcRate(self.model, Y_bat, T_bat)
                        correct += correct_
                        total += total_
                        epochCorrect += correct_
                        epochTotal += total_
                    
                    totalBat += 1
                
                # Store train statistics
                if calcError:
                    rate = correct / float(total)
                    self.rate[totalStep] = rate
                self.loss[totalStep] = E
                
                # Early stop
                if not trainOpt.has_key('criterion'):
                    Tscore = E
                else:
                    if trainOpt['criterion'] == 'loss':
                        Tscore = E
                    elif trainOpt['criterion'] == 'rate':
                        Tscore = 1 - rate
                    else:
                        raise Exception('Unknown stopping criterion "%s"' % \
                            trainOpt['criterion'])
                
                # Run validation
                if trainOpt['needValid']:
                    VY = tester.test(self.model, VX)
                    VE, dVE = self.model.getCost(
                        VY, VT, weights=validInputWeights)
                    self.validLoss[totalStep] = VE
                    if calcError:
                        Vrate, correct, total = tester.calcRate(
                            self.model, VY, VT)
                        self.validRate[totalStep] = Vrate
                    
                    # Check stopping criterion
                    if not trainOpt.has_key('criterion'):
                        Vscore = VE
                    else:
                        if trainOpt['criterion'] == 'loss':
                            Vscore = VE
                        elif trainOpt['criterion'] == 'rate':
                            Vscore = 1 - Vrate
                        else:
                            raise Exception(
                                'Unknown stopping criterion "%s"' % \
                                trainOpt['criterion'])
                    if (bestVscore is None) or (Vscore < bestVscore):
                        bestVscore = Vscore
                        bestTscore = Tscore
                        nAfterBest = 0
                        bestStep = totalStep

                        # Save trainer if VE is best
                        if trainOpt['saveModel']:
                            self.save()
                    else:
                        nAfterBest += 1
                        # Stop training if above patience level
                        if nAfterBest > trainOpt['patience']:
                            print 'Patience level reached, early stop.'
                            print 'Will stop at score ', bestTscore
                            stop = True
                else:
                    if trainOpt['saveModel']:
                        self.save()
                    if trainOpt.has_key('stopScore') and \
                        Tscore < trainOpt['stopScore']:
                        print \
                            'Training score is lower than %.4f, early stop.' % \
                            trainOpt['stopScore']
                        stop = True
                
                logger.logTrainStats()
                if trainOpt['needValid']:
                    print 'P: %d' % nAfterBest,
                print self.name
                
                if stop:
                    break
            
            # Store train statistics
            if calcError:
                epochRate = epochCorrect / float(epochTotal)
                print 'Epoch Final: %d TE: %.4f TR: %.4f' % \
                    (epoch, epochE, epochRate)
            else:
                print 'Epoch Final: %d TE: %.4f' % (epoch, epochE)
            
            # Anneal learning rate
            self.model.updateLearningParams(epoch)
            
            # Plot train curves
            if trainOpt['plotFigs']:
                plotter.plot()
                
            # Terminate
            if stop:       
                break
                
        # Report best train score
        self.stoppedTrainScore = bestTscore
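
All of the train() variants implement the same patience-based early stopping. The sketch below isolates that bookkeeping (bestVscore, nAfterBest, patience) from the rest of the loop; it mirrors the logic above rather than reproducing the original code:

    def shouldStop(Vscore, state, patience):
        # state carries bestVscore and nAfterBest across validation checks.
        if state['bestVscore'] is None or Vscore < state['bestVscore']:
            state['bestVscore'] = Vscore
            state['nAfterBest'] = 0     # new best score: reset the counter
            return False
        state['nAfterBest'] += 1
        return state['nAfterBest'] > patience

    # Usage sketch:
    #     state = {'bestVscore': None, 'nAfterBest': 0}
    #     stop = shouldStop(Vscore, state, trainOpt['patience'])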