def train(self, numEpochs=1, batchSize=5, learnRateVal=0.1,
          numExamplesToTrain=-1, gradMax=3., L2regularization=0.0,
          dropoutRate=0.0, sentenceAttention=False, wordwiseAttention=False):
    """
    Takes care of training the model, including propagating errors
    and updating parameters.
    """
    expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
              "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                    str(L2regularization), str(dropoutRate),
                                    str(sentenceAttention),
                                    str(wordwiseAttention))
    self.configs.update(locals())

    trainPremiseIdxMat, trainHypothesisIdxMat = \
        self.embeddingTable.convertDataToIdxMatrices(
            self.trainData, self.trainDataStats)
    trainGoldLabel = convertLabelsToMat(self.trainData)

    valPremiseIdxMat, valHypothesisIdxMat = \
        self.embeddingTable.convertDataToIdxMatrices(
            self.valData, self.valDataStats)
    valGoldLabel = convertLabelsToMat(self.valData)

    # Optionally train on fewer examples than the full dataset.
    # NOTE: as in the original code, this subset (and the training batches
    # built below) is drawn from the *validation* matrices; substitute the
    # train* matrices here to train on the actual training set.
    if numExamplesToTrain > 0:
        valPremiseIdxMat = valPremiseIdxMat[:, range(numExamplesToTrain), :]
        valHypothesisIdxMat = valHypothesisIdxMat[:, range(numExamplesToTrain), :]
        valGoldLabel = valGoldLabel[range(numExamplesToTrain)]

    # Whether sequences are zero-padded on the left or right
    pad = "right"

    # Get full premise/hypothesis tensors
    # batchPremiseTensor, batchHypothesisTensor, batchLabels = \
    #     convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise,
    #                                valHypothesisIdxMat,
    #                                self.numTimestepsHypothesis, "right",
    #                                self.embeddingTable, valGoldLabel,
    #                                range(len(valGoldLabel)))
    # sharedValPremise = theano.shared(batchPremiseTensor)
    # sharedValHypothesis = theano.shared(batchHypothesisTensor)
    # sharedValLabels = theano.shared(batchLabels)

    inputPremise = T.ftensor3(name="inputPremise")
    inputHypothesis = T.ftensor3(name="inputHypothesis")
    yTarget = T.fmatrix(name="yTarget")
    learnRate = T.scalar(name="learnRate", dtype='float32')

    fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
        fUpdateHypothesis, costFn, _, _ = self.trainFunc(
            inputPremise, inputHypothesis, yTarget, learnRate, gradMax,
            L2regularization, dropoutRate, sentenceAttention,
            wordwiseAttention, batchSize)

    totalExamples = 0
    stats = Stats(self.logger, expName)

    # Training
    self.logger.Log("Model configs: {0}".format(self.configs))
    self.logger.Log("Starting training with {0} epochs, {1} batchSize,"
                    " {2} learning rate, {3} L2regularization coefficient,"
                    " and {4} dropout rate".format(numEpochs, batchSize,
                                                   learnRateVal,
                                                   L2regularization,
                                                   dropoutRate))

    predictFunc = self.predictFunc(inputPremise, inputHypothesis, dropoutRate)

    for epoch in xrange(numEpochs):
        self.logger.Log("Epoch number: %d" % (epoch))

        if numExamplesToTrain > 0:
            minibatches = getMinibatchesIdx(numExamplesToTrain, batchSize)
        else:
            # Bug fix: index over the data actually batched below (the val
            # matrices). The original used len(trainGoldLabel), which can
            # exceed the bounds of the validation matrices.
            minibatches = getMinibatchesIdx(len(valGoldLabel), batchSize)

        numExamples = 0
        for _, minibatch in minibatches:
            # Enable dropout for the gradient/update steps
            self.dropoutMode.set_value(1.0)
            numExamples += len(minibatch)
            totalExamples += len(minibatch)
            self.logger.Log("Processed {0} examples in current epoch".format(
                str(numExamples)))

            batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                convertDataToTrainingBatch(valPremiseIdxMat,
                                           self.numTimestepsPremise,
                                           valHypothesisIdxMat,
                                           self.numTimestepsHypothesis, pad,
                                           self.embeddingTable, valGoldLabel,
                                           minibatch)

            # Accumulate shared gradients, then apply the SGD updates
            gradHypothesisOut = fGradSharedHypothesis(batchPremiseTensor,
                                                      batchHypothesisTensor,
                                                      batchLabels)
            gradPremiseOut = fGradSharedPremise(batchPremiseTensor,
                                                batchHypothesisTensor,
                                                batchLabels)
            fUpdatePremise(learnRateVal)
            fUpdateHypothesis(learnRateVal)

            predictLabels = self.predict(batchPremiseTensor,
                                         batchHypothesisTensor, predictFunc)
            # self.logger.Log("Labels in epoch {0}: {1}".format(
            #     epoch, str(predictLabels)))

            cost = costFn(batchPremiseTensor, batchHypothesisTensor,
                          batchLabels)
            stats.recordCost(totalExamples, cost)

            # Note: big time sink happens here
            if totalExamples % 100 == 0:
                # TODO: Don't compute accuracy of dev set
                # Disable dropout before evaluating accuracy
                self.dropoutMode.set_value(0.0)
                devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                   valHypothesisIdxMat,
                                                   valGoldLabel, predictFunc)
                stats.recordAcc(totalExamples, devAccuracy, "dev")

    stats.recordFinalTrainingTime(totalExamples)

    # Save model to disk
    self.logger.Log("Saving model...")
    self.extractParams()
    configString = "batch={0},epoch={1},learnRate={2},dimHidden={3}," \
                   "dimInput={4}".format(str(batchSize), str(numEpochs),
                                         str(learnRateVal),
                                         str(self.dimHidden),
                                         str(self.dimInput))
    self.saveModel(currDir + "/savedmodels/basicLSTM_" + configString + ".npz")
    self.logger.Log("Model saved!")

    # Set dropout to 0. again for testing
    self.dropoutMode.set_value(0.0)

    # Train accuracy
    # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
    #                                      trainHypothesisIdxMat,
    #                                      trainGoldLabel, predictFunc)
    # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

    # Val accuracy
    valAccuracy = self.computeAccuracy(valPremiseIdxMat, valHypothesisIdxMat,
                                       valGoldLabel, predictFunc)
    # TODO: change -1 for training acc to actual value when training
    # accuracy computation is enabled
    stats.recordFinalStats(totalExamples, -1, valAccuracy)
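
# Usage sketch: a minimal, hedged example of invoking train(). The class
# name `LSTMModel` and its constructor arguments are assumptions for
# illustration only; they are not defined in this section.
#
#   model = LSTMModel(trainData, trainDataStats, valData, valDataStats,
#                     embeddingTable, logger)
#   model.train(numEpochs=10, batchSize=32, learnRateVal=0.01, gradMax=3.,
#               L2regularization=1e-4, dropoutRate=0.5,
#               sentenceAttention=True, wordwiseAttention=False)
#
# Dropout is toggled via self.dropoutMode (1.0 during updates, 0.0 during
# evaluation), so no extra setup is needed before calling train().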