sequence['dayofweek'] = (sequence['dayofweek'] - meanDayOfWeek) / stdDayOfWeek ds = getPyBrainDataSet(sequence, nTrain, predictionStep, useTimeOfDay, useDayOfWeek) print "train LSTM with " + str(rptNum) + " epochs" random.seed(6) net = initializeLSTMnet(nDimInput=len(ds.getSample()[0]), nDimOutput=1, nLSTMcells=20) trainer = RPropMinusTrainer(net, dataset=ds, verbose=True) error = [] for rpt in xrange(rptNum): err = trainer.train() error.append(err) print "test LSTM" net.reset() predictedInput = np.zeros((len(sequence), )) targetInput = np.zeros((len(sequence), )) trueData = np.zeros((len(sequence), )) for i in xrange(1, len(sequence) - predictionStep): if useTimeOfDay and useDayOfWeek: sample = np.array([ sequence['data'][i], sequence['timeofday'][i], sequence['dayofweek'][i] ]) elif useTimeOfDay:
def main():
    """Train a recurrent PyBrain network on TSV data described by a config.

    Reads the config file named by sys.argv[1], builds train/validation
    sequence datasets, trains with RProp- in batches, and after each batch
    checkpoints the network to XML, prints errors, and optionally plots.
    NOTE(review): never returns -- the while loop below has no exit condition.
    """
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    # Per-run log directory named after this script and the config file.
    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    # Keep a copy of the effective configuration alongside the results.
    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']
    outLabels = config['output_columns']

    # Split rows into one training and one validation sequence by sequence id.
    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'],
                              config['test_seqno'],
                              config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    # Normalize both splits with the scales taken from the config.
    normalizeDataSet(tds, ins = inScale, outs = outScale)
    normalizeDataSet(vds, ins = inScale, outs = outScale)

    trainData = tds
    validationData = vds

    # Hidden-layer sizes derived from the data dimensions.
    fdim = tds.indim / 2 + 15
    xdim = tds.outdim * 2

    # Two fdim hidden layers plus an xdim layer; recurrence feeds the last
    # hidden layer ('hidden2') back into the first ('hidden0').
    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden0']))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True, weightdecay=0.005)
    # Alternative trainers kept from earlier experiments:
    #trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)
    #trainer = BackpropTrainer(rnn, dataset=trainData, learningrate=0.0001,
    #    lrdecay=1.0, momentum=0.4, verbose=True, batchlearning=False,
    #    weightdecay=0)

    # Error history for the live plot in figure 3.
    errTime = []
    errTrain = []
    errValidation = []

    epochNo = 0
    while True:
        # Train for a batch of epochs between checkpoints/reports.
        for i in range(config['epochs_per_update']):
            trainer.train()
        epochNo += config['epochs_per_update']

        # Checkpoint: one file per milestone plus a rolling "Latest".
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # Rescale RMS/max errors from normalized units back to physical units
        # using the output scalar's scale factor.
        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error: avg %5.3f degC max %5.3f degC" % (tAvgErr, tMaxErr)
        print "Validation error: avg %5.3f degC max %5.3f degC" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if (config['visualize_on_training'] == 'yes'):
            # ioff/ion bracket each redraw so interactive mode is restored
            # before returning to the training loop.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 3: training vs. validation error history.
            # NOTE(review): placement inside this if-branch was reconstructed
            # from a collapsed source line -- confirm against the original.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label = 'Train')
            PL.plot(errTime, errValidation, label = 'Validation')
            PL.legend()
            PL.ion()
            PL.draw()
def main():
    """Train a deeper recurrent PyBrain network on TSV data from a config.

    Same pipeline as the sibling variants: read config, build and normalize
    train/validation sequence datasets, train with RProp- in batches,
    checkpoint to XML, print errors, optionally plot.  This variant uses
    three self-recurrent hidden layers.
    NOTE(review): never returns -- the while loop below has no exit condition.
    """
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    # Per-run log directory named after this script and the config file.
    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    # Keep a copy of the effective configuration alongside the results.
    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']
    outLabels = config['output_columns']

    # Split rows into one training and one validation sequence by sequence id.
    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'],
                              config['test_seqno'],
                              config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    # Normalize both splits with the scales taken from the config.
    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Hidden-layer sizes derived from the data dimensions.
    fdim = tds.indim / 5 + 5
    xdim = tds.outdim * 2

    # Three fdim hidden layers plus an xdim layer; each of the first three
    # hidden layers gets a self-recurrent connection.
    rnn = buildNetwork(tds.indim, fdim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn['hidden0'], rnn['hidden0']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden1'], rnn['hidden1']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden2']))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True, weightdecay=0.005)

    # Error history for the live plot in figure 3.
    errTime = []
    errTrain = []
    errValidation = []

    epochNo = 0
    while True:
        # Train for a batch of epochs between checkpoints/reports.
        for i in range(config['epochs_per_update']):
            trainer.train()
        epochNo += config['epochs_per_update']

        # Checkpoint: one file per milestone plus a rolling "Latest".
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # Rescale RMS/max errors from normalized units back to physical units
        # using the output scalar's scale factor.
        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut)**2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean( (validationData['target'] - vOut)**2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error: avg %5.3f max %5.3f" % (tAvgErr, tMaxErr)
        print "Validation error: avg %5.3f max %5.3f" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if (config['visualize_on_training'] == 'yes'):
            # ioff/ion bracket each redraw so interactive mode is restored
            # before returning to the training loop.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # Figure 3: training vs. validation error history.
            # NOTE(review): placement inside this if-branch was reconstructed
            # from a collapsed source line -- confirm against the original.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
meanDayOfWeek = np.mean(sequence['dayofweek']) stdDayOfWeek = np.std(sequence['dayofweek']) sequence['dayofweek'] = (sequence['dayofweek'] - meanDayOfWeek)/stdDayOfWeek ds = getPyBrainDataSetScalarEncoder(sequence, nTrain, encoderInput, encoderOutput, predictionStep, useTimeOfDay, useDayOfWeek) print "train LSTM with "+str(rptNum)+" repeats" net = initializeLSTMnet(nDimInput=len(ds.getSample()[0]), nDimOutput=len(ds.getSample()[1]), nLSTMcells=20) trainer = RPropMinusTrainer(net, dataset=ds, verbose=True) error = [] for rpt in xrange(rptNum): err = trainer.train() error.append(err) print "test LSTM" net.reset() targetInput = np.zeros((len(sequence),)) trueData = np.zeros((len(sequence),)) predictedInput = np.zeros((len(sequence),)) bucketValues = encoderOutput.getBucketValues() if encoderOutput is not None: predictedDistribution = np.zeros((len(sequence), encoderOutput.n)) targetDistribution = np.zeros((len(sequence), encoderOutput.n))
def main():
    """Train a recurrent PyBrain network on TSV data described by a config.

    Same pipeline as the sibling variants: read config, build and normalize
    train/validation sequence datasets, train with RProp- in batches,
    checkpoint to XML, print errors, optionally plot.  This variant uses two
    self-recurrent hidden layers and no weight decay.
    NOTE(review): never returns -- the while loop below has no exit condition.
    """
    config = MU.ConfigReader("configs/%s" % sys.argv[1])
    config.read()

    # Per-run log directory named after this script and the config file.
    logDir = "%s-%s" % (__file__, sys.argv[1])
    os.mkdir(logDir)

    # Keep a copy of the effective configuration alongside the results.
    with open("%s/config.txt" % logDir, "w") as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config["input_tsv_path"])
    data = dr.read(config["interested_columns"])

    inLabels = config["input_columns"]
    outLabels = config["output_columns"]

    # Split rows into one training and one validation sequence by sequence id.
    tds, vds = seqDataSetPair(
        data, inLabels, outLabels, config["seq_label_column"],
        config["test_seqno"], config["validation_seqno"]
    )

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    # Normalize both splits with the scales taken from the config.
    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Hidden-layer sizes derived from the data dimensions.
    fdim = tds.indim / 5 + 5
    xdim = tds.outdim * 2

    # Two fdim hidden layers plus an xdim layer; the first two hidden layers
    # each get a self-recurrent connection.
    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn["hidden0"], rnn["hidden0"]))
    rnn.addRecurrentConnection(FullConnection(rnn["hidden1"], rnn["hidden1"]))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True)

    # Error history for the live plot in figure 3.
    errTime = []
    errTrain = []
    errValidation = []

    epochNo = 0
    while True:
        # Train for a batch of epochs between checkpoints/reports.
        for i in range(config["epochs_per_update"]):
            trainer.train()
        epochNo += config["epochs_per_update"]

        # Checkpoint: one file per milestone plus a rolling "Latest".
        NetworkWriter.writeToFile(rnn, "%s/Epoch_%d.xml" % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, "%s/Latest.xml" % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # Rescale RMS/max errors from normalized units back to physical units
        # using the output scalar's scale factor.
        tScaler = config.getDataScale([config["output_scalar_label"]])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData["target"] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData["target"] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData["target"] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData["target"] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error: avg %5.3f degC max %5.3f degC" % (tAvgErr, tMaxErr)
        print "Validation error: avg %5.3f degC max %5.3f degC" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if config["visualize_on_training"] == "yes":
            # ioff/ion bracket each redraw so interactive mode is restored
            # before returning to the training loop.
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(
                rnn, trainData, 0,
                config["visualized_columns"]["input"],
                config["visualized_columns"]["output"]
            )
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(
                rnn, validationData, 0,
                config["visualized_columns"]["input"],
                config["visualized_columns"]["output"]
            )
            PL.ion()
            PL.draw()

            # Figure 3: training vs. validation error history.
            # NOTE(review): placement inside this if-branch was reconstructed
            # from a collapsed source line -- confirm against the original.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label="Train")
            PL.plot(errTime, errValidation, label="Validation")
            PL.legend()
            PL.ion()
            PL.draw()