def visulizeDataSet(network, data, seqno, in_labels, out_labels):
    seq = data.getSequence(seqno)
    tmpDs = SequentialDataSet(data.indim, data.outdim)
    tmpDs.newSequence()

    for i in xrange(data.getSequenceLength(seqno)):
        tmpDs.addSample(seq[0][i], seq[1][i])

    nplots = len(in_labels) + len(out_labels)

    for i in range(len(in_labels)):
        p = PL.subplot(nplots, 1, i + 1)
        p.clear()
        p.plot(tmpDs['input'][:, i])
        p.set_ylabel(in_labels[i])

    for i in range(len(out_labels)):
        p = PL.subplot(nplots, 1, i + 1 + len(in_labels))
        p.clear()
        output = ModuleValidator.calculateModuleOutput(network, tmpDs)
        p.plot(tmpDs['target'][:, i], label='train')
        p.plot(output[:, i], label='sim')
        p.legend()
        p.set_ylabel(out_labels[i])
def testOnSequenceData(module, dataset):
    """ Fetch the targets and calculate the module's output on dataset.
    Output and target are in one-of-many format. The class for each sequence
    is determined by the argmax of the LAST item in the sequence. """
    target = dataset.getField("target")
    output = ModuleValidator.calculateModuleOutput(module, dataset)

    # determine the last indices of the sequences inside the dataset
    ends = SequenceHelper.getSequenceEnds(dataset)

    class_output = array([argmax(output[end]) for end in ends])
    class_target = array([argmax(target[end]) for end in ends])

    return Validator.classificationPerformance(class_output, class_target)
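A short usage sketch for the helper above, assuming a minimal sequence-classification setup. The dataset shape, LSTM size, and training budget are illustrative choices, not taken from the original snippet.

# Illustrative only: a tiny sequence-classification setup to exercise
# testOnSequenceData(); dimensions and epoch count are arbitrary.
from numpy import array, argmax
from pybrain.datasets import SequenceClassificationDataSet
from pybrain.structure import LSTMLayer, SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.validation import ModuleValidator, Validator, SequenceHelper

ds = SequenceClassificationDataSet(2, 1, nb_classes=2)
for label in (0, 1):
    ds.newSequence()
    for t in range(5):
        ds.addSample([t, label], [label])   # trivial, label-dependent sequences
ds._convertToOneOfMany()                    # one-of-many targets, as the helper expects

net = buildNetwork(2, 4, 2, hiddenclass=LSTMLayer, outclass=SoftmaxLayer, recurrent=True)
BackpropTrainer(net, dataset=ds).trainEpochs(10)
print testOnSequenceData(net, ds)           # fraction of sequences classified correctly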
def main():
    train, val = getData()

    train_errs = []
    val_errs = []
    for hiddensize in range(10, 100, 5):
        net = buildNetwork(39, hiddensize, 1,
                           hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
        trainer = BackpropTrainer(net, train)
        for i in range(20):
            trainer.train()
        verr = ModuleValidator.MSE(net, val)
        print hiddensize, ": ", verr
        val_errs.append(verr)
        train_errs.append(trainer.train())

    return (train_errs, val_errs)
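If this sweep is run directly, the best hidden-layer size can be read off the returned validation errors. A small hypothetical driver (the getData() helper and the range(10, 100, 5) grid come from the snippet above; everything else is an assumption):

# Hypothetical driver for the sweep above: pick the hidden size with the
# lowest validation MSE (sizes follow the same range(10, 100, 5) grid).
if __name__ == '__main__':
    train_errs, val_errs = main()
    sizes = range(10, 100, 5)
    best_size = sizes[val_errs.index(min(val_errs))]
    print "best hidden size:", best_size, "validation MSE:", min(val_errs)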
def get_segregation(file_path="/computed/sentence_analysis_reg.pkl.gz",
                    error_metric=regression_metric):
    # Load the necessary data.
    best_module, testData, Y_test = data.load(root + file_path)
    n_samples = len(Y_test)

    # Compute the error, then sort and group errors by rating group.
    Y_pred = ModuleValidator.calculateModuleOutput(best_module, testData)
    error = [error_metric(Y_pred[i], Y_test[i]) for i in xrange(n_samples)]
    err_and_revidx = zip(error, range(n_samples))

    sorted_err = {0: [], 1: [], 2: [], 3: [], 4: []}
    # For some reason the last n_samples/2 entries are corrupted and are not
    # aligned to the reviews, so only the first half is used.
    for idx in range(n_samples / 2):
        sorted_err[Y_test[idx]].append(err_and_revidx[idx])
    for idx in range(5):
        sorted_err[idx] = sorted(sorted_err[idx])

    return sorted_err
def train(self, args):
    if self.data.ds is None:
        print("Can't train without loaded data")
        return
    if args != [] and len(args) >= 2:
        self.net.epochs = int(args[1])

    if self.net.trainingType == "gradient":
        if self.trainer is None:
            self.trainer, self.returnsNet = self.__getGradientTrainer()
        self.__train(self.trainer.trainEpochs, self.returnsNet)
    elif self.net.trainingType == "optimization":
        if self.trainer is None:
            self.trainer, self.returnsNet = self.__getOptimizationTrainer()
        self.__train(self.trainer.learn, self.returnsNet)
        return
    elif self.net.trainingType == "crossval":
        if self.trainer is None:
            self.trainer, self.returnsNet = self.__getGradientTrainer()
        evaluation = ModuleValidator.classificationPerformance(self.trainer.module, self.data.ds)
        validator = CrossValidator(trainer=self.trainer, dataset=self.trainer.ds,
                                   n_folds=5, valfunc=evaluation, verbose=True,
                                   max_epochs=1)
        print(validator.validate())
    else:
        raise Exception("Cannot create trainer, unknown training type: " + self.net.trainingType)
if tstsplot and ctstsplot:
    pylab.plot(tsts['input'], ctsts, c='g')
pylab.xlabel('x')
pylab.ylabel('y')
pylab.title('Neuron Number:' + str(nneuron))
pylab.grid(True)
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)

# set up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# create the test DataSet
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the training DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)
    # calculate the test DataSet based on the trained neural network
starttime = time.time()
print("Training Network 6")
trainer6 = backprop.BackpropTrainer(net6, dataset=train)
score6 = trainer6.train()
trainerrorlist.append(score6)
timeTaken.append(time.time() - starttime)

print("BackPropTrainer 1 has an error rate of " + str(score1))
print("BackPropTrainer 2 has an error rate of " + str(score2))
print("BackPropTrainer 3 has an error rate of " + str(score3))
print("BackPropTrainer 4 has an error rate of " + str(score4))
print("BackPropTrainer 5 has an error rate of " + str(score5))
print("BackPropTrainer 6 has an error rate of " + str(score6))

testErrorArray = []
score1a = ModuleValidator.MSE(net1, test)
testErrorArray.append(score1a)
score2a = ModuleValidator.MSE(net2, test)
testErrorArray.append(score2a)
score3a = ModuleValidator.MSE(net3, test)
testErrorArray.append(score3a)
score4a = ModuleValidator.MSE(net4, test)
testErrorArray.append(score4a)
score5a = ModuleValidator.MSE(net5, test)
testErrorArray.append(score5a)
score6a = ModuleValidator.MSE(net6, test)
testErrorArray.append(score6a)

print("Printing mean square error values")
print("BackPropTrainer 1 has a test error rate of " + str(score1a))
print("BackPropTrainer 2 has a test error rate of " + str(score2a))
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])

tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# We can also examine the dataset
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn = buildNetwork(trndata.indim, 5, trndata.outdim, recurrent=False)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True,
                          weightdecay=0.01)

# I am not sure about this, I don't think my production code is implemented like this
modval = ModuleValidator()
trainer.trainEpochs(20)
trainer.trainOnDataset(dataset=trndata)
cv = CrossValidator(trainer, trndata, n_folds=5, valfunc=modval.MSE)
print "MSE %f" % (cv.validate())
if tstsplot and ctstsplot:
    pylab.plot(tsts['input'], ctsts, c='g')
pylab.xlabel('x')
pylab.ylabel('y')
pylab.title('Neuron Number:' + str(nneuron))
pylab.grid(True)
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)

# set up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

n = FeedForwardNetwork(name=netname)
inLayer = LinearLayer(1, name='in')
hiddenLayer = SigmoidLayer(nneuron, name='hidden0')
outLayer = LinearLayer(1, name='out')
biasinUnit = BiasUnit(name="bhidden0")
biasoutUnit = BiasUnit(name="bout")
n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addModule(biasinUnit)
n.addModule(biasoutUnit)
n.addOutputModule(outLayer)
in_to_hidden = FullConnection(inLayer, hiddenLayer)
bias_to_hidden = FullConnection(biasinUnit, hiddenLayer)
bias_to_out = FullConnection(biasoutUnit, outLayer)
def main():
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']
    outLabels = config['output_columns']

    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'],
                              config['test_seqno'], config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)
    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    fdim = tds.indim / 5 + 5
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn['hidden0'], rnn['hidden0']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden1'], rnn['hidden1']))
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden2']))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True, weightdecay=0.005)

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0

    while True:
        for i in range(config['epochs_per_update']):
            trainer.train()
        epochNo += config['epochs_per_update']

        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error: avg %5.3f max %5.3f" % (tAvgErr, tMaxErr)
        print "Validation error: avg %5.3f max %5.3f" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if config['visualize_on_training'] == 'yes':
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
timeSpentBuilding.append(end - start)
print("Trainer created")

start = time.clock()
trainer = BackpropTrainer(LSTMNetwork, dataset=train)
error = trainer.train()
end = time.clock()
print str(end - start) + " seconds for training the network"
print("LSTMNetwork" + str(LSTMNetworkNumber) + " has an error rate of: " + str(error))
overallError.append(error)
timeSpentTraining.append(end - start)

print("Validating the trained network against the testing set")
start = time.clock()
validation = ModuleValidator.MSE(LSTMNetwork, test)
end = time.clock()
print str(end - start) + " seconds for testing the network"
print("Backpropagation Trainer " + str(LSTMNetworkNumber) + " has a test error rate of: " + str(validation))
testComparison.append(validation)
timeSpentTesting.append(end - start)

LSTMNetworkNumber += 1

# Values for overallError and testComparison
print str(overallError)
print str(testComparison)

# Plot of the network's error after training
plt.plot([1, 2, 3, 4], overallError)
def runTraining(self, convergence=0, **kwargs):
    """ Trains the network on the stored dataset. If convergence is > 0, stop
    training once the test error has failed to improve for that many batches
    of epoinc epochs.
    CAVEAT: No support for sequential datasets! """
    assert isinstance(self.Trainer, Trainer)
    if self.Graph is not None:
        self.Graph.setLabels(x='epoch', y='normalized regression error')
        self.Graph.setLegend(['training', 'test'], loc='upper right')
    epoch = 0
    inc = self.epoinc
    best_error = Infinity
    best_epoch = 0
    learncurve_x = [0]
    learncurve_y = [0.0]
    valcurve_y = [0.0]
    converged = False
    convtest = 0
    if convergence > 0:
        logging.info("Convergence criterion: %d batches of %d epochs w/o improvement" % (convergence, inc))
    while epoch <= self.maxepochs and not converged:
        self.Trainer.trainEpochs(inc)
        epoch += inc
        learncurve_x.append(epoch)
        # calculate errors on TRAINING data
        err_trn = ModuleValidator.validate(Validator.MSE, self.Trainer.module, self.DS)
        learncurve_y.append(err_trn)
        if self.TDS is None:
            logging.info("epoch: %6d, err_trn: %10g" % (epoch, err_trn))
        else:
            # calculate the same errors on TEST data
            err_tst = ModuleValidator.validate(Validator.MSE, self.Trainer.module, self.TDS)
            valcurve_y.append(err_tst)
            if err_tst < best_error:
                # store best error and parameters
                best_epoch = epoch
                best_error = err_tst
                bestweights = self.Trainer.module.params.copy()
                convtest = 0
            else:
                convtest += 1
            logging.info("epoch: %6d, err_trn: %10g, err_tst: %10g, best_tst: %10g" % (epoch, err_trn, err_tst, best_error))
            if self.Graph is not None:
                self.Graph.addData(1, epoch, err_tst)
            # check whether the convergence criterion is fulfilled (no improvement after N epoincs)
            if convtest >= convergence:
                converged = True
        if self.Graph is not None:
            self.Graph.addData(0, epoch, err_trn)
            self.Graph.update()
    # training finished!
    logging.info("Best epoch: %6d, with error: %10g" % (best_epoch, best_error))
    if self.VDS is not None:
        # calculate the same errors on VALIDATION data
        self.Trainer.module.params[:] = bestweights.copy()
        err_val = ModuleValidator.validate(Validator.MSE, self.Trainer.module, self.VDS)
        logging.info("Result on evaluation data: %10g" % err_val)
    # store the training curve for saving into a file
    self.trainCurve = (learncurve_x, learncurve_y, valcurve_y)
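The same batch-wise early-stopping idea, stripped of the surrounding class, might look like the sketch below. Here trainer and test_ds are placeholders for a generic PyBrain trainer and test dataset; this is an independent illustration, not the API of the class above.

# Standalone sketch of batch-wise early stopping with ModuleValidator,
# assuming a generic PyBrain trainer and a SupervisedDataSet (placeholders).
from pybrain.tools.validation import ModuleValidator, Validator

def train_with_patience(trainer, test_ds, epoinc=5, patience=3, maxepochs=500):
    best_err, best_params, stale, epoch = float('inf'), None, 0, 0
    while epoch < maxepochs and stale < patience:
        trainer.trainEpochs(epoinc)
        epoch += epoinc
        err_tst = ModuleValidator.validate(Validator.MSE, trainer.module, test_ds)
        if err_tst < best_err:
            best_err, best_params, stale = err_tst, trainer.module.params.copy(), 0
        else:
            stale += 1                              # one more epoch batch without improvement
    if best_params is not None:
        trainer.module.params[:] = best_params      # roll back to the best weights seen
    return best_err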
def main():
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']
    outLabels = config['output_columns']

    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'],
                              config['test_seqno'], config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)
    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    fdim = tds.indim / 2 + 15
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden0']))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True, weightdecay=0.005)
    #trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)
    #trainer = BackpropTrainer(rnn, dataset=trainData, learningrate=0.0001,
    #                          lrdecay=1.0, momentum=0.4, verbose=True,
    #                          batchlearning=False, weightdecay=0)

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0

    while True:
        for i in range(config['epochs_per_update']):
            trainer.train()
        epochNo += config['epochs_per_update']

        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error: avg %5.3f degC max %5.3f degC" % (tAvgErr, tMaxErr)
        print "Validation error: avg %5.3f degC max %5.3f degC" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if config['visualize_on_training'] == 'yes':
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
import pylab, numpy
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator, ModuleValidator

results = pylab.loadtxt('credit.txt')
target = results[:, -1]
data = numpy.delete(results, -1, 1)
#print "data", tuple(data[0])
#print "target", (target[0],)

#net = buildNetwork(14, 10, 1)
net = buildNetwork(14, 10, 1, hiddenclass=TanhLayer)
#print net.activate([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

ds = SupervisedDataSet(14, 1)
for i in range(len(data)):
    ds.addSample(tuple(data[i]), (target[i],))

trainer = BackpropTrainer(net, ds)
evaluation = ModuleValidator()
validator = CrossValidator(trainer=trainer, dataset=trainer.ds, n_folds=5,
                           valfunc=evaluation.MSE)
print(validator.validate())
def correct(output, target):
    return ModuleValidator.validate(correctValFunc, output, target)
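correctValFunc is not defined in this snippet; ModuleValidator.validate expects a Validator-style callable taking (output, target) arrays, and note that the arguments named output and target in correct() are actually passed to validate() in the module and dataset positions. A hypothetical stand-in consistent with that signature:

# Hypothetical stand-in for correctValFunc: a Validator-style function that
# takes (output, target) arrays and returns the fraction of matching argmax rows.
from numpy import argmax, array

def correctValFunc(output, target):
    out_classes = array([argmax(row) for row in output])
    tgt_classes = array([argmax(row) for row in target])
    return float((out_classes == tgt_classes).mean())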
if tstsplot and ctstsplot:
    pylab.plot(tsts['input'], ctsts, c='g')
pylab.xlabel('x')
pylab.ylabel('y')
pylab.title('Neuron Number:' + str(nneuron))
pylab.grid(True)
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)

# set up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# create the test DataSet
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the training DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)
treinadorSupervisionado = BackpropTrainer(rn, dados)

numeroDeAcessos = 10
numeroDeEpocasPorAcesso = 50

fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
ax1.axis([0, 2 * math.pi, -1.5, 1.5])
fig1.hold()

fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.axis([-50, numeroDeAcessos * numeroDeEpocasPorAcesso + 50, 0.00001, 4])
ax2.set_yscale('log')
fig2.hold()

meansq = ModuleValidator()
erro2 = meansq.MSE(treinadorSupervisionado.module, dados)
print erro2
ax2.plot([0], [erro2], 'bo')

tempoPausa = 1
for i in range(numeroDeAcessos):
    treinadorSupervisionado.trainEpochs(numeroDeEpocasPorAcesso)
    meansq = ModuleValidator()
    erro2 = meansq.MSE(treinadorSupervisionado.module, dados)
    print erro2
    ax1.plot(dados['input'], dados['target'], 'bo', markersize=7, markeredgewidth=0)
trainer = ReinforcedTrainer(module=nn, rewarder=rewardFunc)

from pybrain.tools.validation import ModuleValidator
import thread

def userthread():
    from IPython.Shell import IPShellEmbed
    ipshell = IPShellEmbed()
    ipshell()
#thread.start_new_thread(userthread, ())

# carry out the training
while True:
    trndata = generateData(nseq=20, ratevarlimit=random.uniform(0.0, 0.3))
    tstdata = generateData(nseq=20)
    trainer.setData(trndata)
    trainer.train()

    trnresult = 100. * (ModuleValidator.MSE(nn, trndata))
    tstresult = 100. * (ModuleValidator.MSE(nn, tstdata))
    print "train error: %5.2f%%" % trnresult, ", test error: %5.2f%%" % tstresult

    s = getRandomSeq(100, ratevarlimit=random.uniform(0.0, 1.0))
    print " real:", seqStr(s)
    print "   nn:", getSeqOutputFromNN(nn, s)
hidden_layer_size = 200
net = buildNetwork(t - 1, hidden_layer_size, 1, bias=True)
trainer = BackpropTrainer(module=net, dataset=ds1, momentum=0.1, verbose=True,
                          weightdecay=0.01)
for i in range(20):
    print("Training Epoch #" + str(i))
    trainer.trainEpochs(1)

p = net.activateOnDataset(ds2)
p = p.argmax(axis=1)   # the highest output activation gives the class
p = p.reshape(-1, 1)   # column vector of predicted classes
print p

hitrate = ModuleValidator.classificationPerformance(module=net, dataset=ds1)
print hitrate

#trainer.trainUntilConvergence(verbose=True, validationProportion=0.15, maxEpochs=20)
# for i in range(20):
#     trainer.trainEpochs(3)
#     trnresult = percentError(trainer.testOnClassData(), y_train)
#     tstresult = percentError(trainer.testOnClassData(dataset=ds2), y_test)
#     print "epoch: %4d" % trainer.totalepochs, \
#         "train error: %5.2f%%" % trnresult, \
#         "test error: %5.2f%%" % tstresult
# out = net.activateOnDataset(ds1)
# out = out.argmax(axis=1)  # the highest output activation gives the class
# #print out
# figure(1)
def eval_nn(params):
    global nn, trndata
    nn.reset()
    nn.params[:] = params
    return ModuleValidator.MSE(nn, trndata)
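A sketch of how such an evaluator might be plugged into one of PyBrain's black-box optimizers. The choice of CMAES and the evaluation budget are illustrative assumptions, not part of the original snippet.

# Illustrative use of eval_nn with a black-box optimizer; CMAES and the
# evaluation budget are assumptions, not taken from the original code.
from pybrain.optimization import CMAES

optimizer = CMAES(eval_nn, nn.params.copy())
optimizer.minimize = True          # eval_nn returns an MSE, so lower is better
optimizer.maxEvaluations = 200
best_params, best_mse = optimizer.learn()
nn.params[:] = best_params
print "best MSE:", best_mse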
def score(self, x_data, y_datas):
    return ModuleValidator.validate(regression_score, self.net,
                                    self._prepare_dataset(x_data, y_datas))
trainer = RPropMinusTrainer(net, dataset=train_ds)
train_errors = []  # save errors for plotting later
EPOCHS_PER_CYCLE = 10
CYCLES = 10
EPOCHS = EPOCHS_PER_CYCLE * CYCLES
for a in xrange(CYCLES):
    trainer.trainEpochs(EPOCHS_PER_CYCLE)
    train_errors.append(trainer.testOnData())
    epoch = (a + 1) * EPOCHS_PER_CYCLE
    print("\r epoch {}/{}".format(epoch, EPOCHS), end="")
    stdout.flush()
print("final error for training =", train_errors[-1])

err_tst = ModuleValidator.validate(Validator.MSE, net, dataset=test_ds)
eval_err.append(err_tst)
modnet.append(net)
print("test_Err", err_tst)
print(eval_err)

pmin = eval_err.index(min(eval_err))
print(pmin)
net = modnet[pmin]
hypernet.append(net)
hypereval.append(min(eval_err))

hypermin = hypereval.index(min(eval_err))
net = hypernet[hypermin]
print("number of hidden layers", hypermin + 1)
def main():
    config = MU.ConfigReader("configs/%s" % sys.argv[1])
    config.read()

    logDir = "%s-%s" % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open("%s/config.txt" % logDir, "w") as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config["input_tsv_path"])
    data = dr.read(config["interested_columns"])

    inLabels = config["input_columns"]
    outLabels = config["output_columns"]

    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config["seq_label_column"],
                              config["test_seqno"], config["validation_seqno"])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)
    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    fdim = tds.indim / 5 + 5
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn["hidden0"], rnn["hidden0"]))
    rnn.addRecurrentConnection(FullConnection(rnn["hidden1"], rnn["hidden1"]))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True, verbose=True)

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0

    while True:
        for i in range(config["epochs_per_update"]):
            trainer.train()
        epochNo += config["epochs_per_update"]

        NetworkWriter.writeToFile(rnn, "%s/Epoch_%d.xml" % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, "%s/Latest.xml" % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        tScaler = config.getDataScale([config["output_scalar_label"]])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData["target"] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData["target"] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData["target"] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData["target"] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        print "Training error: avg %5.3f degC max %5.3f degC" % (tAvgErr, tMaxErr)
        print "Validation error: avg %5.3f degC max %5.3f degC" % (vAvgErr, vMaxErr)
        print "------------------------------------------------------------------------------"

        if config["visualize_on_training"] == "yes":
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config["visualized_columns"]["input"],
                            config["visualized_columns"]["output"])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config["visualized_columns"]["input"],
                            config["visualized_columns"]["output"])
            PL.ion()
            PL.draw()

            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label="Train")
            PL.plot(errTime, errValidation, label="Validation")
            PL.legend()
            PL.ion()
            PL.draw()