def train(self, network_module, training_data, testing_data, n=20, s=5):
    """Train ``network_module`` with backprop and print sequence accuracy.

    Training runs in chunks of ``s`` epochs. ``n // s`` chunks are run, so a
    remainder of ``n`` that is not a multiple of ``s`` is not trained. After
    every chunk the value returned by ``testOnSequenceData`` (scaled by 100)
    is printed for the training and the testing dataset.

    Args:
        network_module: module passed to ``BackpropTrainer`` and evaluated.
        training_data: dataset used for training and for the TRAINING figure.
        testing_data: dataset used for the TESTING figure.
        n: total number of epochs requested.
        s: number of epochs per chunk / per progress report.
    """
    trainer = BackpropTrainer(module=network_module, dataset=training_data,
                              verbose=True)

    # Floor division keeps the chunk count an int on Python 3 as well, where
    # ``n / s`` is a float and range() would raise TypeError.
    for chunk in range(n // s):
        trainer.trainEpochs(epochs=s)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('epoch %s finished' % ((chunk + 1) * s))

        # NOTE(review): the per-chunk placement of this evaluation is assumed;
        # confirm it was not meant to run once after the loop.
        # modified from testOnClassData source code
        training_data.reset()

        print('\nTRAINING: {:.2f}% correct'.format(
            testOnSequenceData(network_module, training_data) * 100))
        print('TESTING: {:.2f}% correct\n'.format(
            testOnSequenceData(network_module, testing_data) * 100))
def create_network(timesteps):
    """Build an LSTM network for the MNIST sequence data, train it, report errors.

    The datasets come from ``read_data_MNIST(timesteps)``. The network is
    trained with ``BackpropTrainer.trainUntilConvergence(maxEpochs=1000)``,
    after which the elapsed time and the train/test error percentages
    (``100 * (1 - testOnSequenceData(...))``) are printed.

    Args:
        timesteps: forwarded unchanged to ``read_data_MNIST``.

    Returns:
        The trained network. (Earlier versions returned ``None``.)
    """
    # The validation split is returned by the reader but not used here.
    trndata, _validdata, tstdata = read_data_MNIST(timesteps)

    # 20 is the number of LSTM blocks in the hidden layer
    rnn = buildNetwork(
        trndata.indim, 20, trndata.outdim,
        hiddenclass=LSTMLayer,
        outclass=SoftmaxLayer,
        outputbias=True,
        recurrent=True
    )

    # we use the BPTT algo to train
    trainer = BackpropTrainer(rnn, dataset=trndata, verbose=True,
                              momentum=0.9, learningrate=0.00001)

    print("Training started ...")
    # Wall-clock timing: time.clock() was removed in Python 3.8 and measured
    # CPU time (not elapsed time) on Unix.
    started = time.time()
    # trainer.trainEpochs(10)
    # maxEpochs is passed as 1000; the message below keeps its original
    # wording even though it states the epoch count as exactly 1000.
    trainer.trainUntilConvergence(maxEpochs=1000)
    elapsed_minutes = (time.time() - started) / 60.0
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Training 1000 epochs took :  %s minutes " % elapsed_minutes)

    trnresult = 100.0 * (1.0 - testOnSequenceData(rnn, trndata))
    tstresult = 100.0 * (1.0 - testOnSequenceData(rnn, tstdata))
    print("Train Error : %5.2f%%  , test error :%5.2f%%"
          % (trnresult, tstresult))

    return rnn
#
#rnn.addConnection(FullConnection(rnn['in'], rnn['hidden'], name='c1'))
#rnn.addConnection(FullConnection(rnn['hidden'], rnn['out'], name='c2'))
#
#rnn.addRecurrentConnection(FullConnection(rnn['hidden'], rnn['hidden'], name='c3'))
#rnn.sortModules()

# Training is done with RPropMinusTrainer on the training data.
trainer = RPropMinusTrainer(rnn, dataset=trndata, verbose=True)
# A BackpropTrainer may be tried instead:
##trainer = BackpropTrainer( rnn, dataset=trndata, verbose=True, momentum=0.9, learningrate=0.00001 )

# 100 rounds of 2 epochs each; after every round the train and test error
# percentages are recomputed and printed.
for i in range(100):
    trainer.trainEpochs(2)
    trnresult = (1.0 - testOnSequenceData(rnn, trndata)) * 100.
    tstresult = (1.0 - testOnSequenceData(rnn, tstdata)) * 100.
    print("train error: %5.2f%%" % trnresult, ", test error: %5.2f%%" % tstresult)

# For reference, plot the first 250 input rows (markers) together with the
# first target column (original note: "the first 5 timeseries").
plot(trndata['input'][:250, :], '-o')
hold(True)
plot(trndata['target'][:250, 0])
show()
if exists("params.xml"):
    # A saved network is available: load it instead of building a new one.
    rnn = NetworkReader.readFrom('params.xml')
else:
    # construct LSTM network - note the missing output bias
    rnn = buildNetwork(
        trndata.indim, 5, trndata.outdim,
        hiddenclass=LSTMLayer,
        outclass=SoftmaxLayer,
        outputbias=False,
        recurrent=True
    )

# NOTE(review): training is assumed to run for both branches above; confirm
# it was not meant to happen only when a fresh network is built.
trainer = BackpropTrainer(rnn, dataset=trndata, momentum=0.1, weightdecay=0.01)

# Train for 100 rounds of 2 epochs, recomputing the error fractions each round.
for i in range(100):
    trainer.trainEpochs(2)
    trnresult = 1.0 - testOnSequenceData(rnn, trndata)
    tstresult = 1.0 - testOnSequenceData(rnn, tstdata)
    #print("train error: %5.2f%%" % trnresult, ", test error: %5.2f%%" % tstresult)

# Activate the network on the first training sample and keep the index of
# the largest output.
out = rnn.activate(X_train[0])
out = out.argmax(axis=0)
index = 0

# Collect the argmax prediction for every test sample.
result = []
for x in X_test:
    result.append(rnn.activate(x).argmax())
def runTraining(self, convergence=0, **kwargs):
    """Train the network on the stored dataset and record the error curves.

    Training proceeds in increments of ``self.epoinc`` epochs while
    ``epoch <= self.maxepochs``. After each increment the classification
    error (in percent) is computed on the training data and, if ``self.TDS``
    is set, on the test data. The weights with the lowest test error are
    remembered and restored before the final evaluation on ``self.VDS``.

    Args:
        convergence: if > 0, stop once this many consecutive increments have
            passed without the test error improving. With 0 (the default) no
            early stopping is applied and training runs to ``maxepochs``.
        **kwargs: accepted for call compatibility; not used here.

    Side effects:
        Updates ``self.Graph`` (when set), logs progress through ``logging``
        and stores ``(epochs, train_errors, test_errors)`` in
        ``self.trainCurve``.
    """
    assert isinstance(self.Trainer, Trainer)

    def percent_error(sequence_data, **class_kwargs):
        # Classification error in percent. Sequential datasets are scored
        # with testOnSequenceData on ``sequence_data``; otherwise the trainer
        # scores the dataset given in ``class_kwargs`` (its own when empty).
        if isinstance(self.DS, SequentialDataSet):
            accuracy = testOnSequenceData(self.Trainer.module, sequence_data)
        else:
            # FIXME: messy - validation does not belong into the Trainer...
            out, trueclass = self.Trainer.testOnClassData(
                return_targets=True, **class_kwargs)
            accuracy = Validator.classificationPerformance(out, trueclass)
        return 100. * (1.0 - accuracy)

    if self.Graph is not None:
        self.Graph.setLabels(x='epoch', y='% classification error')
        self.Graph.setLegend(['training', 'test'], loc='lower right')

    epoch = 0
    inc = self.epoinc
    best_error = 100.0
    best_epoch = 0
    # Stays None until a test-set improvement is seen, so the validation step
    # below can tell whether there is anything to restore.
    bestweights = None
    learncurve_x = [0]
    learncurve_y = [0.0]
    valcurve_y = [0.0]
    converged = False
    convtest = 0

    if convergence > 0:
        logging.info(
            "Convergence criterion: %d batches of %d epochs w/o improvement",
            convergence, inc)

    # NOTE(review): with ``<=`` one more increment is trained when epoch
    # equals maxepochs; kept as in the original.
    while epoch <= self.maxepochs and not converged:
        self.Trainer.trainEpochs(inc)
        epoch += inc
        learncurve_x.append(epoch)

        # calculate errors on TRAINING data
        r_trn = percent_error(self.DS)
        learncurve_y.append(r_trn)

        if self.TDS is None:
            logging.info("epoch: %6d, err_trn: %5.2f%%", epoch, r_trn)
        else:
            # calculate errors on TEST data
            r_tst = percent_error(self.TDS, dataset=self.TDS)
            valcurve_y.append(r_tst)

            if r_tst < best_error:
                best_epoch = epoch
                best_error = r_tst
                bestweights = self.Trainer.module.params.copy()
                convtest = 0
            else:
                convtest += 1

            logging.info(
                "epoch: %6d, err_trn: %5.2f%%, err_tst: %5.2f%%, best_tst: %5.2f%%",
                epoch, r_trn, r_tst, best_error)
            if self.Graph is not None:
                self.Graph.addData(1, epoch, r_tst)

            # Check the convergence criterion (no improvement after N
            # increments). Only applied when a criterion was requested:
            # with convergence == 0 the comparison would be true on the very
            # first pass and stop training immediately.
            if convergence > 0 and convtest >= convergence:
                converged = True

        if self.Graph is not None:
            self.Graph.addData(0, epoch, r_trn)
            self.Graph.update()

    logging.info("Best epoch: %6d, with error: %5.2f%%", best_epoch, best_error)

    if self.VDS is not None:
        # calculate errors on VALIDATION data
        if bestweights is not None:
            self.Trainer.module.params[:] = bestweights.copy()
        # else: no test-set improvement was recorded, so the weights from the
        # end of training are evaluated as they are.
        r_val = percent_error(self.VDS, dataset=self.VDS)
        logging.info("Result on evaluation data: %5.2f%%", r_val)

    self.trainCurve = (learncurve_x, learncurve_y, valcurve_y)
# Collect every (input, target) pair into one supervised dataset.
ds = SupervisedDataSet(x_dimension, y_dimension)
for i, sample in enumerate(X_train):
    ds.addSample(sample, Y_train[i])

# construct LSTM network - note the missing output bias
rnn = buildNetwork(
    x_dimension, x_dimension, y_dimension,
    hiddenclass=LSTMLayer,
    outclass=SoftmaxLayer,
    outputbias=False,
    recurrent=True
)

# Training is done with RPropMinusTrainer on the dataset built above.
trainer = RPropMinusTrainer(rnn, dataset=ds, verbose=True)
# A BackpropTrainer may be tried instead:
##trainer = BackpropTrainer( rnn, dataset=trndata, verbose=True, momentum=0.9, learningrate=0.00001 )

# 100 rounds of 2 epochs each. Both reported figures are computed on the same
# dataset ``ds``.
for i in range(100):
    trainer.trainEpochs(2)
    trnresult = (1.0 - testOnSequenceData(rnn, ds)) * 100.
    tstresult = (1.0 - testOnSequenceData(rnn, ds)) * 100.
    print("train error: %5.2f%%" % trnresult, ", test error: %5.2f%%" % tstresult)

# For reference, plot the first 250 input rows (markers) together with the
# first target column (original note: "the first 5 timeseries").
plot(ds['input'][:250, :], '-o')
hold(True)
plot(ds['target'][:250, 0])
show()
correct += 1.0 test_accuracy2 = correct / float(len(Y_test)) print "test accuracy is ", test_accuracy2 """ x_dimension = len(X_train[0]) y_dimension = len(Y_train[0]) ds = SupervisedDataSet(x_dimension, y_dimension) for i in range(len(X_train)): ds.addSample(X_train[i], Y_train[i]) # construct LSTM network - note the missing output bias rnn = buildNetwork(x_dimension, x_dimension, y_dimension, hiddenclass=LSTMLayer, outclass=SoftmaxLayer, outputbias=False, recurrent=True) # define a training method trainer = RPropMinusTrainer( rnn, dataset=ds, verbose=True ) # instead, you may also try ##trainer = BackpropTrainer( rnn, dataset=trndata, verbose=True, momentum=0.9, learningrate=0.00001 ) # carry out the training for i in range(100): trainer.trainEpochs(2) trnresult = 100. * (1.0-testOnSequenceData(rnn, ds)) tstresult = 100. * (1.0-testOnSequenceData(rnn, ds)) print("train error: %5.2f%%" % trnresult, ", test error: %5.2f%%" % tstresult) # just for reference, plot the first 5 timeseries plot(ds['input'][0:250,:],'-o') hold(True) plot(ds['target'][0:250,0]) show()
# The network has to be sorted again once another connection was added.
recursive_network.sortModules()

print("------Before Training:")


def test_on_sentence(the_sentence):
    """Feed a sentence word by word and print the output for its last word.

    The network is reset first; every word is activated in order, and only
    the activation produced by the final word is printed.
    """
    recursive_network.reset()
    final_position = len(the_sentence) - 1
    for position, word in enumerate(the_sentence):
        activation = recursive_network.activate(word)
        if position >= final_position:
            print(activation)


print('num_correct / len_dataset')
print(testOnSequenceData(recursive_network, sds))  # what a find!
sys.stdout.flush()

trainer = BackpropTrainer(recursive_network, sds, verbose=False)
trainer.trainEpochs(500)

print("------After Training:")

for a_sentence in sentences:
    test_on_sentence(a_sentence)

print('num_correct / len_dataset')
print(testOnSequenceData(recursive_network, sds))
#print recursive_network['in']