def crossValidation(self, filename): trainer = BackpropTrainer(self.net) crossValidator = CrossValidator(trainer, self.createDataSetFromFile(filename), n_folds=10) result = crossValidator.validate() print result * 100, "%"
def _validate(self, params):
    """The overridden validate function, that uses cross-validation in order
    to determine the params' performance value.
    """
    return CrossValidator(
        self._getTrainerForParams(params),
        self._dataset,
        self._n_folds,
        **self._validator_kwargs
    ).validate()
def _validate(self, params):
    """See GridSearchCostGamma.

    Memoizes results in ``self._performances`` keyed by the parameter
    tuple, so repeated parameter combinations are not re-validated.
    """
    key = tuple(params)
    cache = self._performances
    # Cache hit: skip the (expensive) cross-validation entirely.
    if key in cache:
        return cache[key]
    trainer = self._getTrainerForParams(params)
    cache[key] = CrossValidator(
        trainer, self._dataset, self._n_folds, **self._validator_kwargs
    ).validate()
    return cache[key]
def cross_validate(self, dataset=None):
    """10-fold cross-validate ``self.neural_network`` on *dataset*.

    Stores the mean validation result on ``self.cross_validation_result``
    and logs it.
    """
    backprop = BackpropTrainer(self.neural_network, dataset=dataset,
                               momentum=0.1, verbose=True, weightdecay=0.01)
    cross_validator = CrossValidator(trainer=backprop, dataset=dataset, n_folds=10)
    self.cross_validation_result = cross_validator.validate()
    logger.info('cross val result: {result}'.format(
        result=self.cross_validation_result))
def cvnntester(tx, ty, rx, ry, iterations, folds): network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.005) cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True) print cv.validate() print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2) / float(len(ry))
def train(self, args):
    """Train the network according to ``self.net.trainingType``.

    args: command tokens; ``args[1]``, when present, overrides the epoch
    count. Supported training types: "gradient", "optimization",
    "crossval" (5-fold cross-validation); anything else raises.
    """
    if self.data.ds is None:
        print("Can't train without loaded data")
        return
    if args and len(args) >= 2:
        self.net.epochs = int(args[1])
    if self.net.trainingType == "gradient":
        if self.trainer is None:
            self.trainer, self.returnsNet = self.__getGradientTrainer()
        self.__train(self.trainer.trainEpochs, self.returnsNet)
    elif self.net.trainingType == "optimization":
        if self.trainer is None:
            self.trainer, self.returnsNet = self.__getOptimizationTrainer()
        self.__train(self.trainer.learn, self.returnsNet)
        return
    elif self.net.trainingType == "crossval":
        # BUG FIX: this branch previously read self.trainingType while every
        # other branch reads self.net.trainingType.
        if self.trainer is None:
            self.trainer, self.returnsNet = self.__getGradientTrainer()
        # BUG FIX: valfunc must be the callable itself; the original called
        # classificationPerformance(...) once and passed its float result.
        validator = CrossValidator(trainer=self.trainer, dataset=self.trainer.ds,
                                   n_folds=5,
                                   valfunc=ModuleValidator.classificationPerformance,
                                   verbose=True, max_epochs=1)
        print(validator.validate())
    else:
        raise Exception("Cannot create trainer, no network type specified"
                        + self.net.trainingType)
def get_pybrain_data_set(data, input_cols, target_cols=1):
    """Build a SupervisedDataSet from *data* rows.

    The first `input_cols` columns of each row become the input sample;
    the single column right after them becomes the target.
    """
    data_set = SupervisedDataSet(input_cols, target_cols)
    for row in data:
        # First `input_cols` values are the features; the next one is the target.
        data_set.addSample(tuple(row[:input_cols]), tuple([row[input_cols]]))
    return data_set


# normalize all values
p['wti_var'] = normalize(p['wti_var'])
p['wti_skew'] = normalize(p['wti_skew'])
p['wti_curt'] = normalize(p['wti_curt'])
p['i_entr'] = normalize(p['i_entr'])

# shuffle up data
# BUG FIX: DataFrame.reindex returns a new frame; the original discarded the
# result, so the data was never actually shuffled.
p = p.reindex(np.random.permutation(p.index))

trainer = BackpropTrainer(
    buildNetwork(4, 5, 1),  # 4 input nodes, 5 hidden nodes, 1 output node
    get_pybrain_data_set(p.as_matrix(), 4),
    verbose=True)
cv = CrossValidator(trainer, trainer.ds, n_folds=5)
cv.setArgs(max_epochs=2, verbose=True)
print(cv.validate())
# Toy two-class dataset: four samples of class 0 at (-1,-1), five of class 1 at (1,1).
# NOTE(review): `alldata` is created before this excerpt — presumably a
# ClassificationDataSet; confirm against the preceding code.
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([-1, -1], [0])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])
alldata.addSample([1, 1], [1])

# Hold out 25% of the samples for testing; one-hot encode targets for both splits.
tstdata, trndata = alldata.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# We can also examine the dataset
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

# Feed-forward network: input layer sized to the data, 5 hidden units,
# one-hot output layer.
fnn = buildNetwork( trndata.indim, 5, trndata.outdim, recurrent=False )
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 )

# I am not sure about this, I don't think my production code is implemented like this
modval = ModuleValidator()
# Train 20 epochs, then one more pass over the training data, then score the
# trained module with 5-fold cross-validation using mean squared error.
trainer.trainEpochs(20)
trainer.trainOnDataset(dataset=trndata)
cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE )
print "MSE %f" %( cv.validate() )
#将类别转化为5位 dsTrain_test._convertToOneOfMany(bounds=[0, 1]) dsTest_test._convertToOneOfMany(bounds=[0, 1]) #print dsTrain_test['target'] #划分训练集跟测试集 dsTrain,dsTest = dsBuild(data) #训练神经网络 netModel = netBuild(dsTrain_test) modval = ModuleValidator() netModel.trainEpochs(20) netModel.trainUntilConvergence(maxEpochs=1000) cv = CrossValidator(netModel, dsTrain_test, n_folds=5, valfunc=modval.MSE ) print "MSE %f" %( cv.validate() ) from sklearn.externals import joblib joblib.dump(netModel, "train_model.m") netModel =joblib.load("train_model.m") #f1值检验 pred=[] really =[] yuanma = [] calma = []
languages = [] for g in glob.glob("./data/*.txt"): language, num = g.split("/")[-1].split("_") languages.append(Language(io.open(g, 'r+'), language)) n = Network(languages) n.train() n.trainer.verbose = True n.trainer.trainUntilConvergence() def correctValFunc(output, target): assert len(output) == len(target) n_correct = 0 for idx, instance in enumerate(output): # This will find the maximum liklihood language classification = instance.argmax(axis=0) objective = target[idx].argmax(axis=0) if objective == classification: n_correct += 1 return 1 - (float(n_correct) / float(len(output))) def correct(output, target): return ModuleValidator.validate(correctValFunc, output, target) cv = CrossValidator(n.trainer, n.dataSet, valfunc=correct, n_folds=2) print cv.validate()
data_set = common.get_bc_data_for_nn() test_means = [] test_std = [] x_vals = [2, 3, 4, 5, 6, 7, 8] for x in x_vals: means = [] for i in range(20): trainer = BackpropTrainer( buildNetwork(3, x, 1), data_set, verbose=True ) print "%d %d" % (x, i) trainer.trainEpochs(3) cv = CrossValidator(trainer, trainer.ds, n_folds=5, valfunc=ModuleValidator.MSE) means.append(cv.validate()) test_means.append(np.mean(means)) test_std.append(np.std(means)) common.plot_nn_mse( title = "Breast Cancer Survival Neural Network", x_label = "Number of hidden nodes", x_vals = x_vals, y_means = np.array(test_means), y_std = np.array(test_std) )
import pylab
import numpy

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator, ModuleValidator

# Last column of credit.txt is the label; the remaining 14 columns are features.
results = pylab.loadtxt('credit.txt')
target = results[:, -1]
data = numpy.delete(results, -1, 1)

# 14 inputs -> 10 tanh hidden units -> 1 output.
net = buildNetwork(14, 10, 1, hiddenclass=TanhLayer)

ds = SupervisedDataSet(14, 1)
# Pair each feature row with its label instead of indexing by position.
for features, label in zip(data, target):
    ds.addSample(tuple(features), (label, ))

trainer = BackpropTrainer(net, ds)
evaluation = ModuleValidator()
# 5-fold cross-validation scored by mean squared error.
validator = CrossValidator(trainer=trainer, dataset=trainer.ds,
                           n_folds=5, valfunc=evaluation.MSE)
print(validator.validate())
def compare_l2_regularization():
    """Compare two L2 weight-decay settings (1e-4 vs 1e-3) on the breast
    cancer dataset and plot train/test percent error for both networks.

    Each network is cross-validated once; the resulting figure is saved to
    nn_breast_cancer_weight_decay.png.
    """
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data(
    )
    optimal_num_layers = 6
    # Used only in the plot title below.
    num_neurons = [optimal_num_layers * [16]]
    train_accuracy1 = []
    test_accuracy1 = []
    train_accuracy2 = []
    test_accuracy2 = []
    iterations = range(250)
    nn1 = buildNetwork(30, 16, 1, bias=True)
    nn2 = buildNetwork(30, 16, 1, bias=True)
    dataset = ClassificationDataSet(len(train_features[0]),
                                    len(train_labels[0]),
                                    class_labels=["1", "2"])
    for instance in range(len(train_features)):
        dataset.addSample(train_features[instance], train_labels[instance])
    trainer1 = BackpropTrainer(nn1, dataset, weightdecay=0.0001)
    validator1 = CrossValidator(trainer1, dataset)
    print(validator1.validate())
    trainer2 = BackpropTrainer(nn2, dataset, weightdecay=0.001)
    validator2 = CrossValidator(trainer2, dataset)
    print(validator2.validate())
    # NOTE(review): nothing trains the networks inside this loop, so all 250
    # recorded values are identical — confirm whether a per-iteration
    # trainer1.train()/trainer2.train() call was intended here.
    for iteration in iterations:
        train_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test)) for test in train_features]) -
                 train_labels)**2) / float(len(train_labels)))
        test_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test)) for test in test_features]) -
                 test_labels)**2) / float(len(test_labels)))
        train_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test)) for test in train_features]) -
                 train_labels)**2) / float(len(train_labels)))
        test_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test)) for test in test_features]) -
                 test_labels)**2) / float(len(test_labels)))
    plt.plot(iterations, train_accuracy1)
    plt.plot(iterations, test_accuracy1)
    plt.plot(iterations, train_accuracy2)
    plt.plot(iterations, test_accuracy2)
    # BUG FIX: the last legend label was missing its closing parenthesis.
    plt.legend([
        "Train Accuracy (0.0001)", "Test Accuracy (0.0001)",
        "Train Accuracy (0.001)", "Test Accuracy (0.001)"
    ])
    plt.xlabel("Num Epoch")
    plt.ylabel("Percent Error")
    plt.title("Neural Network on Breast Cancer Data with " +
              str(num_neurons) + " layers")
    plt.savefig("nn_breast_cancer_weight_decay.png")