def classify(Xtrain, Ytrain, n_hidden=5): """ Use entirety of provided X, Y to predict Arguments Xtrain -- Training data Ytrain -- Training prediction Returns classifier -- a classifier fitted to Xtrain and Ytrain """ # PyBrain expects data in its DataSet format trndata = ClassificationDataSet(Xtrain.shape[1], nb_classes=2) trndata.setField('input', Xtrain) # Apprently, arrays don't work here as they try to access second dimension size... trndata.setField('target', mat(Ytrain).transpose()) trndata._convertToOneOfMany() # one output neuron per class # build neural net and train it net = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer) trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01) trainer.trainUntilConvergence() #trainer.trainEpochs(5) print "trained" #trainer.trainEpochs(5) # Return a functor that wraps calling predict return NeuralNetworkClassifier(trainer)
def load_data(filename): """ load dataset for classification """ assert os.path.exists(filename)==True dat = scipy.io.loadmat(filename) inputs = dat['inputs'] #print len(inputs) targets = dat['targets'] #print len(targets) assert len(inputs)==len(targets) global alldata global indim global outdim indim = len(inputs[0]) outdim = 1 #print indim alldata = ClassificationDataSet(indim, outdim, nb_classes = 8) alldata.setField('input',inputs) alldata.setField('target',targets) assert len(alldata['input'])==len(alldata['target']) print type(alldata)
def norm_data(X, y):
    """Wrap (X, y) in a one-of-many encoded ClassificationDataSet.

    NOTE: reshapes `y` in place to a column vector, so the caller's
    array is modified.
    """
    n_features = X.shape[-1]
    n_classes = len(set(y))
    ds = ClassificationDataSet(n_features, 1, nb_classes=n_classes)
    y.shape = -1, 1
    ds.setField('input', X)
    ds.setField('target', y)
    ds._convertToOneOfMany()
    return ds
def ConvertToOneOfMany(d,nb_classes,bounds=(0,1)):
    """Return a copy of dataset `d` with a one-of-many encoded 'target' field.

    d          -- source ClassificationDataSet (one integer class id per sample)
    nb_classes -- number of classes (width of the encoded target)
    bounds     -- (off, on) values used in the encoding, default (0, 1)

    The original integer labels are kept in the new 'class' field.
    """
    d2 = ClassificationDataSet(d.indim, d.outdim, nb_classes=nb_classes)
    for n in range(d.getLength()):
        d2.addSample( d.getSample(n)[0], d.getSample(n)[1] )
    oldtarg=d.getField('target')
    # lowercase 'int32': the capitalized 'Int32' alias was removed in NumPy 1.24
    # (identical dtype on older NumPy, so this is behavior-preserving)
    newtarg=np.zeros([len(d),nb_classes],dtype='int32')+bounds[0]
    for i in range(len(d)):
        newtarg[i,int(oldtarg[i])]=bounds[1]
    d2.setField('class',oldtarg)
    d2.setField('target',newtarg)
    return(d2)
def ConvertToOneOfMany(d, nb_classes, bounds=(0, 1)):
    """Return a copy of dataset `d` with a one-of-many encoded 'target' field.

    d          -- source ClassificationDataSet (one integer class id per sample)
    nb_classes -- number of classes (width of the encoded target)
    bounds     -- (off, on) values used in the encoding, default (0, 1)

    The original integer labels are kept in the new 'class' field.
    """
    d2 = ClassificationDataSet(d.indim, d.outdim, nb_classes=nb_classes)
    for n in range(d.getLength()):
        d2.addSample(d.getSample(n)[0], d.getSample(n)[1])
    oldtarg = d.getField('target')
    # lowercase 'int32': the capitalized 'Int32' alias was removed in NumPy 1.24
    # (identical dtype on older NumPy, so this is behavior-preserving)
    newtarg = np.zeros([len(d), nb_classes], dtype='int32') + bounds[0]
    for i in range(len(d)):
        newtarg[i, int(oldtarg[i])] = bounds[1]
    d2.setField('class', oldtarg)
    d2.setField('target', newtarg)
    return (d2)
def convert_to_pybrain_dataset(X, Y=None):
    """Build a 2-class PyBrain ClassificationDataSet from X (and optional Y)."""
    if Y is None:
        # no labels supplied: substitute an all-zero dummy target column
        Y = [0] * X.shape[0]
    # plain arrays trip PyBrain's second-dimension lookup, so go via a matrix
    Y = mat(Y).transpose()
    ds = ClassificationDataSet(X.shape[1], Y.shape[1], nb_classes=2)
    ds.setField('input', X)
    ds.setField('target', Y)
    ds._convertToOneOfMany()  # one output neuron per class
    return ds
def convert_to_pybrain_dataset(X, Y=None):
    """Build a 2-class PyBrain ClassificationDataSet from X (and optional Y).

    When Y is omitted a dummy all-zero label column is used, so the
    dataset can still be constructed for prediction-only use.
    """
    if Y is None:
        Y = [0] * X.shape[0]
    # Apprently, arrays don't work here as they try to access second dimension size...
    Y = mat(Y).transpose()
    data = ClassificationDataSet(X.shape[1], Y.shape[1], nb_classes=2)
    data.setField('input', X)
    data.setField('target', Y)
    data._convertToOneOfMany()  # one output neuron per class
    return data
def loss_multiclass_nn_old(X_feats, Y, nn): DS = ClassificationDataSet( X_feats.shape[1], 1, nb_classes=2 ) #for i in range(X_feats.shape[0]): # DS.addSample( X_feats[i,:], [0.0] ) DS.setField('input', X_feats) DS.setField('target', np.zeros((X_feats.shape[0],1))) DS._convertToOneOfMany() prob = nn.activateOnDataset(DS) Y2 = classifier.to_one_of_k_coding(Y, 0) local_likelihood = -np.dot(np.log(prob).flat, Y2.flat) likelihood = mpi.COMM.allreduce(local_likelihood) num_data = mpi.COMM.allreduce(len(Y)) return float(likelihood) / num_data
def cross_validation(trndata, folds=3, **kwargs):
    """k-fold cross-validation over `trndata`.

    kwargs are parameters for the model. Returns the mean accuracy over
    `folds` folds. Relies on module-level `attributes`, `classes_number`,
    `FNNClassifier` and `accuracy`.
    """
    inputs = np.vsplit(trndata['input'], folds)
    targets = np.vsplit(trndata['target'], folds)
    zipped = list(zip(inputs, targets))  # materialized: indexed and sliced below
    accuracy_sum = 0
    # BUGFIXES vs original: iterated `for i in len(zipped)` (TypeError),
    # subscripted np.vstack[...] instead of calling it, treated the list of
    # (input, target) fold pairs as if it were already unzipped, and set the
    # test dataset's target from the *training* folds.
    for i in range(len(zipped)):
        new_train = ClassificationDataSet(attributes, nb_classes=classes_number)
        new_test = ClassificationDataSet(attributes, nb_classes=classes_number)
        test_input, test_target = zipped[i]
        train_folds = zipped[:i] + zipped[(i+1):]
        new_train.setField('input', np.vstack([f[0] for f in train_folds]))
        new_train.setField('target', np.vstack([f[1] for f in train_folds]))
        new_test.setField('input', test_input)
        new_test.setField('target', test_target)
        model = FNNClassifier()
        model.train(new_train, new_test, kwargs)
        out, targ = model.predict(new_test)
        accuracy_sum += accuracy(out, targ)
    return accuracy_sum / folds
def big_training(np_data, num_nets=1, num_epoch=20, net_builder=net_full, train_size=.1, testing=False):
    """Train `num_nets` networks on stratified shuffle splits of np_data.

    np_data -- array whose first column is the class label and the rest are
               features (assumed; scaling below maps features via /100 - .6
               — TODO confirm feature range against the data source).
    Saves each trained net to nets/<builder-name><i>.xml and returns
    [nets, trainaccu, testaccu] (per-net, per-epoch accuracy lists).
    """
    sss = cross_validation.StratifiedShuffleSplit(np_data[:,:1].ravel(), n_iter=num_nets , test_size=1-train_size, random_state=3476)
    nets=[None for net_ind in range(num_nets)]
    trainaccu=[[0 for i in range(num_epoch)] for net_ind in range(num_nets)]
    testaccu=[[0 for i in range(num_epoch)] for net_ind in range(num_nets)]
    net_ind=0
    for train_index, test_index in sss:
        print ('%s Building %d. network.' %(time.ctime(), net_ind+1))
        #print("TRAIN:", len(train_index), "TEST:", len(test_index))
        # column 0 is the target, remaining columns the (rescaled) inputs
        trainset = ClassificationDataSet(np_data.shape[1] - 1, 1)
        trainset.setField('input', np_data[train_index,1:]/100-.6)
        trainset.setField('target', np_data[train_index,:1])
        trainset._convertToOneOfMany( )
        trainlabels = trainset['class'].ravel().tolist()
        if testing:
            testset = ClassificationDataSet(np_data.shape[1] - 1, 1)
            testset.setField('input', np_data[test_index,1:]/100-.6)
            testset.setField('target', np_data[test_index,:1])
            testset._convertToOneOfMany( )
            testlabels = testset['class'].ravel().tolist()
        nets[net_ind] = net_builder()
        trainer = BackpropTrainer(nets[net_ind], trainset)
        for i in range(num_epoch):
            # three backprop passes per reported "epoch"
            for ii in range(3):
                err = trainer.train()
            print ('%s Epoch %d: Network trained with error %f.' %(time.ctime(), i+1, err))
            trainaccu[net_ind][i]=accuracy_score(trainlabels,trainer.testOnClassData())
            print ('%s Epoch %d: Train accuracy is %f' %(time.ctime(), i+1, trainaccu[net_ind][i]))
            # how many nets so far beat each accuracy threshold
            print ([sum([trainaccu[y][i]>tres for y in range(net_ind+1)]) for tres in [0,.1,.2,.3,.4,.5,.6]])
            if testing:
                testaccu[net_ind][i]=accuracy_score(testlabels,trainer.testOnClassData(testset))
                print ('%s Epoch %d: Test accuracy is %f' %(time.ctime(), i+1, testaccu[net_ind][i]))
        NetworkWriter.writeToFile(nets[net_ind], 'nets/'+net_builder.__name__+str(net_ind)+'.xml')
        net_ind +=1
    return [nets, trainaccu, testaccu]
def validate(trainer, dataset, n_folds, max_epochs):
    """n-fold cross-validation of `trainer` over `dataset`.

    The incoming trainer is deep-copied each fold so every fold trains
    from the same starting point. Trains for `max_epochs` epochs (or a
    single train() call when max_epochs is falsy) and returns the mean
    percentError over the held-out folds.
    """
    l = dataset.getLength()
    inp = dataset.getField("input")
    tar = dataset.getField("target")
    indim = dataset.indim
    outdim = dataset.outdim
    assert l > n_folds
    perms = np.array_split(np.arange(l), n_folds)
    perf = 0.0
    for i in range(n_folds):
        # determine train indices: every split except the i-th
        # (list(...) so .pop() also works on Python 3, where range() is lazy;
        # identical behavior on Python 2)
        train_perms_idxs = list(range(n_folds))
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[ train_perms_idx ])
        train_idxs = np.concatenate(temp_list)
        # determine test indices
        test_idxs = perms[i]
        # train on the fold
        train_ds = ClassificationDataSet(indim, outdim)
        train_ds.setField("input" , inp[train_idxs])
        train_ds.setField("target" , tar[train_idxs])
        train_ds._convertToOneOfMany()
        trainer = copy.deepcopy(trainer)
        trainer.setData(train_ds)
        if not max_epochs:
            trainer.train()
        else:
            trainer.trainEpochs(max_epochs)
        # test on the held-out fold
        test_ds = ClassificationDataSet(indim, outdim)
        test_ds.setField("input" , inp[test_idxs])
        test_ds.setField("target" , tar[test_idxs])
        test_ds._convertToOneOfMany()
        perf += percentError(trainer.testOnClassData(dataset=test_ds), test_ds['class'])
    perf /= n_folds
    return perf
def generateGridData(x, y, return_ticks=False):
    """Generates a dataset containing a regular grid of points.

    The x and y arguments contain start, end, and step each. Returns the
    dataset and either the x/y ticks (return_ticks=True) or the meshes.
    """
    x_ticks = np.arange(x[0], x[1], x[2])
    y_ticks = np.arange(y[0], y[1], y[2])
    X, Y = np.meshgrid(x_ticks, y_ticks)
    ds = ClassificationDataSet(2, 1)
    # the dataset wants column vectors, so flatten each mesh into (N, 1)
    ds.setField('input', np.concatenate((X.reshape(X.size, 1), Y.reshape(X.size, 1)), 1))
    ds.setField('target', np.zeros([X.size, 1]))
    ds._convertToOneOfMany()
    if return_ticks:
        return (ds, x_ticks, y_ticks)
    return (ds, X, Y)
def train(self, X, y):
    """Fit self.net on (X, y) with backprop until convergence.

    Builds a sigmoid-output network with self.numFeatures inputs,
    len(y) hidden units and self.numLabels outputs, then trains with
    5% of the data held out for validation. Prints timing on completion.
    """
    ds = ClassificationDataSet(self.numFeatures, self.numLabels, nb_classes=self.numLabels)
    ds.setField("input", X)
    ds.setField("target", y)
    # NOTE(review): hidden-layer size len(y) grows with the number of
    # samples — looks unusual, confirm this is intended
    self.net = buildNetwork(self.numFeatures, len(y), self.numLabels, outclass=SigmoidLayer, bias=True)
    print X
    print y
    trainer = BackpropTrainer(self.net, ds, learningrate=0.12)
    print "Training now.... on " + str(len(y)) + " training examples"
    startDate = datetime.datetime.now()
    trainer.trainUntilConvergence(verbose=True, validationProportion=0.05)
    # no-op expression (leftover); kept to preserve the original byte-for-byte
    datetime.timedelta(0, 8, 562000)
    dateDiff = datetime.datetime.now() - startDate
    timeDiff = divmod(dateDiff.days * 86400 + dateDiff.seconds, 60)
    print "DONE TRAINING. TOOK %s min %s sec\r" % (timeDiff[0], timeDiff[1])
    print "=======================================================================================\r"
def get_predictions_nn_old(self, X, special_bias=None):
    """Activate the stored PyBrain net on features assembled from X.

    Features named in self.feat_list are concatenated, normalized with
    self.m / self.std, optionally extended with `special_bias`, then run
    through self._nn. Results are gathered over MPI: the root rank gets
    the stacked predictions, other ranks get an empty array.
    """
    # BUGFIX: np.hstack needs a sequence, not a bare generator (TypeError
    # on modern NumPy); also prefer `is not None` over `!= None`.
    X_feats = np.ascontiguousarray(np.hstack([X[self.feat_list[i]] for i in range(len(self.feat_list))]))
    X_feats -= self.m
    X_feats /= self.std
    if special_bias is not None:
        X_feats = np.ascontiguousarray(np.hstack((X_feats, special_bias)))
    DS = ClassificationDataSet( X_feats.shape[1], 1, nb_classes=2 )
    #for i in range(X_feats.shape[0]):
    #    DS.addSample( X_feats[i,:], [0.0] )
    # targets are dummy zeros: only the activations are used here
    DS.setField('input', X_feats)
    DS.setField('target', np.zeros((X_feats.shape[0],1)))
    DS._convertToOneOfMany()
    prob = self._nn.activateOnDataset(DS)
    prob = mpi.COMM.gather(prob)
    if mpi.is_root():
        return np.vstack(prob)
    else:
        return np.zeros((0))
def Accuracy(self, X, Y, special_bias = None):
    """Compute and store test accuracy for the trained model on (X, Y).

    Assembles the features exactly as in training (concatenate, normalize
    with self.m / self.std, optional special_bias column), then dispatches
    on self._type: linear models use the stored (w, b), 'nn_atwv' uses the
    stored network weights, anything else evaluates via the PyBrain trainer.
    Result is cached in self.test_accu and returned.
    """
    X_feats = np.ascontiguousarray(np.hstack((X[self.feat_list[i]] for i in range(len(self.feat_list)))))
    X_feats -= self.m
    X_feats /= self.std
    if special_bias != None:
        X_feats = np.ascontiguousarray(np.hstack((X_feats, special_bias)))
    if self._type=='linsvm' or self._type=='logreg' or self._type=='logreg_atwv':
        self.test_accu = classifier.Evaluator.accuracy(Y, np.dot(X_feats,self.w)+self.b)
    elif self._type=='nn_atwv':
        pred = get_predictions_nn(X_feats, self._weights_nn, arch=[10])[0]
        # NOTE(review): column 0 is forced to 0.5 before scoring — presumably
        # to neutralize the background class; confirm against the ATWV setup
        pred[:,0] = 0.5
        self.test_accu = classifier.Evaluator.accuracy(Y, pred)
    else:
        DS = ClassificationDataSet( X_feats.shape[1], 1, nb_classes=2 )
        #for i in range(X_feats.shape[0]):
        #    DS.addSample( X_feats[i,:], [Y[i]] )
        DS.setField('input', X_feats)
        DS.setField('target', Y[:,np.newaxis])
        DS._convertToOneOfMany()
        predict,targts = self._nn_trainer.testOnClassData(DS, verbose=True,return_targets=True)
        self.test_accu = np.sum(np.array(predict)==np.array(targts))/float(len(targts))
    return self.test_accu
def train(self, X, y): ds = ClassificationDataSet(self.numFeatures, self.numLabels, nb_classes=self.numLabels) ds.setField('input', X) ds.setField('target', y) self.net = buildNetwork(self.numFeatures, len(y), self.numLabels, outclass=SigmoidLayer, bias=True) print X print y trainer = BackpropTrainer(self.net, ds, learningrate=0.12) print 'Training now.... on ' + str(len(y)) + ' training examples' startDate = datetime.datetime.now() trainer.trainUntilConvergence(verbose=True, validationProportion=0.05) datetime.timedelta(0, 8, 562000) dateDiff = datetime.datetime.now() - startDate timeDiff = divmod(dateDiff.days * 86400 + dateDiff.seconds, 60) print 'DONE TRAINING. TOOK %s min %s sec\r' % (timeDiff[0], timeDiff[1]) print '=======================================================================================\r'
def bootstrap(trndata, iter=100):
    """ check http://sci2s.ugr.es/keel/pdf/specific/articulo/jain_boot_87.pdf for notation

    .632 bootstrap error estimate: for `iter` rounds, resample with
    replacement for training, test on the unsampled rows, and combine
    apparent error and out-of-sample error as 0.368*app + 0.632*e0.
    Relies on module-level `attributes`, `classes_number`, `FNNClassifier`
    and `accuracy`. Columns 0..53 are inputs, column 54 the target
    (hard-coded slice widths — TODO confirm against the dataset).
    """
    print trndata.calculateStatistics()
    np_array = np.hstack((trndata['input'], trndata['target']))
    my_range = range(np_array.shape[0])
    print trndata['target'].shape
    app_sum = 0
    e0_sum = 0
    for i in range(iter):
        # sample rows with replacement; note the inner `i` shadows the loop var
        indices = list(set([random.choice(my_range) for i in my_range]))
        np_train_array = np.vstack(np_array[indices])
        new_training_samples = ClassificationDataSet(attributes, classes_number)
        new_training_samples.setField('input', np_train_array[:, :54])
        new_training_samples.setField('target', np_train_array[:, 54:55])
        new_training_samples._convertToOneOfMany()
        # out-of-bag rows form the test set
        test_indices = list(set(my_range) - set(indices))
        new_test_samples = ClassificationDataSet(attributes, classes_number)
        np_test_array = np.vstack(np_array[test_indices])
        new_test_samples.setField('input', np_test_array[:, :54])
        new_test_samples.setField('target', np_test_array[:, 54:55])
        new_test_samples._convertToOneOfMany()
        print new_training_samples.calculateStatistics()
        print new_test_samples.calculateStatistics()
        model = FNNClassifier()
        model.train(new_training_samples, new_test_samples)
        (xtrn, ytrn) = model.predict(new_training_samples)
        (xtest, ytest) = model.predict(new_test_samples)
        app_sum += (1 - accuracy(xtrn, ytrn))
        e0_sum += (1 - accuracy(xtest, ytest))
    app = app_sum / float(iter)
    e0 = e0_sum / float(iter)
    # the .632 bootstrap combination
    e632 = 0.368 * app + 0.632 * e0
    print e632
    return e632
def NNBackPropCustom(trainInputs, trainTarget, testInputs, testTarget, inputDim, targetDim, numClass, classLabels, bias=True, numHiddenLayers=2, numEpoch=10, momentum=0.1, weightdecay=0.01):
    """Train a softmax feed-forward net with backprop, tracking errors per epoch.

    Returns (epoch_v, trnerr_backprop, tsterr_backprop): the epoch numbers
    and the train/test percentError after each epoch.

    NOTE(review): `numHiddenLayers` is passed to buildNetwork as the hidden
    layer *size* (number of hidden units), not a layer count — the name is
    misleading; confirm callers' intent.
    """
    #NN Data Preparation
    assert ( trainInputs.shape[0] == trainTarget.shape[0] ), "Inputs count and target count for your training data do not match for NN Analysis"
    assert ( testInputs.shape[0] == testTarget.shape[0] ), "Inputs count and target count for your test data do not match for NN Analysis"
    training_data = ClassificationDataSet(inputDim, targetDim, nb_classes=numClass, class_labels=classLabels)
    test_data = ClassificationDataSet(inputDim, targetDim, nb_classes=numClass, class_labels=classLabels)
    training_data.setField('input', trainInputs)
    training_data.setField('target', trainTarget)
    training_data.setField('class', trainTarget)
    test_data.setField('input', testInputs)
    test_data.setField('target', testTarget)
    test_data.setField('class', testTarget)
    training_data._convertToOneOfMany()
    test_data._convertToOneOfMany()
    # NN With BackPropagation
    fnn_backprop = buildNetwork(training_data.indim, numHiddenLayers, training_data.outdim, bias=bias, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn_backprop, dataset=training_data, momentum=momentum, verbose=True, weightdecay=weightdecay)
    epochs = numEpoch
    epoch_v = []
    trnerr_backprop = []
    tsterr_backprop = []
    for i in xrange(epochs):
        # If you set the 'verbose' trainer flag, this will print the total error as it goes.
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), training_data['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=test_data), test_data['class'])
        print("epoch: %4d" % trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult)
        epoch_v.append(trainer.totalepochs)
        trnerr_backprop.append(trnresult)
        tsterr_backprop.append(tstresult)
    return epoch_v, trnerr_backprop, tsterr_backprop
# OutputLayer classnumber = 10 outputLayer = LinearLayer(classnumber, name="Ouput") nn.addOutputModule(outputLayer) #nn.addConnection(FullConnection(thirdHiddenLayer, outputLayer, name="hidden2 to out")) nn.addConnection(FullConnection(secondHiddenLayer, outputLayer, name="hidden2 to out")) #nn.addConnection(FullConnection(biaiToOuput, outputLayer, name="hidden2 to out")) nn.sortModules() print(nn) alldata = ClassificationDataSet(feature_size, nb_classes=10, class_labels=[str(x) for x in range(0, 10)]) alldata.setField("input", X) alldata.setField("target", Y) alldatatest = ClassificationDataSet(feature_size, nb_classes=10, class_labels=[str(x) for x in range(0, 10)]) alldatatest.setField("input", Xtest) alldatatest.setField("target", Ytest) alldata._convertToOneOfMany() alldatatest._convertToOneOfMany() print("Number of training patterns: ", len(alldata)) print("Input and output dimensions: ", alldata.indim, alldata.outdim) print("First sample (input, target, class):") print(alldata['input'][0], alldata['target'][0], alldata['class'][0]) # nn = NetworkReader.readFrom('filename.xml')
# Collect each base classifier's predictions on the test set, then (optionally)
# train a blending neural net on the out-of-fold predictions.
# (clfs, X, y, test, blend_inputs, blend_targets, opts come from earlier in the file.)
print('Gathering full predictions from models...')
full_preds = zeros((test.shape[0],n_clfs))
for n,clf in enumerate(clfs):
    clf.fit(X, y)
    full_preds[:,n] = clf.predict(test)
if opts.ann:
    # prevent PyBrain index error by pretending there are 5 classes
    ds = ClassificationDataSet(n_clfs, 1, nb_classes=5)
    # we gotta reshape, PyBrain expects each label to be on its own row
    # PyBrain requires classes starting from zero (?!?)
    blend_targets = reshape(blend_targets - 1, (blend_targets.shape[0], 1))
    # check that they have the right dimensions before moving on
    assert(blend_inputs.shape[0] == blend_targets.shape[0])
    ds.setField('input', blend_inputs)
    ds.setField('target', blend_targets)
    trainDS, testDS = ds.splitWithProportion(0.25)
    trainDS._convertToOneOfMany()
    testDS._convertToOneOfMany()
    print('Training neural network...')
    net = buildNetwork(trainDS.indim, 5, trainDS.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trainDS, momentum=0.1, weightdecay=0.01)
    # do 20 iterations of 5 epochs each (total 100 epochs)
    for n_iter in range(20):
        trainer.trainEpochs(5)
        trnresult = percentError(trainer.testOnClassData(), trainDS['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=testDS), testDS['class'])
        print("train error: %5.2f%%" % trnresult)
        print("test error: %5.2f%%" % tstresult)
validation_proportion = 0.15 # load data x_train = np.loadtxt("input.dat", delimiter=' ') print x_train.shape y_train = np.loadtxt("output.dat", delimiter=' ') print y_train.shape input_size = x_train.shape[1] target_size = y_train.shape[1] # prepare dataset ds = ClassificationDataSet(input_size, target_size, nb_classes=3) ds.setField('input', x_train) ds.setField('target', y_train) tstdata, trndata = ds.splitWithProportion(0.25) # init and train net = buildNetwork(input_size, hidden_size, target_size, outclass=SoftmaxLayer) trainer = BackpropTrainer(net, dataset=trndata, learningrate=0.01, verbose=True, weightdecay=.01) print "training for {} epochs...".format(epochs) trainer.trainUntilConvergence(verbose=True,
# Script: split the combined array X1 into examples X and labels Y, build
# train/test datasets (90/10 split), and load or build the digit network.
# (X1, size_of_example, num_of_labels, num_of_examples come from earlier in the file.)
X = X1[:, 0:size_of_example]
Y = X1[:, size_of_example:X1.shape[1]]
# add the contents of digits to a dataset
train_data = ClassificationDataSet(size_of_example, num_of_labels)
test_data = ClassificationDataSet(size_of_example, num_of_labels)
data_split = int(num_of_examples * 0.9)
for i in range(0, data_split):
    train_data.addSample(X[i, :], Y[i, :])
# setting the field names
train_data.setField('input', X[0:data_split, :])
train_data.setField('target', Y[0:data_split, :])
for i in range(data_split, num_of_examples):
    test_data.addSample(X[i, :], Y[i, :])
test_data.setField('input', X[data_split:num_of_examples, :])
test_data.setField('target', Y[data_split:num_of_examples, :])
# reuse a previously saved network when available
if os.path.isfile('dig.xml'):
    net = NetworkReader.readFrom('dig.xml')
else:
    # NOTE(review): this call is cut off in the source as provided — its
    # remaining arguments continue beyond this chunk.
    net = buildNetwork(size_of_example, 185, num_of_labels,
def Train(self,feat_list=None,type='logreg',gamma=0.0,domeanstd=True,special_bias=None,add_bias=True, weight=None, class_instance=None, method='sigmoid',factor=10.0,arch=[10], cv_feats=None, cv_special_bias=None,cv_class_instance=None):
    """Train the classifier on self._Xtrain / self._Ytrain.

    Assembles the feature matrix from `feat_list`, normalizes it (mean/std
    stored in self.m / self.std), optionally appends a special bias column,
    then dispatches on `type`: 'linsvm' / 'logreg' return (w, b);
    'logreg_atwv' / 'nn_atwv' store ATWV-trained weights; 'nn_debug'
    trains a PyBrain softmax net with a decreasing learning-rate schedule
    and returns it.

    NOTE(review): the parameter `type` shadows the builtin, and the default
    arch=[10] is a mutable default (shared across calls) — left unchanged here.
    """
    if feat_list==None:
        feat_list=self.features
    self.feat_list=feat_list
    self._gamma=gamma
    self._type=type
    self._special_bias = special_bias
    self._add_bias = add_bias
    Xtrain_feats = np.ascontiguousarray(np.hstack((self._Xtrain[feat] for feat in feat_list)))
    self.m, self.std = classifier.feature_meanstd(Xtrain_feats)
    if domeanstd==False: #hacky, overwrite the things we computed
        self.m[:] = 0
        self.std[:] = 1
    Xtrain_feats -= self.m
    Xtrain_feats /= self.std
    if special_bias != None:
        Xtrain_feats = np.ascontiguousarray(np.hstack((Xtrain_feats, special_bias)))
    #CV
    if cv_feats!=None:
        cv_feats = np.ascontiguousarray(np.hstack((cv_feats[feat] for feat in feat_list)))
        cv_feats -= self.m
        cv_feats /= self.std
        if special_bias != None:
            cv_feats = np.ascontiguousarray(np.hstack((cv_feats, cv_special_bias)))
    '''Classifier stage'''
    if type=='linsvm':
        self.w, self.b = classifier.svm_onevsall(Xtrain_feats, self._Ytrain, self._gamma, weight = weight, special_bias=special_bias, add_bias=add_bias)
        return (self.w,self.b)
    elif type=='logreg':
        self.w, self.b = l2logreg_onevsall(Xtrain_feats, self._Ytrain, self._gamma, weight = weight, special_bias=special_bias, add_bias=add_bias)
        return (self.w,self.b)
    elif type=='logreg_atwv':
        self.w, self.b = Train_atwv(Xtrain_feats,class_instance=class_instance,weight=weight,special_bias=special_bias, add_bias=add_bias, method=method, factor=factor, gamma=self._gamma, cv_class_instance=cv_class_instance, cv_feats=cv_feats)
    elif type=='nn_atwv':
        self._arch = arch
        self._weights_nn = Train_atwv_nn(Xtrain_feats,class_instance=class_instance,weight=weight,special_bias=special_bias, add_bias=add_bias, arch=self._arch, method=method, factor=factor, gamma=self._gamma, cv_class_instance=cv_class_instance, cv_feats=cv_feats)
        #self._weights_nn = Train_atwv_nn(Xtrain_feats,class_instance=class_instance,weight=self._weights_nn,special_bias=special_bias, add_bias=add_bias,
        #                                 arch=self._arch, method=method, factor=factor*10.0)
    elif type=='nn_debug':
        if mpi.COMM.Get_size() > 1:
            print 'Warning!!! Running NN training with MPI with more than one Node!'
            #FIXME: Collect X and Y at root to avoid this
            # prob = mpi.COMM.gather(prob)
            # if mpi.is_root():
            #     np.vstack(prob)
            #     #Train
            #     mpi.COMM.Bcast(self._nn)
            # mpi.distribute(prob)
        DS = ClassificationDataSet( Xtrain_feats.shape[1], 1, nb_classes=2 )
        #for i in range(Xtrain_feats.shape[0]):
        #    DS.addSample( Xtrain_feats[i,:], [self._Ytrain[i]] )
        DS.setField('input', Xtrain_feats)
        DS.setField('target', self._Ytrain[:,np.newaxis])
        DS._convertToOneOfMany()
        self._nn = buildNetwork(DS.indim, 10, DS.outdim, outclass=SoftmaxLayer, fast=True)
        # three training stages with a 10x-decreasing learning rate
        self._nn_trainer = BackpropTrainer( self._nn, dataset=DS, momentum=0.1, verbose=True, weightdecay=gamma, learningrate=0.01, lrdecay=1.0)
        self._nn_trainer.trainOnDataset(DS,epochs=8)
        self._nn_trainer = BackpropTrainer( self._nn, dataset=DS, momentum=0.1, verbose=True, weightdecay=gamma, learningrate=0.001, lrdecay=1.0)
        self._nn_trainer.trainOnDataset(DS,epochs=8)
        self._nn_trainer = BackpropTrainer( self._nn, dataset=DS, momentum=0.1, verbose=True, weightdecay=gamma, learningrate=0.0001, lrdecay=1.0)
        self._nn_trainer.trainOnDataset(DS,epochs=5)
        return self._nn
# Script: self-play training loop for a 2048-playing net — each round the
# game generates (state, move) samples via the net's own softmax policy,
# then the net is retrained on them.
data = ClassificationDataSet(16, 1, nb_classes=4)  # 4x4 board -> 4 moves
fnn = buildNetwork(16, 10, 10, 4, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)
game = _2048(length=4, pick_rate=0.2)
for i in xrange(200):
    print 'Learning %02d round(s)' % i
    # play 10 games with the current net's activation as the move policy
    tr_x, tr_l = game.mul_test( 10, lambda a, b, c, d, e: softmax_dec(a, b, c, d, e, f=fnn.activate), addition_arg=True)
    data.setField('input', tr_x)
    data.setField('target', tr_l.reshape(-1, 1))
    data._convertToOneOfMany()
    trainer = BackpropTrainer( fnn, dataset=data) #, momentum=0.1, verbose=True, weightdecay=0.01)
    print trainer.train()
'''
best child
-------------------------------------------------------------------------
max round: 0589 | avr round: 215.40
max point: 8300 | avr point: 2400.40
max block: 512
'''
# Notebook cells: build a 40-class dataset from HOG features and train a
# feed-forward net. (`hogs` and `target` are defined in earlier cells.)
from sklearn import datasets
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork
# In[7]:
target = target.reshape(-1,1)  # PyBrain wants one label per row
# In[8]:
ds = ClassificationDataSet(hogs.shape[1],1,nb_classes=40)
ds.setField('input',hogs)
ds.setField('target',target)
tstdata,trndata = ds.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
print "picture_size",trndata.indim,"number of pictures",trndata.outdim
# In[9]:
fnn = buildNetwork(trndata.indim,110,trndata.outdim)
trainer = BackpropTrainer(fnn,dataset=trndata,momentum=0.9,learningrate=0.01,verbose=True)
# In[15]:
def ANN(X_train, Y_train, X_test, Y_test, *args):
    """
    An Artificial Neural Network, based on the python library pybrain. In the future this
    function should be modified to use the SkyNet ANN code instead.

    INPUTS:
    X_train - An array containing the features of the training set, of size (N_samples, N_features)
    Y_train - An array containing the class labels of the training set, of size (N_samples,)
    X_test - An array containing the features of the testeing set, of size (N_samples, N_features)
    Y_test - An array containing the class labels of the testing set, of size (N_samples)
    *args - Currently unused. In the future could specify the network architecture and
            activation functions at each node.

    OUTPUTS:
    probs - an array containing the probabilities for each class for each member of the testing set,
            of size (N_samples, N_classes)
    """
    # work on copies so the caller's label arrays are untouched
    Y_train_copy = Y_train.copy()
    Y_test_copy = Y_test.copy()
    #Convert class labels from 1,2,3 to 0,1,2 as _convertToOneOfMany requires this
    Y_train_copy[(Y_train_copy==1)]=0
    Y_train_copy[(Y_train_copy==2)]=1
    Y_train_copy[(Y_train_copy==3)]=2
    Y_test_copy[(Y_test_copy==1)]=0
    Y_test_copy[(Y_test_copy==2)]=1
    Y_test_copy[(Y_test_copy==3)]=2
    #Put all the data in datasets as required by pybrain
    Y_train_copy = np.expand_dims(Y_train_copy, axis=1)
    Y_test_copy = np.expand_dims(Y_test_copy, axis=1)
    traindata = ClassificationDataSet(X_train.shape[1], nb_classes = len(np.unique(Y_train_copy))) #Preallocate dataset
    traindata.setField('input', X_train) #Add named fields
    traindata.setField('target', Y_train_copy)
    traindata._convertToOneOfMany() #Convert classes 0, 1, 2 to 001, 010, 100
    testdata = ClassificationDataSet(X_test.shape[1], nb_classes=len(np.unique(Y_test_copy)))
    testdata.setField('input', X_test)
    testdata.setField('target', Y_test_copy)
    testdata._convertToOneOfMany()
    #Create ANN with n_features inputs, n_classes outputs and HL_size nodes in hidden layers
    N = pb.FeedForwardNetwork()
    HL_size1 = X_train.shape[1]*2+2
    HL_size2 = X_train.shape[1]*2+2
    #Create layers and connections
    in_layer = LinearLayer(X_train.shape[1])
    hidden_layer1 = SigmoidLayer(HL_size1)
    hidden_layer2 = SigmoidLayer(HL_size2)
    out_layer = SoftmaxLayer(len(np.unique(Y_test_copy))) #Normalizes output so as to sum to 1
    in_to_hidden1 = FullConnection(in_layer, hidden_layer1)
    hidden1_to_hidden2 = FullConnection(hidden_layer1, hidden_layer2)
    hidden2_to_out = FullConnection(hidden_layer2, out_layer)
    #Connect them up
    N.addInputModule(in_layer)
    N.addModule(hidden_layer1)
    N.addModule(hidden_layer2)
    N.addOutputModule(out_layer)
    N.addConnection(in_to_hidden1)
    N.addConnection(hidden1_to_hidden2)
    N.addConnection(hidden2_to_out)
    N.sortModules()
    #Create the backpropagation object
    trainer = BackpropTrainer(N, dataset=traindata, momentum=0.1, verbose=False, weightdecay=0.01)
    #Train the network on the data for some number of epochs
    for counter in np.arange(40):
        trainer.train()
    #Run the network on testing data
    probs = N.activate(X_test[0, :])
    probs = np.expand_dims(probs, axis=0)
    for counter in np.arange(X_test.shape[0]-1):
        next_probs = N.activate(X_test[counter+1, :])
        next_probs = np.expand_dims(next_probs, axis=0)
        probs = np.append(probs, next_probs, axis=0)
    return probs
# Script: load the Wisconsin breast-cancer CSV (9 feature columns + label),
# wrap it in a 2-class dataset and split 33% off for testing.
raw_data = np.genfromtxt('BreastCancerWisconsinDataset_modified.txt', delimiter=",", skip_header=1)
raw_inputs = raw_data[:, 0:-1]
raw_target = raw_data[:, 9:]  # label is the 10th column
assert (raw_inputs.shape[0] == raw_target.shape[0] ), "Inputs count and target count do not match"
all_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)
test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)
# re-copy the split samples into fresh datasets (splitWithProportion loses
# the class metadata)
test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample( test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])
# NOTE(review): this constructor call is cut off in the source as provided —
# its remaining arguments continue beyond this chunk.
training_data = ClassificationDataSet(9,
performance = PerformanceMetrics(y_test) # Train Models: Bayes, Tree and MLP classifier_bayes = BernoulliNB() classifier_bayes.fit(X_train, y_train) classifier_forest = RandomForestClassifier(max_depth=5) classifier_forest.fit(X_train, y_train) classifier_decisionTree = DecisionTreeClassifier(max_depth=5) classifier_decisionTree.fit(X_train, y_train) ## Data for MLP target = y_train.reshape(-1, 1) networkTrainDataset = ClassificationDataSet(X_train.shape[1], 1, nb_classes=len(np.unique(y_train))) networkTrainDataset.setField('input', X_train) networkTrainDataset.setField('target', target) networkTrainDataset._convertToOneOfMany() target = y_test.reshape(-1, 1) networkTestDataset = ClassificationDataSet(X_test.shape[1], 1, nb_classes=len(np.unique(y_test))) networkTestDataset.setField('input', X_test) networkTestDataset.setField('target', target) networkTestDataset._convertToOneOfMany() ##Train the MLP epochs = 2 hidden_layer_size = X_train.shape[1] / 2 #5 feedfwdNeuNet = buildNetwork(
# In[6]: from sklearn import datasets from pybrain.datasets import ClassificationDataSet from pybrain.supervised.trainers import BackpropTrainer from pybrain.tools.shortcuts import buildNetwork # In[7]: target = target.reshape(-1, 1) # In[8]: ds = ClassificationDataSet(hogs.shape[1], 1, nb_classes=40) ds.setField('input', hogs) ds.setField('target', target) tstdata, trndata = ds.splitWithProportion(0.25) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() print "picture_size", trndata.indim, "number of pictures", trndata.outdim # In[9]: fnn = buildNetwork(trndata.indim, 110, trndata.outdim) trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.9, learningrate=0.01, verbose=True)
# initialize performance matrix performance = PerformanceMetrics(y_test) # Train Models: Bayes, Tree and MLP classifier_bayes = BernoulliNB() classifier_bayes.fit(X_train, y_train) classifier_forest = RandomForestClassifier(max_depth=5) classifier_forest.fit(X_train, y_train) classifier_decisionTree = DecisionTreeClassifier(max_depth=5) classifier_decisionTree.fit(X_train, y_train) ## Data for MLP target = y_train.reshape(-1,1) networkTrainDataset = ClassificationDataSet(X_train.shape[1], 1, nb_classes=len(np.unique(y_train))) networkTrainDataset.setField('input', X_train) networkTrainDataset.setField('target', target) networkTrainDataset._convertToOneOfMany() target = y_test.reshape(-1,1) networkTestDataset = ClassificationDataSet(X_test.shape[1], 1, nb_classes=len(np.unique(y_test))) networkTestDataset.setField('input', X_test) networkTestDataset.setField('target', target) networkTestDataset._convertToOneOfMany() ##Train the MLP epochs = 2 hidden_layer_size = X_train.shape[1]/2 #5 feedfwdNeuNet = buildNetwork(networkTrainDataset.indim, hidden_layer_size, networkTrainDataset.outdim, bias = True, outclass=SoftmaxLayer )#buildNetwork( X_train.shape[1], 5, len(np.unique(y_test)), outclass=SoftmaxLayer ) trainer = BackpropTrainer(feedfwdNeuNet, dataset=networkTrainDataset, momentum=0.1, verbose=False, weightdecay=0.01) print "Training MLP..."
def set_pybrain_nn(X, y):
    """Build a 3-hidden-layer feed-forward net, train it on (X, y) for a
    fixed number of epochs, and pickle the trained network to
    'model/nn_brain'. Prints the RMSE after each epoch."""
    params_len = len(X[0])
    print(params_len)
    hidden_size = 100
    output_layer_num = 2
    epochs = 200
    # init and train
    net = FeedForwardNetwork()
    """ Next, we're constructing the input, hidden and output layers. """
    inLayer = LinearLayer(params_len)
    hiddenLayer = SigmoidLayer(hidden_size)
    hiddenLayer1 = SigmoidLayer(hidden_size)
    hiddenLayer2 = SigmoidLayer(hidden_size)
    outLayer = LinearLayer(output_layer_num)
    """ (Note that we could also have used a hidden layer of type TanhLayer, LinearLayer, etc.) Let's add them to the network: """
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer)
    net.addModule(hiddenLayer1)
    net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    """ We still need to explicitly determine how they should be connected. For this we use the most common connection type, which produces a full connectivity between two layers (or Modules, in general): the 'FullConnection'. """
    in2hidden = FullConnection(inLayer, hiddenLayer)
    hidden2hidden = FullConnection(hiddenLayer, hiddenLayer1)
    hidden2hidden1 = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden2out = FullConnection(hiddenLayer2, outLayer)
    net.addConnection(in2hidden)
    net.addConnection(hidden2hidden)
    net.addConnection(hidden2hidden1)
    net.addConnection(hidden2out)
    """ All the elements are in place now, so we can do the final step that makes our MLP usable, which is to call the 'sortModules()' method. """
    net.sortModules()
    #ds = SupervisedDataSet(params_len, output_layer_num)
    ds = ClassificationDataSet(params_len, output_layer_num, nb_classes=2)
    ds.setField('input', X)
    ds.setField('target', y)
    trainer = BackpropTrainer(net, ds)
    print("training for {} epochs...".format(epochs))
    #trainer.trainUntilConvergence(verbose=True)
    #trainer.train()
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
    # NOTE(review): the file handle from open() is never closed — consider
    # a `with` block
    pickle.dump(net, open('model/nn_brain', 'wb'))
def hillclimb(domain,costf): # Create a random solution sol=[random.randint(domain[i][0],domain[i][1]) for i in range(len(domain))] # Main loop while 1: # Create list of neighboring solutions neighbors=[] for j in range(len(domain)): # One away in each direction if sol[j]>domain[j][0]: neighbors.append(sol[0:j]+[sol[j]+1]+sol[j+1:]) if sol[j]<domain[j][1]: neighbors.append(sol[0:j]+[sol[j]-1]+sol[j+1:]) # See what the best solution amongst the neighbors is current=costf(sol) best=current for j in range(len(neighbors)): cost=costf(neighbors[j]) if cost<best: best=cost sol=neighbors[j] # If there's no improvement, then we've reached the top if best==current: break return sol def plot_learning_curve(x, training_erorr, test_error, graph_title, graph_xlabel, graph_ylabel, ylim=None, xlim=None): plt.figure() plt.title(graph_title) if ylim is not None: plt.ylim(*ylim) if xlim is not None: plt.xlim(*xlim) plt.xlabel(graph_xlabel) plt.ylabel(graph_ylabel) train_error_mean = np.mean(training_erorr) train_error_std = np.std(training_erorr) test_error_mean = np.mean(test_error) test_error_std = np.std(test_error) plt.grid() plt.fill_between(x, training_erorr - train_error_std, training_erorr + train_error_std, alpha=0.1, color="r") plt.fill_between(x, test_error - test_error_std, test_error + test_error_std, alpha=0.1, color="g") print x print train_error_mean print training_erorr plt.plot(x, training_erorr, 'o-', color="r", label="Training score") plt.plot(x, test_error, 'o-', color="g", label="Test Score") plt.legend(loc="best") plt.savefig('plots/'+graph_title+'.png') plt.close() #plt.show() #************************End of Functions************************************************** #************************Start Data Prep******************************************** raw_data = np.genfromtxt('BreastCancerWisconsinDataset_modified.txt', delimiter=",", skip_header=1) raw_inputs = raw_data[:,0:-1] raw_target = raw_data[:,9:] assert (raw_inputs.shape[0] == 
raw_target.shape[0]),"Inputs count and target count do not match" all_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant']) all_data.setField('input', raw_inputs) all_data.setField('target', raw_target) all_data.setField('class', raw_target) test_data_temp, training_data_temp = all_data.splitWithProportion(0.33) test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant']) for n in xrange(0, test_data_temp.getLength()): test_data.addSample(test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1]) training_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant']) for n in xrange(0, training_data_temp.getLength()): training_data.addSample(training_data_temp.getSample(n)[0], training_data_temp.getSample(n)[1]) training_data._convertToOneOfMany() test_data._convertToOneOfMany() #********************End of Data Preparation*************************** #********************NN With GA*************************** def fitFunction (net, dataset=training_data, targetClass=training_data['class']): error = percentError(testOnClassData_custom(net, dataset=training_data), targetClass) return error stepSize = [.05, .5, 1] for s in stepSize: fnn_ga = buildNetwork(training_data.indim, 2, training_data.outdim, bias=True, outclass=SoftmaxLayer) domain = [(-1,1)]*len(fnn_ga.params) #print domain epochs = 20 epoch_v = [] trnerr_ga = [] tsterr_ga = [] iteration = 5 for i in xrange(epochs): winner = geneticoptimize(iteration,domain,fnn_ga,fitFunction,popsize=100,step=s, mutprob=0.2,elite=0.2) fnn_ga.params[:] = winner[:] training_error = fitFunction(fnn_ga, dataset=training_data, targetClass=training_data['class']) test_error = fitFunction(fnn_ga, dataset=test_data, targetClass=test_data['class']) epoch_v.append(i*iteration) trnerr_ga.append(training_error) tsterr_ga.append(test_error) print ("This is the training and test error at the epoch: ", training_error, test_error, i*iteration) ylim = (0, 
            70)   # upper y-limit for the error plot (closes the tuple opened above)
    xlim = (50, 1005)
    # NOTE(review): xlim is computed but the plot below is called with xlim=None — confirm.
    print ("This is epoch_value",epoch_v)
    print ("This is training ga",trnerr_ga)
    print ("This is test ga",tsterr_ga)
    plot_learning_curve(epoch_v, trnerr_ga, tsterr_ga, "Neural Network With GA_step_"+str(s), "Epochs", "Error %", ylim, xlim=None)
#*****************End of GA NN*******************************

# Diagnostics: dataset sizes, dimensions and field shapes.
print ("This is the length of the training and test data, respectively", len(training_data), len(test_data))
print (training_data.indim, training_data.outdim)
print ("This is the shape of the input", all_data['input'].shape)
print ("This is the shape of the target", all_data['target'].shape)
print ("This is the shape of the class", all_data['class'].shape)
print ("This is count of classes", all_data.nClasses)
print ("Here is the statistics on the class", all_data.calculateStatistics())
print ("Here the linked fields", all_data.link)
print ("This is the shape of the input in training", training_data['input'].shape)
print ("This is the shape of the target in training", training_data['target'].shape)
print ("This is the shape of the class in training", training_data['class'].shape)
# NOTE(review): the three prints below say "training" but report the *test* data.
print ("This is the shape of the input in training", test_data['input'].shape)
print ("This is the shape of the target in training", test_data['target'].shape)
print ("This is the shape of the class in training", test_data['class'].shape)
# (tail of getData(directory, target) — the def header, and the initialisation of
# imarray/tararray, are outside this chunk)
for subdir, dirs, files in os.walk(directory):
    for f in files:
        path = os.path.join(subdir, f)
        image = cv2.imread(path)
        # NOTE(review): `image != None` performs an elementwise comparison on a
        # numpy array — this should almost certainly be `image is not None`; confirm.
        if image != None and image.shape[:2] != (0,0):
            image = cv2.resize(image, (200,200));   # normalise to 200x200 pixels
            imarray.append(np.ravel(image))         # flatten to a 200*200*3 feature vector
            tararray.append(target)
return (imarray,tararray)

# Label food images 1, non-food 0.
good,targood = getData("food",[1])
bad,tarbad = getData("nonfoods",[0])

DS = ClassificationDataSet(200*200*3, 1, 2,class_labels=['food', 'nonfood'])
DS.setField('input',good + bad)
DS.setField('target',targood + tarbad)
# 50/50 split, then one-hot targets (one output neuron per class).
tstdata, trndata = DS.splitWithProportion( 0.50 )
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

# Resume from a previously saved network when available.
if os.path.isfile('food.xml'):
    print "previous xml found:"
    fnn = NetworkReader.readFrom('food.xml')
else:
#print(Out) # Converter caracteres de saida para numeros inteiros com base no indice em labels Outputs = np.empty((1, 1), dtype=int) for i in np.nditer(Out): Outputs = np.append(Outputs, np.array([[Labels.index(i)]]), 0) Outputs = np.delete(Outputs, 0, 0) print("tamanhoooooooooo") print(len(Outputs)) # Construir dataset Dataset = ClassificationDataSet(120, 1, nb_classes=len(Labels)) assert (Inputs.shape[0] == Outputs.shape[0]) Dataset.setField('input', Inputs) Dataset.setField('target', Outputs) Dataset._convertToOneOfMany() #Construir e configurar as redes #RedeSoft1 Camada oculta - Linear Camada externa Softmax #RedeSoft2 Camada oculta - Sogmoide Camada externa Softmax #RedeSoft3 Camada oculta - Tangente Hiperbolica Camada externa Softmax RedeSoft1 = buildNetwork(120, 61, len(Labels), bias=True, hiddenclass=LinearLayer, outclass=SoftmaxLayer) RedeSoft2 = buildNetwork(120, 61,
#Load the Test Set as X_Test df_4 = pd.read_csv('C:/LearningMaterials/Kaggle/Mlsp/test_FNC.csv') df_5 = pd.read_csv('C:/LearningMaterials/Kaggle/Mlsp/test_SBM.csv') df_test_fnc = df_4.ix[:, 1:] df_test_sbm = df_5.ix[:,1:] np_test_fnc = df_test_fnc.values np_test_sbm = df_test_sbm.values X_test = np.hstack((np_test_fnc,np_test_sbm)) y_test_dummy = np.zeros((119748,1)) print "Dimensions of test X" print X_test.shape in_size = X_test.shape[1] ds_test = CDS( in_size, class_labels=['Healthy','Schizo'] ) ds_test.setField( 'input', X_test ) ds_test.setField( 'target', y_test_dummy) p = net.activateOnDataset( ds_test ) #for pred in p: # print pred[0] np.savetxt('submission.csv', p, delimiter=",", fmt = '%1.4f') #X_test_identifers = df_1.ix[:,0].values #y_predict = clf.predict_proba(X_test) #print y_predict #print y #y_predict = y_predict[:,1] #np.savetxt('submission.csv', y_predict, delimiter=",", fmt = '%1.4f')
scale = preprocessing.Normalizer().fit(XtrainPos)
# NOTE(review): fit_transform on the *test* set refits the normaliser on test
# data; normally this would be scale.transform(XtestPos) — confirm intent.
XtrainPos = scale.fit_transform(XtrainPos)
XtestPos = scale.fit_transform(XtestPos)

# Neural Network
YtrainPos = YtrainPos.reshape( -1, 1 )   # column vectors for the PyBrain target field
YtestPos = YtestPos.reshape( -1, 1 )
input_size = XtrainPos.shape[1]
target_size = YtrainPos.shape[1]
hidden_size = 50 # arbitrarily chosen

#ds = SupervisedDataSet(input_size,target_size )
# NOTE(review): the hard-coded 21 must equal XtrainPos.shape[1] for setField to
# succeed, and the net's 5 output units assume 5 classes after
# _convertToOneOfMany — confirm both against the data.
ds = ClassificationDataSet(21)
ds.setField( 'input', XtrainPos )
ds.setField( 'target', YtrainPos )
ds._convertToOneOfMany(bounds=[0, 1])

net = buildNetwork( input_size, hidden_size, 5, bias = True )
trainer = BackpropTrainer( net, ds )
epochs = 2
print "training for {} epochs...".format( epochs )
for i in range( epochs ):
    mse = trainer.train()   # one full epoch; returns mean squared error
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format( i + 1, rmse )

#trainer.trainUntilConvergence( verbose = True, validationProportion = 0.15, maxEpochs = 1000, continueEpochs = 10 )
from pybrain_nnet.networks import *
from pybrain.tools.xml.networkwriter import NetworkWriter
################################################
from pybrain.tools.xml.networkreader import NetworkReader
#NetworkWriter.writeToFile(net, 'net_shared_13-5.xml')
#net = NetworkReader.readFrom('filename.xml')
####################################

# load data from kaggle csv files: column 0 is the label, the rest are pixels
np_train = np.genfromtxt('data/train.csv', delimiter=',', skip_header= True, dtype='uint8')
np_test = np.genfromtxt('data/test.csv', delimiter=',', skip_header= True, dtype='uint8')

dset = ClassificationDataSet(np_train.shape[1] - 1, 1)
dset.setField('input', np_train[:,1:])
dset.setField('target', np_train[:,:1])
dset._convertToOneOfMany( )   # one-hot targets; also populates the 'class' field read below
labels = dset['class'].ravel().tolist()
##############################################

net=net_shared2()
# NOTE(review): `trainset` and `trainlabels` are not defined in this chunk —
# presumably meant to be `dset` and `labels`, or defined elsewhere; confirm.
trainer = BackpropTrainer(net,trainset)
for i in range(10):
    print (time.ctime() + ': Training epoch ' + str(i+1) + ' started')
    err = trainer.train()
    if i%1==0:
        out = trainer.testOnClassData()
        accu = accuracy_score(out,trainlabels)
# Training labels: second column of train_labels.csv
df_3 = pd.read_csv('C:/LearningMaterials/Kaggle/Mlsp/train_labels.csv')
df_train_labels = df_3.ix[:,1]
y = df_train_labels.values
y = y.reshape(-1, 1)   # column vector for the single-output network
print "Dimensions of input feature vector X "
print X.shape
input_size = X.shape[1]

#Get a linear model from the sklearn
#clf = linear_model.LogisticRegression(C=0.16,penalty='l1', tol=0.001, fit_intercept=True)
#clf.fit(X, y)

#Get a network that trains based on backpropagation method using the training data
ds = CDS( input_size, class_labels=['Healthy','Schizo'] )
ds.setField( 'input', X )
ds.setField( 'target', y)
print len(ds)

# init and train
# NOTE(review): hidden_size and output_model_file are not defined in this chunk —
# presumably set earlier in the file; confirm.
net = buildNetwork( input_size, hidden_size, 1, bias = True, outclass=SigmoidLayer )#feature vector, hidden layer size,
trainer = BackpropTrainer( net,ds )
# hold out 15% for validation; stop when validation error stops improving
trainer.trainUntilConvergence( verbose = True, validationProportion = 0.15, maxEpochs = 1000, continueEpochs = 10 )
pickle.dump( net, open( output_model_file, 'wb' ))
p = net.activateOnDataset( ds )   # predictions on the training set
np.savetxt('nn_sub2.csv', p, delimiter=",", fmt = '%1.4f')
#print net['in']
# build neural net and train it net = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer) trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01) trainer.trainUntilConvergence() #trainer.trainEpochs(5) print "trained" #trainer.trainEpochs(5) # Return a functor that wraps calling predict return NeuralNetworkClassifier(trainer) if __name__ == "__main__": # First obtain our training and testing data # Training has 50K samples, Testing 100K Xt, Yt, Xv = load_validation_data() # Run Neural Network over training data classifier = classify(Xt, Yt) # Prepare validation data and predict tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2) tstdata.setField('input', Xv) tstdata._convertToOneOfMany() # one output neuron per class predictions = classifier.predict(tstdata) # Write prediction to file write_test_prediction("out_nn.txt", np.array(majority))
# Определение основных констант HIDDEN_NEURONS_NUM = 100 # Количество нейронов, содержащееся в скрытом слое сети MAX_EPOCHS = 100 # Максимальное число итераций алгоритма оптимизации параметров сети # Инициализируем структуру данных ClassificationDataSet, используемую библиотекой pybrain. Для инициализации структура принимает два аргумента: количество признаков *np.shape(X)[1]* и количество различных меток классов *len(np.unique(y))*. # # Кроме того, произведем бинаризацию целевой переменной с помощью функции *_convertToOneOfMany( )* и разбиение данных на обучающую и контрольную части. # In[22]: # Конвертация данных в структуру ClassificationDataSet # Обучающая часть ds_train = ClassificationDataSet(np.shape(X)[1], nb_classes=len(np.unique(y_train))) # Первый аргумент -- количество признаков np.shape(X)[1], второй аргумент -- количество меток классов len(np.unique(y_train))) ds_train.setField('input', X_train) # Инициализация объектов ds_train.setField('target', y_train[:, np.newaxis]) # Инициализация ответов; np.newaxis создает вектор-столбец ds_train._convertToOneOfMany( ) # Бинаризация вектора ответов # Контрольная часть ds_test = ClassificationDataSet(np.shape(X)[1], nb_classes=len(np.unique(y_train))) ds_test.setField('input', X_test) ds_test.setField('target', y_test[:, np.newaxis]) ds_test._convertToOneOfMany( ) # Инициализируем двуслойную сеть и произведем оптимизацию ее параметров. Аргументами для инициализации являются: # # ds.indim -- количество нейронов на входном слое сети, совпадает с количеством признаков (в нашем случае 11), # # HIDDEN_NEURONS_NUM -- количество нейронов в скрытом слое сети, #
epochs = 600

# Concatenate the train and validation CSVs into one training set; the last
# column is the label, the remaining columns are features.
train = np.loadtxt( train_file, delimiter = ',' )
validation = np.loadtxt( validation_file, delimiter = ',' )
train = np.vstack(( train, validation ))

x_train = train[:,0:-1]
y_train = train[:,-1]
y_train = y_train.reshape( -1, 1 )   # column vector for the PyBrain target field

input_size = x_train.shape[1]
target_size = y_train.shape[1]

alldata = ClassificationDataSet(input_size, target_size, nb_classes=2)
alldata.setField( 'input', x_train )
alldata.setField( 'target', y_train )
# 15% test / 85% train split, then one-hot targets (one output neuron per class)
tstdata, trndata = alldata.splitWithProportion( 0.15 )
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )

fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
for i in range( epochs ):
    mse = trainer.train()   # one full epoch; returns mean squared error
    rmse = sqrt( mse )
    print "training RMSE, epoch {}: {}".format( i + 1, rmse )
validation_proportion = 0.15 # load data x_train = np.loadtxt("input.dat", delimiter = ' ') print x_train.shape y_train = np.loadtxt( "output.dat", delimiter = ' ' ) print y_train.shape input_size = x_train.shape[1] target_size = y_train.shape[1] # prepare dataset ds = ClassificationDataSet( input_size, target_size,nb_classes=3 ) ds.setField( 'input', x_train ) ds.setField( 'target', y_train ) tstdata, trndata = ds.splitWithProportion( 0.25 ) # init and train net = buildNetwork( input_size, hidden_size, target_size,outclass=SoftmaxLayer) trainer = BackpropTrainer( net,dataset=trndata, learningrate=0.01 ,verbose=True,weightdecay=.01 ) print "training for {} epochs...".format( epochs ) trainer.trainUntilConvergence( verbose = True, validationProportion = validation_proportion, maxEpochs = epochs, continueEpochs = continue_epochs ) trnresult = percentError(trainer.testOnClassData(),trndata['target']) tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['target']) print("epoch: %4d" % trainer.totalepochs,
def createDataset(X, Y, nb_classes=5):
    """Wrap a feature matrix and label vector in a one-hot PyBrain ClassificationDataSet.

    Arguments:
        X          -- 2-D array, shape (n_samples, n_features)
        Y          -- 1-D sequence of integer class labels, length n_samples
        nb_classes -- number of distinct classes (default 5, matching previous behavior)

    Returns a ClassificationDataSet whose 'target' field has been converted to
    one-of-many (one output column per class).
    """
    # Derive the input width from X itself instead of the module-level global
    # `nunits` the original relied on — setField requires the widths to match
    # X's column count anyway, so this removes a hidden global dependency.
    ds = ClassificationDataSet(X.shape[1], 1, nb_classes=nb_classes)
    ds.setField('input', X)
    # setField needs a column vector, hence the transpose
    ds.setField('target', np.asmatrix(Y).T)
    ds._convertToOneOfMany()
    return ds
verbose=True, weightdecay=0.01) trainer.trainUntilConvergence() #trainer.trainEpochs(5) print "trained" #trainer.trainEpochs(5) # Return a functor that wraps calling predict return NeuralNetworkClassifier(trainer) if __name__ == "__main__": # First obtain our training and testing data # Training has 50K samples, Testing 100K Xt, Yt, Xv = load_validation_data() # Run Neural Network over training data classifier = classify(Xt, Yt) # Prepare validation data and predict tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2) tstdata.setField('input', Xv) tstdata._convertToOneOfMany() # one output neuron per class predictions = classifier.predict(tstdata) # Write prediction to file write_test_prediction("out_nn.txt", np.array(majority))
def loadDataSet(x, y):
    """Build a 10-class PyBrain ClassificationDataSet from paired x/y arrays.

    x -- 2-D feature array, shape (n_samples, n_features)
    y -- 2-D target array, shape (n_samples, n_targets)
    """
    n_features, n_targets = x.shape[1], y.shape[1]
    ds = ClassificationDataSet(n_features, n_targets, nb_classes=10)
    for field_name, values in (('input', x), ('target', y)):
        ds.setField(field_name, values)
    return ds
# for klass in range(4): # input = multivariate_normal(means[klass], cov[klass]) # alldata.addSample(input, [klass]) #### load data from file from sys import argv file = argv[1] alldata = ClassificationDataSet(2, 1, nb_classes = 4) data = np.load(file) inputs = data['inputs'] target = data['targets'] alldata.setField('input',inputs) alldata.setField('target',target) print type(alldata) tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25) tstdata = ClassificationDataSet(2, 1, nb_classes=4) for n in xrange(0, tstdata_temp.getLength()): tstdata.addSample( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1]) trndata = ClassificationDataSet(2, 1, nb_classes=4) for n in xrange(0, trndata_temp.getLength()): trndata.addSample(
# scale = preprocessing.Normalizer().fit(XtrainPos) XtrainPos = scale.fit_transform(XtrainPos) XtestPos = scale.fit_transform(XtestPos) # Neural Network YtrainPos = YtrainPos.reshape(-1, 1) YtestPos = YtestPos.reshape(-1, 1) input_size = XtrainPos.shape[1] target_size = YtrainPos.shape[1] hidden_size = 50 # arbitrarily chosen #ds = SupervisedDataSet(input_size,target_size ) ds = ClassificationDataSet(21) ds.setField('input', XtrainPos) ds.setField('target', YtrainPos) ds._convertToOneOfMany(bounds=[0, 1]) net = buildNetwork(input_size, hidden_size, 5, bias=True) trainer = BackpropTrainer(net, ds) epochs = 2 print "training for {} epochs...".format(epochs) for i in range(epochs): mse = trainer.train() rmse = sqrt(mse) print "training RMSE, epoch {}: {}".format(i + 1, rmse) #trainer.trainUntilConvergence( verbose = True, validationProportion = 0.15, maxEpochs = 1000, continueEpochs = 10 )
# Определение основных констант HIDDEN_NEURONS_NUM = 100 # Количество нейронов, содержащееся в скрытом слое сети MAX_EPOCHS = 100 # Максимальное число итераций алгоритма оптимизации параметров сети # Инициализируем структуру данных ClassificationDataSet, используемую библиотекой pybrain. Для инициализации # структура принимает два аргумента: количество признаков np.shape(X)[1] и количество различных меток классов # len(np.unique(y)). # Кроме того, произведем бинаризацию целевой переменной с помощью функции _convertToOneOfMany( ) и разбиение # данных на обучающую и контрольную части. #%% # Конвертация данных в структуру ClassificationDataSet # Обучающая часть ds_train = ClassificationDataSet(np.shape(X)[1], nb_classes=len(np.unique(y_train))) # Первый аргумент -- количество признаков np.shape(X)[1], второй аргумент -- количество меток классов len(np.unique(y_train))) ds_train.setField('input', X_train) # Инициализация объектов ds_train.setField('target', y_train[:, np.newaxis]) # Инициализация ответов; np.newaxis создает вектор-столбец ds_train._convertToOneOfMany( ) # Бинаризация вектора ответов # Контрольная часть ds_test = ClassificationDataSet(np.shape(X)[1], nb_classes=len(np.unique(y_train))) ds_test.setField('input', X_test) ds_test.setField('target', y_test[:, np.newaxis]) ds_test._convertToOneOfMany( ) # Инициализируем двуслойную сеть и произведем оптимизацию ее параметров. Аргументами для инициализации являются: # ds.indim -- количество нейронов на входном слое сети, совпадает с количеством признаков (в нашем случае 11), # HIDDEN_NEURONS_NUM -- количество нейронов в скрытом слое сети, # ds.outdim -- количество нейронов на выходном слое сети, совпадает с количеством различных меток классов # (в нашем случае 3), # SoftmaxLayer -- функция softmax, используемая на выходном слое для решения задачи многоклассовой классификации. #%%
#alldata = ClassificationDataSet(2, 1, nb_classes=3) #for n in xrange(400): # for klass in range(3): # input = multivariate_normal(means[klass],cov[klass]) # alldata.addSample(input, [klass]) # #tstdata,trndata = alldata.splitWithProportion( 0.25 ) (X_train, Y_train, X_test, Y_test) = prepare_sets(TM_dict['TM_sparse'], split=1.0) class_dict = {'cotidiano':0, 'esporte':1, 'mundo':2, 'poder':3, 'ilustrada':4, 'mercado':5} ord_labels = ['cotidiano', 'esporte', 'mundo', 'poder', 'ilustrada', 'mercado'] DS = ClassificationDataSet(inp=X_train.shape[1], nb_classes=len(class_dict), class_labels=ord_labels) assert(X_train.shape[0] == Y_train.shape[0]) DS.setField('input', X_train) DS.setField('target', Y_train) tstdata,trndata = DS.splitWithProportion( 0.25 ) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] nneuronios = 10 fnn = buildNetwork( trndata.indim, nneuronios, trndata.outdim, outclass=SoftmaxLayer ) trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)