def classifer(labels, data):
    """Build a ClassificationDataSet from (value, label) pairs.

    Args:
        labels: sequence of class-label names for the dataset.
        data: iterable of ``(value, label)`` tuples, where ``value`` is a
            2-dimensional input vector and ``label`` the target class.

    Returns:
        The populated ClassificationDataSet.  (The original built the
        dataset and dropped it — it never returned anything — which made
        the function useless to callers; returning it fixes that while
        staying backward compatible.)
    """
    clsff = ClassificationDataSet(2, class_labels=labels)
    for value, label in data:
        clsff.appendLinked(value, label)
    # populate classHist / nClasses before handing the dataset back
    clsff.calculateStatistics()
    return clsff
def consturt_train_data(self):
    """Assemble the one-hot-encoded train/test/evaluation datasets.

    Returns a tuple ``(trndata, tstdata, testdata, ds)`` where ``ds`` is
    the full labelled dataset built from ``self.eigenvector`` /
    ``self.output_train``, ``trndata``/``tstdata`` are its 75/25 split,
    and ``testdata`` wraps ``self.test_data`` with a placeholder class 0.
    """
    ds = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for i, target in enumerate(self.output_train):
        ds.appendLinked(self.eigenvector[i], target)
    ds.calculateStatistics()

    # hold out 25% of the labelled samples for testing
    tstdata_temp, trndata_temp = ds.splitWithProportion(0.25)

    def _rebuild(split):
        # Re-materialise a split as a fresh ClassificationDataSet so it
        # can be one-hot encoded independently of the parent dataset.
        rebuilt = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
        for idx in range(split.getLength()):
            sample = split.getSample(idx)
            rebuilt.appendLinked(sample[0], sample[1])
        return rebuilt

    tstdata = _rebuild(tstdata_temp)
    trndata = _rebuild(trndata_temp)

    # the unlabelled evaluation data gets a dummy class of 0
    testdata = ClassificationDataSet(self.vct_len, 1, nb_classes=2)
    for sample in self.test_data:
        testdata.addSample(sample, [0])

    # one-hot encoding of the class targets for all three datasets
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    testdata._convertToOneOfMany()
    return trndata, tstdata, testdata, ds
def createDataset(): data = ClassificationDataSet(100,nb_classes=len(lettersDict.keys()), class_labels=lettersDict.keys()) allTheLetters = string.uppercase for letter in lettersDict.keys(): data.addSample(lettersDict[letter], allTheLetters.index(letter)) data._convertToOneOfMany(bounds=[0, 1]) print data.calculateStatistics() return data
def bootstrap(trndata, iter=100): """ check http://sci2s.ugr.es/keel/pdf/specific/articulo/jain_boot_87.pdf for notation """ print trndata.calculateStatistics() np_array = np.hstack((trndata['input'], trndata['target'])) my_range = range(np_array.shape[0]) print trndata['target'].shape app_sum = 0 e0_sum = 0 for i in range(iter): indices = list(set([random.choice(my_range) for i in my_range])) np_train_array = np.vstack(np_array[indices]) new_training_samples = ClassificationDataSet(attributes, classes_number) new_training_samples.setField('input', np_train_array[:, :54]) new_training_samples.setField('target', np_train_array[:, 54:55]) new_training_samples._convertToOneOfMany() test_indices = list(set(my_range) - set(indices)) new_test_samples = ClassificationDataSet(attributes, classes_number) np_test_array = np.vstack(np_array[test_indices]) new_test_samples.setField('input', np_test_array[:, :54]) new_test_samples.setField('target', np_test_array[:, 54:55]) new_test_samples._convertToOneOfMany() print new_training_samples.calculateStatistics() print new_test_samples.calculateStatistics() model = FNNClassifier() model.train(new_training_samples, new_test_samples) (xtrn, ytrn) = model.predict(new_training_samples) (xtest, ytest) = model.predict(new_test_samples) app_sum += (1 - accuracy(xtrn, ytrn)) e0_sum += (1 - accuracy(xtest, ytest)) app = app_sum / float(iter) e0 = e0_sum / float(iter) e632 = 0.368 * app + 0.632 * e0 print e632 return e632
def init_brain(learn_data, epochs, hidden_count, TrainerClass=BackpropTrainer):
    """Build and train a 7x7-input, 4-class feed-forward network.

    Args:
        learn_data: iterable of ``(input_vector, label)`` pairs where the
            label is one of the strings '0'..'3'; ``None`` aborts.
        epochs: number of training epochs to run.
        hidden_count: size of the single sigmoid hidden layer.
        TrainerClass: trainer factory, defaults to BackpropTrainer.

    Returns:
        The trained network (also serialised to ``data_dir``/net.xml),
        or ``None`` when no learn data was supplied.

    Side effects: writes timing stats to ``data_dir``/values.txt.
    """
    global data_dir
    print("\t Epochs: ", epochs)
    if learn_data is None:
        return None
    print("Building network")
    net = buildNetwork(7 * 7, hidden_count, 4, hiddenclass=SigmoidLayer)
    # fill dataset with learn data; labels arrive as strings, the dataset
    # wants integer class indices
    trans = {'0': 0, '1': 1, '2': 2, '3': 3}
    ds = ClassificationDataSet(7 * 7, nb_classes=4,
                               class_labels=['0', '1', '2', '3'])
    for inp, out in learn_data:
        ds.appendLinked(inp, [trans[out]])
    ds.calculateStatistics()
    print("\tNumber of classes in dataset = {0}".format(ds.nClasses))
    print("\tOutput in dataset is ", ds.getField('target').transpose())
    # one-hot encode the class targets for backprop
    ds._convertToOneOfMany(bounds=[0, 1])
    print("\tBut after convert output in dataset is \n", ds.getField('target'))
    trainer = TrainerClass(net, learningrate=0.1, verbose=True)
    trainer.setData(ds)
    print(
        "\tEverything is ready for learning.\nPlease wait, training in progress..."
    )
    start = time.time()
    trainer.trainEpochs(epochs=epochs)
    end = time.time()
    # Fixed: the stats file was opened without a context manager, leaking
    # the handle if a write raised; `with` guarantees it is closed.
    with open(data_dir + "/values.txt", "w") as f:
        f.write("Training time: %.2f \n" % (end - start))
        f.write("Total epochs: %s \n" % (trainer.totalepochs))
    print("Percent of error: ",
          percentError(trainer.testOnClassData(), ds['class']))
    print("\tOk. We have trained our network.")
    NetworkWriter.writeToFile(net, data_dir + "/net.xml")
    return net
def hillclimb(domain,costf): # Create a random solution sol=[random.randint(domain[i][0],domain[i][1]) for i in range(len(domain))] # Main loop while 1: # Create list of neighboring solutions neighbors=[] for j in range(len(domain)): # One away in each direction if sol[j]>domain[j][0]: neighbors.append(sol[0:j]+[sol[j]+1]+sol[j+1:]) if sol[j]<domain[j][1]: neighbors.append(sol[0:j]+[sol[j]-1]+sol[j+1:]) # See what the best solution amongst the neighbors is current=costf(sol) best=current for j in range(len(neighbors)): cost=costf(neighbors[j]) if cost<best: best=cost sol=neighbors[j] # If there's no improvement, then we've reached the top if best==current: break return sol def plot_learning_curve(x, training_erorr, test_error, graph_title, graph_xlabel, graph_ylabel, ylim=None, xlim=None): plt.figure() plt.title(graph_title) if ylim is not None: plt.ylim(*ylim) if xlim is not None: plt.xlim(*xlim) plt.xlabel(graph_xlabel) plt.ylabel(graph_ylabel) train_error_mean = np.mean(training_erorr) train_error_std = np.std(training_erorr) test_error_mean = np.mean(test_error) test_error_std = np.std(test_error) plt.grid() plt.fill_between(x, training_erorr - train_error_std, training_erorr + train_error_std, alpha=0.1, color="r") plt.fill_between(x, test_error - test_error_std, test_error + test_error_std, alpha=0.1, color="g") print x print train_error_mean print training_erorr plt.plot(x, training_erorr, 'o-', color="r", label="Training score") plt.plot(x, test_error, 'o-', color="g", label="Test Score") plt.legend(loc="best") plt.savefig('plots/'+graph_title+'.png') plt.close() #plt.show() #************************End of Functions************************************************** #************************Start Data Prep******************************************** raw_data = np.genfromtxt('BreastCancerWisconsinDataset_modified.txt', delimiter=",", skip_header=1) raw_inputs = raw_data[:,0:-1] raw_target = raw_data[:,9:] assert (raw_inputs.shape[0] == 
raw_target.shape[0]),"Inputs count and target count do not match" all_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant']) all_data.setField('input', raw_inputs) all_data.setField('target', raw_target) all_data.setField('class', raw_target) test_data_temp, training_data_temp = all_data.splitWithProportion(0.33) test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant']) for n in xrange(0, test_data_temp.getLength()): test_data.addSample(test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1]) training_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign','Malignant']) for n in xrange(0, training_data_temp.getLength()): training_data.addSample(training_data_temp.getSample(n)[0], training_data_temp.getSample(n)[1]) training_data._convertToOneOfMany() test_data._convertToOneOfMany() #********************End of Data Preparation*************************** #********************NN With GA*************************** def fitFunction (net, dataset=training_data, targetClass=training_data['class']): error = percentError(testOnClassData_custom(net, dataset=training_data), targetClass) return error stepSize = [.05, .5, 1] for s in stepSize: fnn_ga = buildNetwork(training_data.indim, 2, training_data.outdim, bias=True, outclass=SoftmaxLayer) domain = [(-1,1)]*len(fnn_ga.params) #print domain epochs = 20 epoch_v = [] trnerr_ga = [] tsterr_ga = [] iteration = 5 for i in xrange(epochs): winner = geneticoptimize(iteration,domain,fnn_ga,fitFunction,popsize=100,step=s, mutprob=0.2,elite=0.2) fnn_ga.params[:] = winner[:] training_error = fitFunction(fnn_ga, dataset=training_data, targetClass=training_data['class']) test_error = fitFunction(fnn_ga, dataset=test_data, targetClass=test_data['class']) epoch_v.append(i*iteration) trnerr_ga.append(training_error) tsterr_ga.append(test_error) print ("This is the training and test error at the epoch: ", training_error, test_error, i*iteration) ylim = (0, 
70) xlim = (50, 1005) print ("This is epoch_value",epoch_v) print ("This is training ga",trnerr_ga) print ("This is test ga",tsterr_ga) plot_learning_curve(epoch_v, trnerr_ga, tsterr_ga, "Neural Network With GA_step_"+str(s), "Epochs", "Error %", ylim, xlim=None) #*****************End of GA NN******************************* print ("This is the length of the training and test data, respectively", len(training_data), len(test_data)) print (training_data.indim, training_data.outdim) print ("This is the shape of the input", all_data['input'].shape) print ("This is the shape of the target", all_data['target'].shape) print ("This is the shape of the class", all_data['class'].shape) print ("This is count of classes", all_data.nClasses) print ("Here is the statistics on the class", all_data.calculateStatistics()) print ("Here the linked fields", all_data.link) print ("This is the shape of the input in training", training_data['input'].shape) print ("This is the shape of the target in training", training_data['target'].shape) print ("This is the shape of the class in training", training_data['class'].shape) print ("This is the shape of the input in training", test_data['input'].shape) print ("This is the shape of the target in training", test_data['target'].shape) print ("This is the shape of the class in training", test_data['class'].shape)
''' # one-hot encoding wm_df = pd.get_dummies(df) X = wm_df[wm_df.columns[1:-2]] # input Y = wm_df[wm_df.columns[-2:]] # output label = wm_df.columns._data[-2:] # class label # construction of data in pybrain's formation from pybrain.datasets import ClassificationDataSet ds = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label) for i in range(len(Y)): y = 0 if Y['好瓜_是'][i] == 1: y = 1 ds.appendLinked(X.values[i], y) ds.calculateStatistics() # generation of train set and test set (3:1) tstdata_temp, trndata_temp = ds.splitWithProportion(0.25) tstdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label) for n in range(0, tstdata_temp.getLength()): tstdata.appendLinked( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1] ) trndata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=label) for n in range(0, trndata_temp.getLength()): trndata.appendLinked( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1] ) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() '''
data = [map(float, line.rstrip().split()) for line in f] # outputs = [[gene[-1]] for gene in data] # for gene in data: # del gene[-1] ds = ClassificationDataSet(6, 1) for i, gene in enumerate(data): ds.addSample(gene[:-1], gene[-1]) tstdata, trndata = ds.splitWithProportion( 0.25 ) trndata._convertToOneOfMany( ) tstdata._convertToOneOfMany( ) print ds.calculateStatistics() print ds.nClasses print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] fnn = buildNetwork( trndata.indim, 7, trndata.outdim, outclass=LinearLayer ) trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01) # # ticks = arange(-3.,6.,0.2) # X, Y = meshgrid(ticks, ticks)
from pybrain.supervised.trainers import BackpropTrainer from pybrain.utilities import percentError #return the error(%) in the form of list/array import matplotlib.pyplot as plt data_set = load_breast_cancer() X = data_set.data # feature feature_names = data_set.feature_names y = data_set.target # label target_names = data_set.target_names normalized_X = preprocessing.normalize(X) '''construction of data in pybrain"s formulation ''' ds = ClassificationDataSet(30, 1, nb_classes=2, class_labels=y) for i in range(len(y)): ds.appendLinked(X[i], y[i]) ds.calculateStatistics() #return a class histogram """split of training and testing dataset""" tstdata_temp, trndata_temp = ds.splitWithProportion(0.5) tstdata = ClassificationDataSet( 30, 1, nb_classes=2 ) #the first parameter inp is used to specify the dimensionality of the input for n in range(0, tstdata_temp.getLength()): tstdata.appendLinked( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1]) trndata = ClassificationDataSet(30, 1, nb_classes=2) for n in range(0, trndata_temp.getLength()): trndata.appendLinked( trndata_temp.getSample(n)[0],
#convert back to a single column of class labels #alldata._convertToClassNb() #Target dimension is supposed to be 1 #The targets are class labels starting from zero for i in range(N): alldata.appendLinked(Xdf.ix[i,:],Ydf['default_Yes'].ix[i,:]) #generate training and testing data sets tstdata, trndata = alldata.splitWithProportion(0.10) #classes are encoded into one output unit per class, that takes on a certain value if the class is present trndata._convertToOneOfMany( ) tstdata._convertToOneOfMany( ) len(tstdata), len(trndata) #calculate statistics and generate histograms alldata.calculateStatistics() print alldata.classHist print alldata.nClasses print alldata.getClass(1) ######################################################################################### ######################################################################################### ######################################################################################### ######################################################################################### #construct the network from pybrain.structure import FeedForwardNetwork net=FeedForwardNetwork() #constructing the input, hidden and output layers from pybrain.structure import LinearLayer, SigmoidLayer inLayer = LinearLayer(3,name="input_nodes")
# Resolve the ROS package's data/config directories.
data_folder = join(roslib.packages.get_pkg_dir(PACKAGE), 'common', 'data')
cfg_folder = join(roslib.packages.get_pkg_dir(PACKAGE), 'common', 'config')
# A list of dataset names is merged into one JointDataset; a single name
# loads directly.
if isinstance(args.datasets, list):
    datasets = [Dataset(data_folder, name) for name in args.datasets]
    dataset = JointDataset(datasets)
else:
    dataset = Dataset(data_folder, args.datasets)
X, Y, labels = dataset.load(args.objects)
# standardise features: zero mean, unit variance per column
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
X -= mean
X /= std
cds = ClassificationDataSet(len(mean), class_labels=labels)
for x, y in zip(X, Y):
    cds.addSample(x, y)
print cds.calculateStatistics()
# 20% held out for testing, then one-hot encode the class targets
tstdata, trndata = cds.splitWithProportion(0.2)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
# single-hidden-layer (12 units) softmax classifier
nn = buildNetwork(trndata.indim, 12, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(nn, dataset=trndata, momentum=0.05, verbose=True,
                          weightdecay=0.01)
trainer.trainUntilConvergence(maxEpochs=400)
# percent classification error on train and held-out data
trnr = percentError(trainer.testOnClassData(), trndata['class'])
tstr = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
print 'Epochs: %4d, train error: %5.2f%%, test error: %5.2f%%' % (
    trainer.totalepochs, trnr, tstr)
if ds is None: if classification: ds = ClassificationDataSet(len(input_data), nb_classes=len(fitness.outputsClass), class_labels=fitness.outputsClass) else: ds = SupervisedDataSet(len(input_data), len(output_data)) ds.addSample(input_data, output_data) if count % 10 == 0: print count, count += 1 print "" if classification: ds._convertToOneOfMany() print "total count:", ds.calculateStatistics() print "indim:", ds.indim, "outdim:", ds.outdim, "rows:", ds.endmarker['input'] output_model_file = 'model.pkl' hidden_layers = [300, 100] print "hidden_layers:", hidden_layers #tstdata, trndata = ds.splitWithProportion(validation_percentage ) #trndata._convertToOneOfMany( ) #tstdata._convertToOneOfMany( ) #print "verify data rows:", tstdata.endmarker['input'] #print "train data rows:", trndata.endmarker['input']
#supervised learning tutorial from pybrain.datasets import SupervisedDataSet from pybrain.datasets import ClassificationDataSet # DS = SupervisedDataSet(3,2) # DS.appendLinked([1,2,3], [4,5]) # print(len(DS)) # DS['input'] # array([[1., 2., 3.]]) DS = ClassificationDataSet(2, class_labels=['Urd', 'Verdandi', 'skuld']) DS.appendLinked([0.1, 0.5] , [0]) DS.appendLinked([1.2, 1.2] , [1]) DS.appendLinked([1.4, 1.6] , [1]) DS.appendLinked([1.6, 1.8] , [1]) DS.appendLinked([0.10, 0.80] , [2]) DS.appendLinked([0.20, 0.90] , [2]) print(DS.calculateStatistics()) print(DS.classHist) print(DS.nClasses) print(DS.getClass(1)) print(DS.getField('target').transpose())
# NOTE(review): this def reads like a ClassificationDataSet method excerpted
# from its class (it uses self.indim / self['class']) — confirm indentation
# against the original file.
def castToRegression(self, values):
    """Converts data set into a SupervisedDataSet for regression. Classes
    are used as indices into the value array given."""
    regDs = SupervisedDataSet(self.indim, 1)
    fields = self.getFieldNames()
    fields.remove('target')
    # copy every field except 'target' verbatim into the new dataset
    for f in fields:
        regDs.setField(f, self[f])
    # replace class indices by the corresponding regression values
    regDs.setField('target', values[self['class'].astype(int)])
    return regDs


if __name__ == "__main__":
    # Small self-test: 2-D inputs, three named classes.
    dataset = ClassificationDataSet(2, 1, class_labels=['Urd', 'Verdandi', 'Skuld'])
    dataset.appendLinked([ 0.1, 0.5 ] , [0])
    dataset.appendLinked([ 1.2, 1.2 ] , [1])
    dataset.appendLinked([ 1.4, 1.6 ] , [1])
    dataset.appendLinked([ 1.6, 1.8 ] , [1])
    dataset.appendLinked([ 0.10, 0.80 ] , [2])
    dataset.appendLinked([ 0.20, 0.90 ] , [2])
    dataset.calculateStatistics()
    print(("class histogram:", dataset.classHist))
    print(("# of classes:", dataset.nClasses))
    print(("class 1 is: ", dataset.getClass(1)))
    print(("targets: ", dataset.getField('target')))
    # one-hot encode, then convert back to single-column class labels
    dataset._convertToOneOfMany(bounds=[0, 1])
    print("converted targets: ")
    print((dataset.getField('target')))
    dataset._convertToClassNb()
    print(("reconverted to original:", dataset.getField('target')))
dataset = pd.get_dummies(df) pd.set_option('display.max_columns', 1000) # 把所有的列全部显示出来 X = dataset[dataset.columns[:-2]] Y = dataset[dataset.columns[-2:]] labels = dataset.columns._data[-2:] # Step 3:将数据转换为SupervisedDataSet/ClassificationDtaSet对象 from pybrain.datasets import ClassificationDataSet ds = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels) for i in range(len(Y)): y = 0 if Y['好瓜_是'][i] == 1: y = 1 ds.appendLinked(X.ix[i], y) ds.calculateStatistics() # 返回一个类直方图?搞不懂在做什么 # Step 4: 分开测试集和训练集 testdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels) testdata_temp, traindata_temp = ds.splitWithProportion(0.25) for n in range(testdata_temp.getLength()): testdata.appendLinked( testdata_temp.getSample(n)[0], testdata_temp.getSample(n)[1]) print(testdata) testdata._convertToOneOfMany() print(testdata) traindata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels) for n in range(traindata_temp.getLength()): traindata.appendLinked( traindata_temp.getSample(n)[0],
X = data_set.data # feature feature_names = data_set.feature_names y = data_set.target # label target_names = data_set.target_names # data normalization from sklearn import preprocessing normalized_X = preprocessing.normalize(X) # construction of data in pybrain's formation from pybrain.datasets import ClassificationDataSet ds = ClassificationDataSet(30, 1, nb_classes=2, class_labels=y) for i in range(len(y)): ds.appendLinked(X[i], y[i]) ds.calculateStatistics() # split of training and testing dataset tstdata_temp, trndata_temp = ds.splitWithProportion(0.5) tstdata = ClassificationDataSet(30, 1, nb_classes=2) for n in range(0, tstdata_temp.getLength()): tstdata.appendLinked( tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1]) trndata = ClassificationDataSet(30, 1, nb_classes=2) for n in range(0, trndata_temp.getLength()): trndata.appendLinked( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1]) trndata._convertToOneOfMany()
image_vector = image.flatten() ds_training.appendLinked(image_vector, [category]) category+=1 category = 0 for shape in shapes: for i in range(8): image = imread('C:/Users/alexis.matelin/Documents/Neural Networks/Visual classification/shapes/testing/'+shape+str(i+1)+'.png', as_grey=True, plugin=None, flatten=None) image_vector = image.flatten() ds_testing.appendLinked(image_vector, [category]) ds_training.calculateStatistics() ds_training.getClass(0) print(ds_training.getField('target')) ds_training._convertToOneOfMany(bounds=[0, 1]) ds_testing._convertToOneOfMany(bounds=[0, 1]) print(ds_training.getField('target')) net = buildNetwork(1024,12, 12, 3, hiddenclass = TanhLayer, outclass=SoftmaxLayer) trainer = BackpropTrainer(net, dataset=ds_training, verbose=True, learningrate=0.01) trainer.trainUntilConvergence()
for tr in output_train: continue #print("This is the training output value: ", tr) trnresult = percentError(trainer.testOnClassData(), training_data['class']) tstresult = percentError(trainer.testOnClassData(dataset=test_data), test_data['class']) print("epoch: %4d" % trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult) trnerr.append(trnresult) tsterr.append(tstresult) fig_nn = plt.figure() ax = fig_nn.add_subplot(1, 1, 1) ax.set_title("Neural Network Convergence") ax.set_xlabel('Epoch') ax.set_ylabel('Error') ax.semilogy(range(len(trnerr)), trnerr, 'b', range(len(tsterr)), tsterr, 'r') print fnn.activate([7, 3, 2, 10, 5, 10, 5, 4, 4]) print("This is the length of the training and test data, respectively", len(training_data), len(test_data)) print(training_data.indim, training_data.outdim) print("This is the shape of the input", all_data['input'].shape) print("This is the shape of the target", all_data['target'].shape) print("This is the shape of the class", all_data['class'].shape) print("This is count of classes", all_data.nClasses) print("Here is the statistics on the class", all_data.calculateStatistics()) print("Here the linked fields", all_data.link)
#prepare data for pybrain number_of_columns = Data.shape[1] PyBData = ClassificationDataSet(number_of_columns, 1, nb_classes=2) PyBDataTrain = ClassificationDataSet(number_of_columns, 1, nb_classes=2) PyBDataTest = ClassificationDataSet(number_of_columns, 1, nb_classes=2) for i in xrange(len(Data)): PyBData.appendLinked(Data[i], Target[i]) for i in xrange(len(DataTrain)): PyBDataTrain.appendLinked(DataTrain[i], TargetTrain[i]) for i in xrange(len(DataTest)): PyBDataTest.appendLinked(DataTest[i], TargetTest[i]) print("this is the variable statistics ", PyBData.calculateStatistics()) #*******************End of Preparing Data & Target for Estimators****************** #*******************Neural Network Classification****************** print("Entering Neural Network Classifier with time ", time.localtime()) PyBDataTrain_nn = copy.deepcopy(PyBDataTrain) PyBDataTest_nn = copy.deepcopy(PyBDataTest) PyBDataTrain_nn._convertToOneOfMany() PyBDataTest_nn._convertToOneOfMany() fnn = buildNetwork(PyBDataTrain_nn.indim, 2, PyBDataTrain_nn.outdim, bias=True,