def big_training(np_data, num_nets=1, num_epoch=20, net_builder=net_full, train_size=.1, testing=False):
    sss = cross_validation.StratifiedShuffleSplit(np_data[:, :1].ravel(), n_iter=num_nets,
                                                  test_size=1 - train_size, random_state=3476)
    nets = [None for net_ind in range(num_nets)]
    trainaccu = [[0 for i in range(num_epoch)] for net_ind in range(num_nets)]
    testaccu = [[0 for i in range(num_epoch)] for net_ind in range(num_nets)]
    net_ind = 0
    for train_index, test_index in sss:
        print('%s Building %d. network.' % (time.ctime(), net_ind + 1))
        #print("TRAIN:", len(train_index), "TEST:", len(test_index))
        trainset = ClassificationDataSet(np_data.shape[1] - 1, 1)
        trainset.setField('input', np_data[train_index, 1:] / 100 - .6)
        trainset.setField('target', np_data[train_index, :1])
        trainset._convertToOneOfMany()
        trainlabels = trainset['class'].ravel().tolist()
        if testing:
            testset = ClassificationDataSet(np_data.shape[1] - 1, 1)
            testset.setField('input', np_data[test_index, 1:] / 100 - .6)
            testset.setField('target', np_data[test_index, :1])
            testset._convertToOneOfMany()
            testlabels = testset['class'].ravel().tolist()
        nets[net_ind] = net_builder()
        trainer = BackpropTrainer(nets[net_ind], trainset)
        for i in range(num_epoch):
            for ii in range(3):
                err = trainer.train()
            print('%s Epoch %d: Network trained with error %f.' % (time.ctime(), i + 1, err))
            trainaccu[net_ind][i] = accuracy_score(trainlabels, trainer.testOnClassData())
            print('%s Epoch %d: Train accuracy is %f' % (time.ctime(), i + 1, trainaccu[net_ind][i]))
            print([sum([trainaccu[y][i] > tres for y in range(net_ind + 1)]) for tres in [0, .1, .2, .3, .4, .5, .6]])
            if testing:
                testaccu[net_ind][i] = accuracy_score(testlabels, trainer.testOnClassData(testset))
                print('%s Epoch %d: Test accuracy is %f' % (time.ctime(), i + 1, testaccu[net_ind][i]))
        NetworkWriter.writeToFile(nets[net_ind], 'nets/' + net_builder.__name__ + str(net_ind) + '.xml')
        net_ind += 1
    return [nets, trainaccu, testaccu]
def ann(training_filename, testing_filename, itr, epoch, model_type):
    training_start_time = "The generation of data set and training started at :%s" % datetime.datetime.now()
    training_dataset = np.genfromtxt(training_filename, skip_header=0, dtype="int", delimiter='\t')
    data = ClassificationDataSet(len(training_dataset[0]) - 1, 2, nb_classes=2)
    for aSample in training_dataset:
        data.addSample(aSample[0:len(aSample) - 1], [aSample[len(aSample) - 1]])
    # data._convertToOneOfMany()
    fann = buildNetwork(314, 2, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fann, dataset=data, momentum=0.1, verbose=False, weightdecay=0.01)
    counter = 0
    print training_start_time
    while counter < itr:
        trainer.trainEpochs(epoch)
        counter = counter + 1
    trnresult = percentError(trainer.testOnClassData(), data['class'])
    trained_result_log = "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult
    training_time_end = "The training and result logging ended at %s :" % datetime.datetime.now()
    filename = working_dir + "\\models\\" + model_type + ".obj"
    save_trained_model(fann, filename)
    log_file.write("\n" + training_start_time + "\n")
    log_file.write(str(trained_result_log) + "\n")
    log_file.write(training_time_end + "\n")
def classify(Xtrain, Ytrain, n_hidden=5):
    """ Use entirety of provided X, Y to predict

    Arguments
        Xtrain -- Training data
        Ytrain -- Training prediction

    Returns
        classifier -- a classifier fitted to Xtrain and Ytrain
    """
    # PyBrain expects data in its DataSet format
    trndata = ClassificationDataSet(Xtrain.shape[1], nb_classes=2)
    trndata.setField('input', Xtrain)
    # Apparently, arrays don't work here as they try to access second dimension size...
    trndata.setField('target', mat(Ytrain).transpose())
    trndata._convertToOneOfMany()  # one output neuron per class
    # build neural net and train it
    net = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    trainer.trainUntilConvergence()
    #trainer.trainEpochs(5)
    print "trained"
    #trainer.trainEpochs(5)
    # Return a functor that wraps calling predict
    return NeuralNetworkClassifier(trainer)
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)
    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()
    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print "training until convergence..."
    trainer.trainUntilConvergence(maxEpochs=100)
    print "done. testing..."
    test_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0] * len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[featureMap[feat]] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))
    test_ds._convertToOneOfMany()
    tstresult = percentError(trainer.testOnClassData(dataset=test_ds), test_ds['class'])
    print tstresult
def prepare_dataset():
    # Prepare output coding. "-" is 1, "." is 0
    d_morse_array = '100'  # ( 1, 0, 0 ) # D -..  - 100
    g_morse_array = '110'  # ( 1, 1, 0 ) # G --.  - 110
    k_morse_array = '101'  # ( 1, 0, 1 ) # K -.-  - 101
    o_morse_array = '111'  # ( 1, 1, 1 ) # O ---  - 111
    r_morse_array = '010'  # ( 0, 1, 0 ) # R .-.  - 010
    s_morse_array = '000'  # ( 0, 0, 0 ) # S ...  - 000
    u_morse_array = '001'  # ( 0, 0, 1 ) # U ..-  - 001
    w_morse_array = '011'  # ( 0, 1, 1 ) # W .--  - 011
    # Load learning data
    d_array = read_array("d")
    g_array = read_array("g")
    k_array = read_array("k")
    o_array = read_array("o")
    r_array = read_array("r")
    s_array = read_array("s")
    u_array = read_array("u")
    w_array = read_array("w")
    # Create dataset
    dataset = ClassificationDataSet(1600, nb_classes=8,
                                    class_labels=[d_morse_array, g_morse_array, k_morse_array, o_morse_array,
                                                  r_morse_array, s_morse_array, u_morse_array, w_morse_array])
    # add all samples to dataset
    dataset.addSample(d_array, [0])
    dataset.addSample(g_array, [1])
    dataset.addSample(k_array, [2])
    dataset.addSample(o_array, [3])
    dataset.addSample(r_array, [4])
    dataset.addSample(s_array, [5])
    dataset.addSample(u_array, [6])
    dataset.addSample(w_array, [7])
    dataset._convertToOneOfMany()
    return dataset
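A minimal usage sketch for prepare_dataset() above (my addition, not part of the original snippet): it trains a small softmax classifier on the returned Morse dataset. The hidden-layer size of 40 and epoch count are illustrative choices, and read_array() is assumed to return 1600-element vectors as the dataset dimensions imply.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer

ds = prepare_dataset()
# after _convertToOneOfMany() the dataset exposes 1600 inputs and 8 one-of-many outputs
net = buildNetwork(ds.indim, 40, ds.outdim, outclass=SoftmaxLayer)  # 40 hidden units: illustrative
trainer = BackpropTrainer(net, dataset=ds)
for epoch in range(50):
    err = trainer.train()  # one pass over the dataset; returns the training error
    print(err)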
def simpleNeuralNetworkTrain(fileName, numFeatures, numClasses, possibleOutputs, numHiddenNodes, numTrainingEpochs):
    data = np.genfromtxt(fileName)
    trnIn = data[:, 0:5]
    trnOut = data[:, 6]
    trnOut = [int(val) for val in trnOut]
    normalizeData(trnIn, numFeatures)
    trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
    for row in range(0, len(trnIn)):
        tempListOut = []
        tempListIn = []
        tempListOut.append(int(trnOut[row]))
        for i in range(0, numFeatures):
            tempListIn.append(trnIn[row][i])
        trndata.addSample(tempListIn, tempListOut)
    trndata._convertToOneOfMany()
    # When running for the first time
    myNetwork = buildNetwork(numFeatures, numHiddenNodes, numClasses, outclass=SoftmaxLayer, bias=True, recurrent=False)
    # Read from file after the first try.
    # myNetwork = NetworkReader.readFrom('firstTime.xml')  # Use saved results.
    trainer = BackpropTrainer(myNetwork, dataset=trndata, momentum=0.0, verbose=True, weightdecay=0.0)
    for i in range(numTrainingEpochs):
        trainer.trainOnDataset(dataset=trndata)
class neuralNetwork():
    def __init__(self, n_classes):
        self.n_classes = n_classes

    def fit(self, X, Y):
        n_features = X.shape[1]
        self.train_ds = ClassificationDataSet(n_features, 1, nb_classes=self.n_classes)
        for train, target in zip(X, Y):
            self.train_ds.addSample(train, [target])
        self.train_ds._convertToOneOfMany()
        self.net = buildNetwork(self.train_ds.indim, 2 * n_features, self.train_ds.outdim, outclass=SoftmaxLayer)
        self.trainer = BackpropTrainer(self.net, self.train_ds)

    def predict(self, X):
        n_features = X.shape[1]
        self.test_ds = ClassificationDataSet(n_features, 1, nb_classes=self.n_classes)
        for test in X:
            self.test_ds.addSample(test, [1])  # dummy target; only the inputs are used for activation below
        self.test_ds._convertToOneOfMany()
        for i in range(100):
            self.trainer.trainEpochs(5)
        self.labels = self.net.activateOnDataset(self.test_ds)
        self.labels = self.labels.argmax(axis=1)
        return self.labels
class EightBitBrain(object):
    def __init__(self, dataset, inNodes, outNodes, hiddenNodes, classes):
        self.__dataset = ClassificationDataSet(inNodes, classes - 1)
        for element in dataset:
            self.addDatasetSample(self._binaryList(element[0]), element[1])
        self.__dataset._convertToOneOfMany()
        self.__network = buildNetwork(inNodes, hiddenNodes, self.__dataset.outdim, recurrent=True)
        self.__trainer = BackpropTrainer(self.__network, learningrate=0.01, momentum=0.99, verbose=True)
        self.__trainer.setData(self.__dataset)

    def _binaryList(self, n):
        return [int(c) for c in "{0:08b}".format(n)]

    def addDatasetSample(self, argument, target):
        self.__dataset.addSample(argument, target)

    def train(self, epochs):
        self.__trainer.trainEpochs(epochs)

    def activate(self, information):
        result = self.__network.activate(self._binaryList(information))
        highest = (0, 0)
        for resultClass in range(len(result)):
            if result[resultClass] > highest[0]:
                highest = (result[resultClass], resultClass)
        return highest[1]
def test(self, filename, classes, trainer, net):
    testLabels = []
    #load test data
    tstdata = ClassificationDataSet(103, 1, nb_classes=classes)
    tstdata = self.loaddata(filename, classes)
    testLabels = tstdata['target']
    # some sort of mandatory conversion
    tstdata._convertToOneOfMany()
    # using numpy array
    output = np.array([net.activate(x) for x, _ in tstdata])
    output = output.argmax(axis=1)
    print(output)
    print("on test data", percentError(output, tstdata['class']))
    for i, l in enumerate(output):
        print l, '->', testLabels[i][0]
    # alternate version - using activateOnDataset function
    out = net.activateOnDataset(tstdata).argmax(axis=1)
    print out
    return percentError(out, tstdata['class'])
def build_sample_nn():
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)
    tstdata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    trndata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    return trainer, fnn, tstdata
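The rebuild loops above work around a PyBrain quirk that recurs throughout these snippets: splitWithProportion() returns plain SupervisedDataSet objects, so _convertToOneOfMany() must be applied to freshly constructed ClassificationDataSets. A small helper capturing that pattern (a sketch of mine, not part of the original code):

from pybrain.datasets import ClassificationDataSet

def rebuild_classification_ds(part, n_inputs, n_classes):
    # copy samples from a split produced by splitWithProportion()
    # back into a ClassificationDataSet so one-of-many coding works
    rebuilt = ClassificationDataSet(n_inputs, 1, nb_classes=n_classes)
    for n in range(part.getLength()):
        rebuilt.addSample(part.getSample(n)[0], part.getSample(n)[1])
    return rebuilt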
def get_ds_for_pybrain(X, y):
    ds = ClassificationDataSet(2127, nb_classes=5)
    tuples_X = [tuple(map(float, tuple(x))) for x in X.values]
    tuples_y = [tuple(map(float, (y,))) for y in y.values]
    for X, y in zip(tuples_X, tuples_y):
        ds.addSample(X, y)
    ds._convertToOneOfMany()
    return ds
def main():
    random.seed(50)
    data, digit = read_data(DATAFILE)
    # ds = ClassificationDataSet(64, 1, nb_classes=10)
    #
    # for i in xrange(len(data)):
    #     ds.addSample(data[i], [digit[i]])
    # ds._convertToOneOfMany()
    #
    # simple_network(data, digit, ds)
    # one_hidden_layer(data, digit, ds)
    n_folds = 5
    perms = np.array_split(np.arange(len(data)), n_folds)
    simple_results = []
    one_hl_results = []
    creative_results = []
    for i in xrange(n_folds):
        train_ds = ClassificationDataSet(64, 1, nb_classes=10)
        test_ds = ClassificationDataSet(64, 1, nb_classes=10)
        train_perms_idxs = range(n_folds)
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[train_perms_idx])
        train_idxs = np.concatenate(temp_list)
        for idx in train_idxs:
            train_ds.addSample(data[idx], [digit[idx]])
        train_ds._convertToOneOfMany()
        # determine test indices
        test_idxs = perms[i]
        for idx in test_idxs:
            test_ds.addSample(data[idx], [digit[idx]])
        test_ds._convertToOneOfMany()
        simple_results.append(simple_network(data, digit, train_ds, test_ds))
        one_hl_results.append(one_hidden_layer(data, digit, train_ds, test_ds))
        creative_results.append(creative_network(data, digit, train_ds, test_ds))
    for i in xrange(len(simple_results)):
        print 'Simple %f : Hidden %f : Creative %f' % (simple_results[i], one_hl_results[i], creative_results[i])
    print 'Simple mean: %f' % np.mean(simple_results)
    print 'One hidden layer mean: %f' % np.mean(one_hl_results)
    print 'Creative mean : %f' % np.mean(creative_results)
    print "Simple vs onehl"
    paired_t_test(simple_results, one_hl_results)
    print "simple vs creative"
    paired_t_test(simple_results, creative_results)
    print "onehl vs creative"
    paired_t_test(one_hl_results, creative_results)
def nn_classify():
    # train_X,Y = load_svmlight_file('data/train_metrix')
    # rows = pd.read_csv('data/log_test2.csv',index_col=0).sort_index().index.unique()
    # train_X = pd.read_csv('data/train_tfidf.csv',index_col=0)
    # test_X = pd.read_csv('data/test_tfidf.csv',index_col=0)
    # select = SelectPercentile(f_classif, percentile=50)
    # select.fit(train_X,Y)
    # train_X = select.transform(train_X)
    # test_X = select.transform(test_X)
    # print 'dump train...'
    # dump_svmlight_file(train_X,Y,'data/train_last')
    # test_Y = [0]*(test_X.shape[0])
    # print 'dump test...'
    # dump_svmlight_file(test_X,test_Y,'data/test_last')
    train_X, Y = load_svmlight_file('data/train_last')
    test_X, test_Y = load_svmlight_file('data/test_last')
    train_X = train_X.toarray()
    test_X = test_X.toarray()
    Y = [int(y) - 1 for y in Y]
    print 'Y:', len(Y)
    rows = pd.read_csv('data/log_test2.csv', index_col=0).sort_index().index.unique()
    train_n = train_X.shape[0]
    m = train_X.shape[1]
    test_n = test_X.shape[0]
    print train_n, m,  # test_n
    train_data = ClassificationDataSet(m, 1, nb_classes=12)
    test_data = ClassificationDataSet(m, 1, nb_classes=12)
    # test_data = ClassificationDataSet(test_n,m,nb_classes=12)
    for i in range(train_n):
        train_data.addSample(np.ravel(train_X[i]), Y[i])
    for i in range(test_n):
        test_data.addSample(test_X[i], Y[i])
    trndata = train_data
    # tstdata = train_data
    trndata._convertToOneOfMany()
    # tstdata._convertToOneOfMany()
    test_data._convertToOneOfMany()
    # first train all the classifiers on the training set
    print 'train classify...'
    fnn = buildNetwork(trndata.indim, 400, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, learningrate=0.01, verbose=True, weightdecay=0.01)
    trainer.trainEpochs(3)
    # print 'Percent Error on Test dataset: ', percentError(trainer.testOnClassData(dataset=tstdata), )
    print 'end train classify'
    pre_y = trainer.testOnClassData(dataset=trndata)
    print metrics.classification_report(Y, pre_y)
    pre_y = trainer.testOnClassData(dataset=test_data)
    print 'write result...'
    print 'before:', pre_y[:100]
    pre_y = [int(y) + 1 for y in pre_y]
    print 'after:', pre_y[:100]
    DataFrame(pre_y, index=rows).to_csv('data/info_test2.csv', header=False)
    print 'end...'
def __init__(self, data, targets, cv_data, cv_targets, extra, layers, epochs=1,
             smoothing=1, new=True, filename_in=False):
    if len(cv_data) != len(cv_targets):
        raise Exception("Number of CV data and CV targets must be equal")
    if len(data) != len(targets):
        raise Exception("Number of data and targets must be equal")
    if new:
        class_tr_targets = [str(int(t[0]) - 1) for t in targets]  # for pybrain's classification dataset
        print "...training the DNNRegressor"
        if len(layers) > 2:  # TODO testing only
            net = DNNRegressor(data, extra, class_tr_targets, layers,
                               hidden_layer="TanhLayer", final_layer="SoftmaxLayer",
                               compression_epochs=epochs, bias=True, autoencoding_only=False)
            print "...running net.fit()"
            net = net.fit()
        elif len(layers) == 2:
            net = buildNetwork(layers[0], layers[-1], outclass=SoftmaxLayer, bias=True)
        ds = ClassificationDataSet(len(data[0]), 1, nb_classes=9)
        bag = 1
        noisy, _ = self.dropout(data, noise=0.0, bag=bag, debug=True)
        bagged_targets = []
        for t in class_tr_targets:
            for b in range(bag):
                bagged_targets.append(t)
        for i, d in enumerate(noisy):
            t = bagged_targets[i]
            ds.addSample(d, t)
        ds._convertToOneOfMany()
        print "...smoothing for epochs: ", smoothing
        self.model = net
        preds = [self.predict(d) for d in cv_data]
        cv = score(preds, cv_targets, debug=False)
        preds = [self.predict(d) for d in data]
        tr = score(preds, targets, debug=False)
        # best score 0.398 after 50 compression epochs and 200 epochs with lr=0.0008,
        # weightdecay=0.05, momentum=0.04. Used dropout of 0.2 in compression,
        # 0.5 in softmax pretraining, and no dropout in smoothing.
        trainer = BackpropTrainer(net, ds, verbose=True, learningrate=0.0008,
                                  momentum=0.04, weightdecay=0.05)
        print "Train score before training: ", tr
        print "CV score before training: ", cv
        for i in range(smoothing):
            trainer.train()
            self.model = net
            preds = [self.predict(d) for d in cv_data]
            cv = score(preds, cv_targets, debug=False)
            preds = [self.predict(d) for d in data]
            tr = score(preds, targets, debug=False)
            print "Train/CV score at epoch ", (i + 1), ': ', tr, '/', cv
            #if i == 1:
                #print "...saving the model"
                #save("data/1000_ex_4_hidden/net_epoch_1.txt", net)
            #elif i == 3:
                #print "...saving the model"
                #save("data/1000_ex_4_hidden/net_epoch_3.txt", net)
            #elif i == 5:
                #print "...saving the model"
                #save("data/1000_ex_4_hidden/net_epoch_5.txt", net)
        print "...saving the model"
        #save("data/1000_ex_4_hidden/net_epoch_10.txt", net)
    else:
        model = load(filename_in)
        self.model = model
def createDataset(self, inputData):
    data = ClassificationDataSet(100, nb_classes=len(inputData.keys()), class_labels=inputData.keys())
    allTheLetters = string.uppercase
    for i in range(300):
        for letter in inputData.keys():
            data.addSample(inputData[letter], allTheLetters.index(letter))
    data._convertToOneOfMany([0, 1])
    return data
def makeClassificationDataSet(X, Y, nb_classes=12):
    """
    dim(X) = c(n,m)
    dim(Y) = c(n,1)
    the classes in Y must be 0, 1, 2, ..., i.e. labels start at 0
    """
    alldata = ClassificationDataSet(inp=X.shape[1], target=1, nb_classes=nb_classes)
    [alldata.addSample(X[row, :], [Y[row]]) for row in range(X.shape[0])]
    alldata._convertToOneOfMany()
    return alldata
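A quick call example for makeClassificationDataSet, using synthetic numpy data of my own; the shapes and class count are illustrative only:

import numpy as np

X = np.random.rand(20, 4)             # 20 samples, 4 features
Y = np.random.randint(0, 3, size=20)  # labels must start at 0, as the docstring notes
ds = makeClassificationDataSet(X, Y, nb_classes=3)
print(ds.indim)   # 4 inputs
print(ds.outdim)  # 3 one-of-many outputs after conversion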
def classificationDataSet(subjects=['a2', 'b', 'c1', 'c2'], segClass=0, db=None, seg_width=10,
                          usePCA=True, n_components=5, isTrainingData=False):
    if not db:
        db = gyroWalkingData()
    if usePCA:
        DS = ClassificationDataSet(n_components * 3, nb_classes=2)
    else:
        DS = ClassificationDataSet(21 * 3, nb_classes=2)
    for subject in subjects:
        # Initialise data
        if usePCA:
            raw = db.pca_dict(n_components=n_components, whiten=False)[subject]
        else:
            raw = db.data[subject][:, 2:]
        gradients, standardDeviations = summaryStatistics(raw, std_window=seg_width)
        # Initialise segments
        if 0 <= segClass < 4:
            segs = [s for s, c in db.manual_gait_segments[subject] if c == segClass]
        else:
            segs = db.segments[subject]
        # Add data
        for i in range(0, len(raw)):
            """
            # Look for segments in window, including those of other classes
            hasSeg = 0
            hasOtherSeg = False
            for j in range(seg_width):
                if i+j in segs:
                    hasSeg = 1
                else:
                    if i+j in zip(*db.manual_gait_segments[subject])[0]:
                        hasOtherSeg = True
            if hasOtherSeg:
                hasSeg = 0
            # Add segments to classifier, duplicating rare classes if it is training data
            for j in range(seg_width):
                if i+j < len(raw):
                    DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
                    if isTrainingData and (hasSeg or hasOtherSeg):
                        for i in range(0):
                            DS.appendLinked( np.concatenate( [raw[i+j],gradients[i+j],standardDeviations[i+j]] ), [hasSeg] )
            """
            hasSeg = 0
            if i in segs:
                hasSeg = 1
            DS.appendLinked(np.concatenate([raw[i], gradients[i], standardDeviations[i]]), [hasSeg])
    DS._convertToOneOfMany()
    if isTrainingData:
        DS = balanceClassRatios(DS)
    return DS
def livetest(self, data):
    trainer, net = self.unpickleModel()
    testData = ClassificationDataSet(103, 1, nb_classes=9)
    testData.addSample(data[0], 1)
    testData._convertToOneOfMany()
    out = net.activateOnDataset(testData).argmax(axis=1)
    percentError(out, testData['class'])
    print self.labelToLetter[str(out[0])]
    return self.labelToLetter[str(out[0])]
def _createDataSet(X, Y, one_based):
    labels = np.unique(Y)
    alldata = ClassificationDataSet(X.shape[1], nb_classes=labels.shape[0], class_labels=labels)
    shift = 1 if one_based else 0
    for i in range(X.shape[0]):
        alldata.addSample(X[i], Y[i] - shift)
    alldata._convertToOneOfMany()
    return alldata
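For example, with one-based labels (classes 1..K) the shift maps them onto the zero-based targets PyBrain expects; a sketch of mine with synthetic data:

import numpy as np

X = np.random.rand(10, 3)
Y = np.random.randint(1, 4, size=10)       # labels 1..3
ds = _createDataSet(X, Y, one_based=True)  # stored internally as classes 0..2
print(ds['class'][:5])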
def createDataset():
    data = ClassificationDataSet(100, nb_classes=len(lettersDict.keys()), class_labels=lettersDict.keys())
    allTheLetters = string.uppercase
    for letter in lettersDict.keys():
        data.addSample(lettersDict[letter], allTheLetters.index(letter))
    data._convertToOneOfMany(bounds=[0, 1])
    print data.calculateStatistics()
    return data
def buildDataset(labels, data):
    '''
    builds and returns training and test datasets from user image mappings
    '''
    DS = ClassificationDataSet(len(data[0][0].ravel()), 1, nb_classes=len(labels), class_labels=labels)
    for img, label in data:
        DS.addSample(img.ravel(), [label])
    DS._convertToOneOfMany()
    return DS
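A usage sketch for buildDataset, assuming data is a list of (image_array, label_index) pairs as the loop implies; the arrays and labels here are fabricated placeholders of mine:

import numpy as np

labels = ['cat', 'dog']
data = [(np.zeros((8, 8)), 0), (np.ones((8, 8)), 1)]  # two dummy 8x8 "images"
DS = buildDataset(labels, data)
print(DS.indim)   # 64 flattened pixels in
print(DS.outdim)  # 2 one-of-many outputs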
def conv2DS(Xv, yv=None):
    # M and classNames are assumed to be module-level globals
    if yv is None:
        yv = np.asmatrix(np.ones((Xv.shape[0], 1)))
        for j in range(len(classNames)):
            yv[j] = j
    C = len(unique(yv.flatten().tolist()[0]))
    DS = ClassificationDataSet(M, 1, nb_classes=C)
    for i in range(Xv.shape[0]):
        DS.appendLinked(Xv[i, :].tolist()[0], [yv[i].A[0][0]])
    DS._convertToOneOfMany()
    return DS
def main():
    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')
    #getting in the data from csv files and making it suitable for further action.
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])
    scaler = preprocessing.StandardScaler().fit(x_train)  #standardization plays an important role in all NN algos
    x_train = scaler.transform(x_train)  #final x_train
    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])
    x_test = scaler.transform(x_test)  # final x_test
    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(in_data)):
        alltraindata.addSample(x_train[count], [y_train[count]])
    alltraindata._convertToOneOfMany(bounds=[0, 1])
    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(out_data)):
        alltestdata.addSample(x_test[count], [y_test[count]])
    alltestdata._convertToOneOfMany(bounds=[0, 1])
    numRBFCenters = 10  #the 'h' value
    rbf = RBFNN(alltraindata.indim, alltraindata.outdim, numRBFCenters)
    rbf.train(alltraindata['input'], alltraindata['target'])
    testdata_target = rbf.test(alltestdata['input'])  #values obtained after testing, T is a 'n x outdim' matrix
    testdata_target = testdata_target.argmax(axis=1)  # the highest output activation gives the class. Selects the class predicted
    #testdata_target = testdata_target.reshape(len(in_data),1)
    #compare to y_test to obtain the accuracy.
    # count=0
    # for x in range(len(y_test)):
    #     if testdata_target[x] == y_test[x]:
    #         count+=1
    # tstresult2=float(count)/float(len(y_test)) * 100
    tstresult = percentError(testdata_target, alltestdata['class'])
    print "Accuracy on test data is: %5.2f%%," % (100 - tstresult)
def trainNN(data: list, targets: list, seed):
    """ Trains a neural network """
    X_tweet_counts = count_vect.fit_transform(data)
    # Compute term frequencies and store in X_train_tf
    # Compute tfidf feature values and store in X_train_tfidf
    X_train_tfidf = tfidf_transformer.fit_transform(X_tweet_counts)
    arr = X_train_tfidf.toarray()
    trainingdata = arr[:int(.75 * len(arr))]
    testdata = arr[int(.75 * len(arr)):]
    trainingtargets = targets[:int(.75 * len(targets))]
    testtargets = targets[int(.75 * len(targets)):]
    trainingds = ClassificationDataSet(len(arr[0]), 1, nb_classes=2)
    testds = ClassificationDataSet(len(arr[0]), 1, nb_classes=2)
    for index, data in enumerate(trainingdata):
        trainingds.addSample(data, trainingtargets[index])
    for index, data in enumerate(testdata):
        testds.addSample(data, testtargets[index])
    trainingds._convertToOneOfMany()
    testds._convertToOneOfMany()
    net = buildNetwork(trainingds.indim, 10, 10, 10, trainingds.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trainingds, learningrate=.65, momentum=.1)
    besttrain = 99.9
    besttest = 99.9
    bestresults = []
    bestclass = []
    for i in range(20):
        trainer.trainEpochs(1)
        trainresult = percentError(trainer.testOnClassData(), trainingds['class'])
        teststuff = trainer.testOnClassData(dataset=testds)
        testresult = percentError(teststuff, testds['class'])
        if testresult < besttest:
            besttest = testresult
            besttrain = trainresult
            bestresults = teststuff
            bestclass = testds['class']
        print("epoch: %2d" % trainer.totalepochs)
        print("train error: %2.2f%%" % trainresult)
        print("test error: %2.2f%%" % testresult)
    print("Best test error accuracy: {:.2f}%".format(besttest))
    print("Best test error f1 score: {:.4f}%".format(f1_score(bestclass, bestresults, average='macro')))
    print("Confusion Matrix:")
    print(confusion_matrix(bestclass, bestresults))
    return besttest
def main():
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])
    tstdata, trndata = alldata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    ticks = arange(-3., 6., 0.2)
    X, Y = meshgrid(ticks, ticks)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(2, 1, nb_classes=3)
    for i in xrange(X.size):
        griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
    griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
    for i in range(20):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        print "epoch: %4d" % trainer.totalepochs, \
              " train error: %5.2f%%" % trnresult, \
              " test error: %5.2f%%" % tstresult
        out = fnn.activateOnDataset(griddata)
        out = out.argmax(axis=1)  # the highest output activation gives the class
        out = out.reshape(X.shape)
        figure(1)
        ioff()      # interactive graphics off
        clf()       # clear the plot
        hold(True)  # overplot on
        for c in [0, 1, 2]:
            here, _ = where(tstdata['class'] == c)
            plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
        if out.max() != out.min():   # safety check against flat field
            contourf(X, Y, out)      # plot the contour
        ion()   # interactive graphics on
        draw()  # update the plot
    ioff()
    show()
def mlpClassifier(X, y, train_indices, test_indices, mom=0.1, weightd=0.01, epo=5):
    X_train, y_train, X_test, y_test = X[train_indices], y[train_indices], X[test_indices], y[test_indices]
    #Converting the data into a dataset which is easily understood by PyBrain.
    tstdata = ClassificationDataSet(X.shape[1], target=1, nb_classes=8)
    trndata = ClassificationDataSet(X.shape[1], target=1, nb_classes=8)
    # print "shape of X_train & y_train: " + str(X_train.shape) + str(y_train.shape)
    for i in range(y_train.shape[0]):
        trndata.addSample(X_train[i, :], y_train[i])
    for i in range(y_test.shape[0]):
        tstdata.addSample(X_test[i, :], y_test[i])
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    #printing the specs of data
    # print "Number of training patterns: ", len(trndata)
    # print "Input and output dimensions: ", trndata.indim, trndata.outdim
    # print "First sample (input, target, class):"
    # print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    #The neural-network used
    # print "Building Network..."
    #input layer, hidden layer of size 10(very small), output layer
    ANNc = FeedForwardNetwork()
    inLayer = LinearLayer(trndata.indim, name="ip")
    hLayer1 = TanhLayer(100, name="h1")
    hLayer2 = SigmoidLayer(100, name="h2")
    outLayer = SoftmaxLayer(trndata.outdim, name="op")
    ANNc.addInputModule(inLayer)
    ANNc.addModule(hLayer1)
    ANNc.addModule(hLayer2)
    ANNc.addOutputModule(outLayer)
    ip_to_h1 = FullConnection(inLayer, hLayer1, name="ip->h1")
    h1_to_h2 = FullConnection(hLayer1, hLayer2, name="h1->h2")
    h2_to_op = FullConnection(hLayer2, outLayer, name="h2->op")
    ANNc.addConnection(ip_to_h1)
    ANNc.addConnection(h1_to_h2)
    ANNc.addConnection(h2_to_op)
    ANNc.sortModules()
    # print "Done. Training the network."
    #The trainer used, in our case Back-propagation trainer
    trainer = BackpropTrainer(ANNc, dataset=trndata, momentum=mom, verbose=True, weightdecay=weightd)
    trainer.trainEpochs(epo)
    #The error
    trnresult = percentError(trainer.testOnClassData(dataset=trndata), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
    # print "Done."
    return ANNc, trainer.totalepochs, (100 - trnresult), (100 - tstresult)
def EvaluateArtificialNeuralNetwork(training_data, Input_features, Output_feature, NUMBER_CLASSES,
                                    HIDDEN_NEURONS, NUMBER_LAYERS, dataset_name, ParameterVal):
    X = training_data[Input_features]
    Y = training_data[Output_feature]
    ds = ClassificationDataSet(X.shape[1], nb_classes=NUMBER_CLASSES)
    for k in xrange(len(X)):
        ds.addSample((X.ix[k, :]), Y.ix[k, :])
    tstdata_temp, trndata_temp = ds.splitWithProportion(.25)
    tstdata = ClassificationDataSet(X.shape[1], nb_classes=NUMBER_CLASSES)
    for n in xrange(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    trndata = ClassificationDataSet(X.shape[1], nb_classes=NUMBER_CLASSES)
    for n in xrange(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    if NUMBER_CLASSES > 1:
        trndata._convertToOneOfMany()
        tstdata._convertToOneOfMany()

    '''*****Actual computation with one layer and HIDDEN_NEURONS number of neurons********'''
    fnn = buildNetwork(trndata.indim, HIDDEN_NEURONS, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=False, weightdecay=0.01)
    trainer.trainUntilConvergence(maxEpochs=3)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
    print ("Accuracy with Artificial Neural Network: epoch: " + str(trainer.totalepochs) +
           " TrainingSet:" + str(1 - trnresult / 100) + " TestSet:" + str(1 - tstresult / 100))

    '''****** Graphical Representation*****'''
    '''tot_hidden_tests, X_train, X_test, Y_train, Y_test, training_error, test_error = InitiateErrorCalcData(ParameterVal, training_data[Input_features], training_data[Output_feature])
    for hidden_unit in tot_hidden_tests:
        print ("Computing hidden unit :" + str(hidden_unit))
        model = buildNetwork( trndata.indim, hidden_unit , trndata.outdim, outclass=SoftmaxLayer )
        temp_trainer = BackpropTrainer( model, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
        temp_trainer.trainUntilConvergence(maxEpochs=3)
        training_error.append(MSE( temp_trainer.testOnClassData(), trndata['class'] ))
        test_error.append(MSE( temp_trainer.testOnClassData(dataset=tstdata ), tstdata['class'] ))
    PlotErrors(tot_hidden_tests, training_error, test_error, dataset_name, "Number of Hidden Units for single layer ANN", "MSE")'''

    '''*****Graphical representation with multiple layers and HIDDEN_NEURONS number of neurons********'''
    '''ffn = FeedForwardNetwork()
def create_dataset(filename):
    dataset = ClassificationDataSet(13, 1, class_labels=['0', '1', '2'])
    football_data = FootballDataCsv(filename)
    total_min = football_data.total_min()
    total_max = football_data.total_max()
    for data in football_data:
        normalized_features = [normalize(x, min_value=total_min, max_value=total_max) for x in data.to_list()]
        dataset.addSample(normalized_features, [data.binarized_output])
    dataset.assignClasses()
    dataset._convertToOneOfMany()
    return dataset
def create_network(X, Y, testx, testy):
    numOfFeature = X.shape[1]
    numOfExample = X.shape[0]
    alldata = ClassificationDataSet(numOfFeature, 1, nb_classes=10)  # create the classification dataset
    for i in range(0, numOfExample):
        alldata.addSample(X[i], Y[i])
    alldata._convertToOneOfMany()
    numOfFeature1 = testx.shape[1]
    numOfExample1 = testx.shape[0]
    testdata = ClassificationDataSet(numOfFeature1, 1, nb_classes=10)  # create the classification dataset
    for i in range(0, numOfExample1):
        testdata.addSample(testx[i], testy[i])
    testdata._convertToOneOfMany()
    print alldata.indim
    print alldata.outdim
    net = FeedForwardNetwork()
    inLayer = LinearLayer(alldata.indim)
    hiddenLayer1 = SigmoidLayer(60)  # choose the layer sizes yourself; judging by training results, more layers and nodes are not always better
    hiddenLayer2 = SigmoidLayer(60)
    outLayer = SoftmaxLayer(alldata.outdim)
    #bias = BiasUnit('bias')
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer1)
    net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    #net.addModule(bias)
    in_to_hidden = FullConnection(inLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer2, outLayer)
    hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2)
    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)
    net.sortModules()
    #fnn = buildNetwork( alldata.indim, 100, alldata.outdim, outclass=SoftmaxLayer )
    trainer = BackpropTrainer(net, dataset=alldata, momentum=0.1, verbose=True, weightdecay=0.01)
    for i in range(0, 20):
        print i
        trainer.trainEpochs(1)  # train over the data once
    print "train finish...."
    outtrain = net.activateOnDataset(alldata)
    outtrain = outtrain.argmax(axis=1)  # the highest output activation gives the class; each sample takes the most probable class, e.g. out=[1, 2, 3, 2, ...]
    outtest = net.activateOnDataset(testdata)
    outtest = outtest.argmax(axis=1)    # same: take the most probable class per sample
    trnresult = percentError(outtrain, alldata['class'])
    tstresult = percentError(outtest, testdata['class'])
    #trnresult = percentError( trainer.testOnClassData(dataset=alldata),alldata['class'] )
    #tstresult = percentError( trainer.testOnClassData(dataset=testdata),testdata['class'] )
    print "epoch: %4d" % trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult
    return net
def predict(self, place, timestamp):
    sample = self.__prepare_features(place, timestamp)
    griddata = ClassificationDataSet(2, 1, nb_classes=self.num_of_places)
    griddata.addSample(sample, [0])
    griddata._convertToOneOfMany()
    out = self.fnn.activateOnDataset(griddata)
    index = out.argmax(axis=1)
    result = None
    for key in self.places_indexes:
        if self.places_indexes[key] == index:
            result = key
    return result
target = (y == 1) * 1
# target = y + 1
# target = y
for i in xrange(N_train):
    if y[i] != 0:
        train_data.addSample(X_new[i, ], [target[i]])
for i in xrange(N_train + 1, N_test_end):
    if y[i] != 0:
        test_data.addSample(X_new[i, ], [target[i]])
for i in xrange(X_new.shape[0]):
    all_data.addSample(X_new[i, ], [target[i]])
train_data._convertToOneOfMany()
test_data._convertToOneOfMany()
all_data._convertToOneOfMany()
print("building")
fnn = buildNetwork(train_data.indim, 6, train_data.outdim, fast=True, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=train_data, momentum=0.2, verbose=True, learningrate=0.05, lrdecay=1.0)
                                  class_labels=['jovem', 'adulto', 'idoso'])
for n in range(test_data_temp.getLength()):
    test_data.addSample(
        test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])
val_data = ClassificationDataSet(2, 1, nb_classes=3,
                                 class_labels=['jovem', 'adulto', 'idoso'])
for n in range(val_data_temp.getLength()):
    val_data.addSample(
        val_data_temp.getSample(n)[0], val_data_temp.getSample(n)[1])
train_data._convertToOneOfMany(bounds=[0, 1])
test_data._convertToOneOfMany(bounds=[0, 1])
val_data._convertToOneOfMany(bounds=[0, 1])

from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError

net = buildNetwork(train_data.indim, 5, train_data.outdim, outclass=SoftmaxLayer)

def show_weights(net):
    for mod in net.modules:
        for conn in net.connections[mod]:
    p[2] = 0
    if all(board == r[0]):
        p[3] = 0
    # p /= p.sum()
    # return random.choice(arange(4), p = p)
    return p.argmax()

if __name__ == '__main__':
    tr_x = load('rec_board.npy')
    tr_y = load('rec_move.npy')
    tr_x = con1(tr_x.T)
    print tr_x.shape
    print tr_y.shape
    data = ClassificationDataSet(tr_x.shape[1], 1, nb_classes=4)
    for ind, ele in enumerate(tr_x):
        data.addSample(ele, tr_y[ind])
    data._convertToOneOfMany()
    print data.outdim
    fnn = buildNetwork(data.indim, 10, 10, data.outdim, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=data)  # , momentum=0.1, verbose=True, weightdecay=0.01)
    for i in xrange(3):
        print trainer.train()
    #trainer.trainUntilConvergence()
    game = _2048(length=4)
    game.mul_test(100, lambda a, b, c, d, e: softmax_dec(a, b, c, d, e, f=fnn.activate), addition_arg=True)
                          verbose=True, weightdecay=0.01)
    trainer.trainUntilConvergence()
    #trainer.trainEpochs(5)
    print "trained"
    #trainer.trainEpochs(5)
    # Return a functor that wraps calling predict
    return NeuralNetworkClassifier(trainer)

if __name__ == "__main__":
    # First obtain our training and testing data
    # Training has 50K samples, Testing 100K
    Xt, Yt, Xv = load_validation_data()
    # Run Neural Network over training data
    classifier = classify(Xt, Yt)
    # Prepare validation data and predict
    tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2)
    tstdata.setField('input', Xv)
    tstdata._convertToOneOfMany()  # one output neuron per class
    predictions = classifier.predict(tstdata)
    # Write prediction to file
    write_test_prediction("out_nn.txt", np.array(predictions))
# [set Data]
#CSV_TRAIN = "dataset/train_na2zero.csv"
#CSV_TEST = "dataset/test_na2zero.csv"
CSV_TRAIN = "dataset/train_zero_60x60.csv"
CSV_TEST = "dataset/test_zero_60x60.csv"

df_train = pd.read_csv(CSV_TRAIN)
Y = df_train.y
Y = Y - 1  # in order to make target in the range of [0, 1, 2, 3, ...., 11]
X = df_train.iloc[:, 1:].values

alldata = ClassificationDataSet(inp=X.shape[1], target=1, nb_classes=12)
for i in range(X.shape[0]):
    alldata.addSample(X[i, :], [Y[i]])
alldata._convertToOneOfMany()

df_test = pd.read_csv(CSV_TEST)
test_X = df_test.iloc[:, 1:].values

print "Number of training patterns: ", len(alldata)
print "Input and output dimensions: ", alldata.indim, alldata.outdim
print "First sample (input, target, class):"
print alldata['input'][0], alldata['target'][0], alldata['class'][0]

#############################################################################
# fnn
n = buildNetwork(alldata.indim, 1000, 1000, 1000, alldata.outdim, outclass=SoftmaxLayer, bias=True)
print("\n[ Network Structure]\n", n)
#############################################################################
for i in range(len(Y)):
    y = 0
    if Y['好瓜_是'][i] == 1:
        y = 1
    ds.appendLinked(X.ix[i], y)
ds.calculateStatistics()  # returns a class histogram? not sure what this is doing

# Step 4: split off the test set and training set
testdata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
testdata_temp, traindata_temp = ds.splitWithProportion(0.25)
for n in range(testdata_temp.getLength()):
    testdata.appendLinked(
        testdata_temp.getSample(n)[0], testdata_temp.getSample(n)[1])
print(testdata)
testdata._convertToOneOfMany()
print(testdata)
traindata = ClassificationDataSet(19, 1, nb_classes=2, class_labels=labels)
for n in range(traindata_temp.getLength()):
    traindata.appendLinked(
        traindata_temp.getSample(n)[0], traindata_temp.getSample(n)[1])
traindata._convertToOneOfMany()

'''
# using sklearn's OneHotEncoder
# drawback: it can only operate on one column at a time, so the results must be recombined afterwards, which is cumbersome
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
a = LabelEncoder().fit_transform(df[df.columns[0]])
# dataset_One = OneHotEncoder.fit(df.values[])
# print(df['色泽'])  # a standalone Series?
def exec_algo(xml_file, output_location):
    rootObj = ml.parse(xml_file)
    #Getting the root element so that we get the subclasses and its members and member function
    file = open(rootObj.MachineLearning.classification.datafile)
    var_inp = rootObj.MachineLearning.classification.input
    var_out = rootObj.MachineLearning.classification.output
    classes = rootObj.MachineLearning.classification.classes
    DS = ClassificationDataSet(var_inp, var_out, nb_classes=classes)
    for line in file.readlines():
        data = [float(x) for x in line.strip().split(',') if x != '']
        inp = tuple(data[:var_inp])
        output = tuple(data[var_inp:])
        DS.addSample(inp, output)
    split = rootObj.MachineLearning.classification.split
    tstdata, trndata = DS.splitWithProportion(split)
    trdata = ClassificationDataSet(trndata.indim, var_out, nb_classes=classes)
    tsdata = ClassificationDataSet(tstdata.indim, var_out, nb_classes=classes)
    for i in xrange(trndata.getLength()):
        trdata.addSample(trndata.getSample(i)[0], trndata.getSample(i)[1])
    for i in xrange(tstdata.getLength()):
        tsdata.addSample(tstdata.getSample(i)[0], tstdata.getSample(i)[1])
    trdata._convertToOneOfMany()
    tsdata._convertToOneOfMany()
    hiddenNeurons = rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.hiddenNeurons
    fnn = FeedForwardNetwork()
    inputLayer = LinearLayer(trdata.indim)
    hiddenLayer = GaussianLayer(hiddenNeurons)
    outputLayer = LinearLayer(trdata.outdim)
    fnn.addInputModule(inputLayer)
    fnn.addModule(hiddenLayer)
    fnn.addOutputModule(outputLayer)
    in_to_hidden = FullConnection(inputLayer, hiddenLayer)
    hidden_to_outputLayer = FullConnection(hiddenLayer, outputLayer)
    fnn.addConnection(in_to_hidden)
    fnn.addConnection(hidden_to_outputLayer)
    fnn.sortModules()
    learningrate = rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.learningRate
    momentum = rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.momentum
    epochs = rootObj.MachineLearning.classification.algorithm.RadialBasisFunctionNetwork.epochs
    trainer = BackpropTrainer(fnn, dataset=trdata, verbose=True, learningrate=learningrate, momentum=momentum)
    trainer.trainEpochs(epochs=epochs)
    #trainer.train()
    #trainer.trainUntilConvergence(dataset=trdata, maxEpochs=500, verbose=True, continueEpochs=10, validationProportion=0.25)
    trresult = percentError(trainer.testOnClassData(), trdata['class'])
    #testingResult = percentError(trainer.testOnClassData(dataset=tsdata), tsdata['class'])
    #print "Training accuracy : %f , Testing Accuracy: %f" % (100-trresult,100-testingResult)
    print "Training accuracy : %f " % (100 - trresult)
    ts = time.time()
    directory = output_location + sep + str(int(ts))
    makedirs(directory)
    fileObject = open(output_location + sep + str(int(ts)) + sep + 'pybrain_RBF', 'w')
    pickle.dump(trainer, fileObject)
    pickle.dump(fnn, fileObject)
    fileObject.close()
def model_net(self, fields, datas=None):
    # normalize the data first so that large values do not swamp small ones
    # https://www.jianshu.com/p/682c24aef525  data analysis with Python 4 | pandas DataFrame basics
    # normalization: https://www.zhihu.com/question/57509028
    # what is the difference between standardization and normalization? https://www.zhihu.com/question/20467170
    # difference between sklearn's fit_transform() and transform(): http://blog.csdn.net/quiet_girl/article/details/72517053
    # worth understanding how these are implemented
    from sklearn.preprocessing import MinMaxScaler
    from pybrain.structure import SoftmaxLayer
    from pybrain.datasets import ClassificationDataSet
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.supervised.trainers import BackpropTrainer
    from pybrain.utilities import percentError
    from pybrain.structure import TanhLayer

    scaler = MinMaxScaler()
    datas[fields] = scaler.fit_transform(datas[fields])
    tran_data = datas[fields].values
    tran_target = datas['Flag'].values
    tran_label = ['Sell', 'Hold', 'Buy']
    class_datas = ClassificationDataSet(6, 1, nb_classes=3, class_labels=tran_label)
    print(type(tran_target))
    print(tran_target)
    for i in range(len(tran_data)):
        class_datas.appendLinked(tran_data[i], tran_target[i])
    tstdata_temp, trndata_temp = class_datas.splitWithProportion(0.25)
    print(len(tstdata_temp), len(trndata_temp))
    tstdata = ClassificationDataSet(6, 1, nb_classes=3, class_labels=tran_label)
    trndata = ClassificationDataSet(6, 1, nb_classes=3, class_labels=tran_label)
    for n in range(0, trndata_temp.getLength()):
        trndata.appendLinked(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    for n in range(0, tstdata_temp.getLength()):
        tstdata.appendLinked(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    tstdata._convertToOneOfMany()
    trndata._convertToOneOfMany()
    tnet = buildNetwork(trndata.indim, 5, trndata.outdim, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(tnet, dataset=trndata, batchlearning=True, momentum=0.1, verbose=True, weightdecay=0.01)
    for i in range(5000):
        trainer.trainEpochs(20)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        testResult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % testResult)
    return trainer, class_datas
all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)
test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)
test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample(test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])
training_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
for n in xrange(0, training_data_temp.getLength()):
    training_data.addSample(training_data_temp.getSample(n)[0], training_data_temp.getSample(n)[1])
training_data._convertToOneOfMany()
test_data._convertToOneOfMany()
#********************End of Data Preparation***************************

#********************NN With BackPropagation***************************
fnn_backprop = buildNetwork(training_data.indim, 2, training_data.outdim, bias=True, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn_backprop, dataset=training_data, momentum=0.1, verbose=True, weightdecay=0.01)
epochs = 10
epoch_v = []
trnerr_backprop = []
tsterr_backprop = []
for i in xrange(epochs):
    # If you set the 'verbose' trainer flag, this will print the total error as it goes.
def get_training_object(train_list, feature, duration, delta_bool, delta2_bool, base_path):
    syl_list = []
    for t in train_list:
        syl_obj = Utility.load_obj(t)
        syl_list += syl_obj.syllables_list
    syllable_management_object = SyllableDatabaseManagement(syllable_list=syl_list)
    Y, names, tone, stress, syllable_short_long_type, syllalbe_position, phoneme, syllable_type = \
        syllable_management_object.get_GP_LVM_training_data(
            feature_key=feature,
            dur_position=duration,
            # dur_position=[],
            delta_bool=delta_bool,
            delta2_bool=delta2_bool,
            num_sampling=50,
            get_only_stress='1')
    tone = np.array(tone)
    Y = np.array(Y)
    for r in range(len(Y[0])):
        Y[:, r] = preprocessing.normalize(Y[:, r])
    arr = np.arange(len(Y))
    np.random.shuffle(arr)
    label_feature = tone
    alldata = ClassificationDataSet(len(Y[0]), 1, nb_classes=len(set(label_feature)))
    for a in arr:
        alldata.addSample(Y[a], label_feature[a])
    alldata._convertToOneOfMany()
    if Utility.is_file_exist('{}/GP_model.npy'.format(base_path)):
        model = Utility.load_obj('{}/GP_model.npy'.format(base_path))
        input_sensitivity = model.input_sensitivity()
        latent_data = np.array(Utility.load_obj('{}/GP_model.npy'.format(base_path)).X.mean)
        name_index = np.array(Utility.load_obj('{}/name_index.npy'.format(base_path)))
        latent_Y = []
        for n in names:
            ind = np.where(name_index == n)
            # print latent_data[ind][0].shape
            latent_Y.append(latent_data[ind][0])
        latent_Y = np.array(latent_Y)
        print latent_Y.shape
        for r in range(len(latent_Y[0])):
            # latent_Y[:,r] = preprocessing.normalize(latent_Y[:,r])
            latent_Y[:, r] = preprocessing.normalize(latent_Y[:, r] * input_sensitivity[r])
        lat_data = ClassificationDataSet(len(latent_Y[0]), 1, nb_classes=len(set(label_feature)))
        for a in arr:
            # print latent_Y[a], a
            lat_data.addSample(latent_Y[a], label_feature[a])
    else:
        # note: latent_Y is only defined on the branch above, so this fallback
        # raises a NameError when no GP model file exists
        lat_data = ClassificationDataSet(len(latent_Y[0]), 1, nb_classes=len(set(label_feature)))
    lat_data._convertToOneOfMany()
    return (alldata, lat_data)
def hillclimb(domain, costf):
    # Create a random solution
    sol = [random.randint(domain[i][0], domain[i][1]) for i in range(len(domain))]
    # Main loop
    while 1:
        # Create list of neighboring solutions
        neighbors = []
        for j in range(len(domain)):
            # One away in each direction
            if sol[j] > domain[j][0]:
                neighbors.append(sol[0:j] + [sol[j] - 1] + sol[j + 1:])
            if sol[j] < domain[j][1]:
                neighbors.append(sol[0:j] + [sol[j] + 1] + sol[j + 1:])
        # See what the best solution amongst the neighbors is
        current = costf(sol)
        best = current
        for j in range(len(neighbors)):
            cost = costf(neighbors[j])
            if cost < best:
                best = cost
                sol = neighbors[j]
        # If there's no improvement, then we've reached the top
        if best == current:
            break
    return sol

def plot_learning_curve(x, training_erorr, test_error, graph_title, graph_xlabel, graph_ylabel, ylim=None, xlim=None):
    plt.figure()
    plt.title(graph_title)
    if ylim is not None:
        plt.ylim(*ylim)
    if xlim is not None:
        plt.xlim(*xlim)
    plt.xlabel(graph_xlabel)
    plt.ylabel(graph_ylabel)
    train_error_mean = np.mean(training_erorr)
    train_error_std = np.std(training_erorr)
    test_error_mean = np.mean(test_error)
    test_error_std = np.std(test_error)
    plt.grid()
    plt.fill_between(x, training_erorr - train_error_std, training_erorr + train_error_std, alpha=0.1, color="r")
    plt.fill_between(x, test_error - test_error_std, test_error + test_error_std, alpha=0.1, color="g")
    print x
    print train_error_mean
    print training_erorr
    plt.plot(x, training_erorr, 'o-', color="r", label="Training score")
    plt.plot(x, test_error, 'o-', color="g", label="Test Score")
    plt.legend(loc="best")
    plt.savefig('plots/' + graph_title + '.png')
    plt.close()
    #plt.show()
#************************End of Functions**************************************************

#************************Start Data Prep********************************************
raw_data = np.genfromtxt('BreastCancerWisconsinDataset_modified.txt', delimiter=",", skip_header=1)
raw_inputs = raw_data[:, 0:-1]
raw_target = raw_data[:, 9:]
assert (raw_inputs.shape[0] == raw_target.shape[0]), "Inputs count and target count do not match"
all_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
all_data.setField('input', raw_inputs)
all_data.setField('target', raw_target)
all_data.setField('class', raw_target)
test_data_temp, training_data_temp = all_data.splitWithProportion(0.33)
test_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
for n in xrange(0, test_data_temp.getLength()):
    test_data.addSample(test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])
training_data = ClassificationDataSet(9, 1, nb_classes=2, class_labels=['Benign', 'Malignant'])
for n in xrange(0, training_data_temp.getLength()):
    training_data.addSample(training_data_temp.getSample(n)[0], training_data_temp.getSample(n)[1])
training_data._convertToOneOfMany()
test_data._convertToOneOfMany()
#********************End of Data Preparation***************************

#********************NN With GA***************************
def fitFunction(net, dataset=training_data, targetClass=training_data['class']):
    error = percentError(testOnClassData_custom(net, dataset=dataset), targetClass)
    return error

stepSize = [.05, .5, 1]
for s in stepSize:
    fnn_ga = buildNetwork(training_data.indim, 2, training_data.outdim, bias=True, outclass=SoftmaxLayer)
    domain = [(-1, 1)] * len(fnn_ga.params)
    #print domain
    epochs = 20
    epoch_v = []
    trnerr_ga = []
    tsterr_ga = []
    iteration = 5
    for i in xrange(epochs):
        winner = geneticoptimize(iteration, domain, fnn_ga, fitFunction, popsize=100, step=s,
                                 mutprob=0.2, elite=0.2)
        fnn_ga.params[:] = winner[:]
        training_error = fitFunction(fnn_ga, dataset=training_data, targetClass=training_data['class'])
        test_error = fitFunction(fnn_ga, dataset=test_data, targetClass=test_data['class'])
        epoch_v.append(i * iteration)
        trnerr_ga.append(training_error)
        tsterr_ga.append(test_error)
        print ("This is the training and test error at the epoch: ", training_error, test_error, i * iteration)
    ylim = (0, 70)
    xlim = (50, 1005)
    print ("This is epoch_value", epoch_v)
    print ("This is training ga", trnerr_ga)
    print ("This is test ga", tsterr_ga)
    plot_learning_curve(epoch_v, trnerr_ga, tsterr_ga, "Neural Network With GA_step_" + str(s),
                        "Epochs", "Error %", ylim, xlim=None)
#*****************End of GA NN*******************************

print ("This is the length of the training and test data, respectively", len(training_data), len(test_data))
print (training_data.indim, training_data.outdim)
print ("This is the shape of the input", all_data['input'].shape)
print ("This is the shape of the target", all_data['target'].shape)
print ("This is the shape of the class", all_data['class'].shape)
print ("This is count of classes", all_data.nClasses)
print ("Here is the statistics on the class", all_data.calculateStatistics())
print ("Here the linked fields", all_data.link)
print ("This is the shape of the input in training", training_data['input'].shape)
print ("This is the shape of the target in training", training_data['target'].shape)
print ("This is the shape of the class in training", training_data['class'].shape)
print ("This is the shape of the input in test", test_data['input'].shape)
print ("This is the shape of the target in test", test_data['target'].shape)
print ("This is the shape of the class in test", test_data['class'].shape)
                   10, trndata.outdim, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.05, verbose=True, weightdecay=0.01)
predictdata = ClassificationDataSet(5400, 1, nb_classes=29)
for i in range(0, len(norm_test_X)):
    predictdata.addSample(norm_test_X[i], [0])
predictdata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
for i in range(2000):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
    print "epoch: %4d" % trainer.totalepochs, \
          " train error: %5.2f%%" % trnresult, \
          " test error: %5.2f%%" % tstresult
out = fnn.activateOnDataset(predictdata)
out = out.argmax(axis=1)  # the highest output activation gives the class
result = [labels[e] for e in out]
print result
def NNBackPropCustom(trainInputs, trainTarget, testInputs, testTarget, inputDim, targetDim,
                     numClass, classLabels, bias=True, numHiddenLayers=2, numEpoch=10,
                     momentum=0.1, weightdecay=0.01):
    #NN Data Preparation
    assert (trainInputs.shape[0] == trainTarget.shape[0]), \
        "Inputs count and target count for your training data do not match for NN Analysis"
    assert (testInputs.shape[0] == testTarget.shape[0]), \
        "Inputs count and target count for your test data do not match for NN Analysis"
    training_data = ClassificationDataSet(inputDim, targetDim, nb_classes=numClass, class_labels=classLabels)
    test_data = ClassificationDataSet(inputDim, targetDim, nb_classes=numClass, class_labels=classLabels)
    training_data.setField('input', trainInputs)
    training_data.setField('target', trainTarget)
    training_data.setField('class', trainTarget)
    test_data.setField('input', testInputs)
    test_data.setField('target', testTarget)
    test_data.setField('class', testTarget)
    training_data._convertToOneOfMany()
    test_data._convertToOneOfMany()
    # NN With BackPropagation
    fnn_backprop = buildNetwork(training_data.indim, numHiddenLayers, training_data.outdim,
                                bias=bias, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn_backprop, dataset=training_data, momentum=momentum,
                              verbose=True, weightdecay=weightdecay)
    epochs = numEpoch
    epoch_v = []
    trnerr_backprop = []
    tsterr_backprop = []
    for i in xrange(epochs):
        # If you set the 'verbose' trainer flag, this will print the total error as it goes.
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), training_data['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=test_data), test_data['class'])
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % tstresult)
        epoch_v.append(trainer.totalepochs)
        trnerr_backprop.append(trnresult)
        tsterr_backprop.append(tstresult)
    return epoch_v, trnerr_backprop, tsterr_backprop
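A hedged call example for NNBackPropCustom with synthetic binary-class data; every size, split, and label below is illustrative rather than taken from the original experiment:

import numpy as np

X = np.random.rand(100, 9)
y = np.random.randint(0, 2, size=(100, 1)).astype(float)
epoch_v, trnerr, tsterr = NNBackPropCustom(
    X[:70], y[:70], X[70:], y[70:],          # 70/30 train/test split
    inputDim=9, targetDim=1, numClass=2,
    classLabels=['Benign', 'Malignant'], numEpoch=5)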
def main():
    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')
    #getting in the data from csv files and making it suitable for further action.
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])
    scaler = preprocessing.StandardScaler().fit(x_train)  #standardization plays an important role in all NN algos
    x_train = scaler.transform(x_train)  #final x_train
    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])
    x_test = scaler.transform(x_test)  # final x_test
    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(in_data)):
        alltraindata.addSample(x_train[count], [y_train[count]])
    alltraindata._convertToOneOfMany(bounds=[0, 1])
    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(out_data)):
        alltestdata.addSample(x_test[count], [y_test[count]])
    alltestdata._convertToOneOfMany(bounds=[0, 1])
    numRBFCenters = 10  #the 'h' value
    rbf = RBFNN(alltraindata.indim, alltraindata.outdim, numRBFCenters)
    rbf.train(alltraindata['input'], alltraindata['target'])
    testdata_target = rbf.test(alltestdata['input'])  #values obtained after testing, T is a 'n x outdim' matrix
    testdata_target = testdata_target.argmax(axis=1)  # the highest output activation gives the class. Selects the class predicted
    #testdata_target = testdata_target.reshape(len(in_data),1)
    #compare to y_test to obtain the accuracy.
    # count=0
    # for x in range(len(y_test)):
    #     if testdata_target[x] == y_test[x]:
    #         count+=1
    # tstresult2=float(count)/float(len(y_test)) * 100
    tstresult = percentError(testdata_target, alltestdata['class'])
    print "Accuracy on test data is: %5.2f%%," % (100 - tstresult)
    for x in range(len(y_test)):
        if y_test[x] != 0:  # binarize the labels for f1 scoring
            y_test[x] = 1
        else:
            y_test[x] = 0
    average_label = ['micro', 'macro', 'weighted']
    for label in average_label:
        f1 = f1_score(y_test, testdata_target, average=label)
        print "f1 score (%s)" % label, "is ", f1
def main():
    # Read the data from the csv files and make it suitable for further processing.
    in_data = np.genfromtxt('logit-train.csv', delimiter=',')
    out_data = np.genfromtxt('logit-test.csv', delimiter=',')
    in_data = in_data[~np.isnan(in_data).any(1)]
    t = len(in_data[0, :])
    y_train = np.array(in_data[0:, t - 1])
    x_train = np.array(in_data[0:, :t - 1])
    # Standardization plays an important role in all NN algorithms.
    scaler = preprocessing.StandardScaler().fit(x_train)
    x_train = scaler.transform(x_train)  # final x_train
    out_data = out_data[~np.isnan(out_data).any(1)]
    t = len(out_data[0, :])
    y_test = np.array(out_data[0:, t - 1])
    x_test = np.array(out_data[0:, :t - 1])
    x_test = scaler.transform(x_test)  # final x_test

    alltraindata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(in_data)):
        alltraindata.addSample(x_train[count], [y_train[count]])
    alltraindata._convertToOneOfMany(bounds=[0, 1])

    alltestdata = ClassificationDataSet(t - 1, 1, nb_classes=2)
    for count in range(len(out_data)):
        alltestdata.addSample(x_test[count], [y_test[count]])
    alltestdata._convertToOneOfMany(bounds=[0, 1])

    numRBFCenters = 50
    # KMeans finds the centroids for the RBF neurons.
    kmeans = KMeans(n_clusters=numRBFCenters)
    kmeans.fit(alltraindata['input'])
    centers = kmeans.cluster_centers_  # shape: (numRBFCenters, n_features)
    # cluster_distance[i][j] is the distance of sample i to center j.
    cluster_distance = kmeans.transform(alltraindata['input'])

    # Calculate the sigma/smoothness parameter of each radial basis function:
    # the standard deviation of the points in each cluster, one value per RBF center.
    distance_std = []
    for lab in range(numRBFCenters):
        distance_within_cluster = []  # reset per cluster, so each std covers one cluster only
        for x, label in enumerate(kmeans.labels_):
            if label == lab:
                distance_within_cluster.append(cluster_distance[x][label])
        distance_std.append(np.std(distance_within_cluster))

    # Pass the centers and widths in for RBFNN initialization.
    rbf = RBFNN(alltraindata.indim, alltraindata.outdim, numRBFCenters, centers, distance_std)
    rbf.train(alltraindata['input'], alltraindata['target'])

    # Testing returns an 'n x outdim' activation matrix; the highest output
    # activation gives the predicted class.
    testdata_target = rbf.test(alltestdata['input']).argmax(axis=1)
    traindata_target = rbf.test(alltraindata['input']).argmax(axis=1)

    trnresult = percentError(traindata_target, alltraindata['class'])
    tstresult = percentError(testdata_target, alltestdata['class'])
    print "Accuracy on train data is: %5.2f%%," % (100 - trnresult)
    print "Accuracy on test data is: %5.2f%%," % (100 - tstresult)

    # Binarize the raw labels before computing f1 scores.
    for x in range(len(y_test)):
        y_test[x] = 1 if y_test[x] else 0
    for label in ['micro', 'macro', 'weighted']:
        f1 = f1_score(y_test, testdata_target, average=label)
        print "f1 score (%s) is %s" % (label, f1)
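# RBFNN is not defined anywhere in these snippets. Below is a minimal sketch of
# the interface the two main() functions above assume: Gaussian activations
# around the given centers and least-squares output weights. Only the class
# name and constructor argument order are taken from the calls above;
# everything else is an assumption.
import numpy as np

class RBFNN(object):
    def __init__(self, indim, outdim, numCenters, centers=None, sigmas=None):
        self.indim, self.outdim, self.numCenters = indim, outdim, numCenters
        self.centers = centers
        self.sigmas = sigmas
        self.W = None

    def _activations(self, X):
        # Gaussian activation of every sample against every center.
        G = np.zeros((X.shape[0], self.numCenters))
        for j, (c, s) in enumerate(zip(self.centers, self.sigmas)):
            d = np.linalg.norm(X - c, axis=1)
            G[:, j] = np.exp(-d ** 2 / (2 * s ** 2 + 1e-8))
        return G

    def train(self, X, T):
        if self.centers is None:
            # Fall back to random training points as centers, with unit widths.
            idx = np.random.choice(len(X), self.numCenters, replace=False)
            self.centers = X[idx]
            self.sigmas = np.ones(self.numCenters)
        G = self._activations(X)
        self.W = np.linalg.pinv(G).dot(T)  # least-squares output weights

    def test(self, X):
        return self._activations(X).dot(self.W)  # 'n x outdim' activation matrix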
def main():
    """
    CLI Arguments allowed:
        --display_graphs          Displays graphs
        --retrain                 Trains a new model
        --cross-validate          Runs cross validation to fine tune the model
        --test=validation_set     Tests the latest trained model against the validation set
        --test=test_set           Tests the latest trained model against the test set
    """
    global trainer, classifier
    inputs_train, targets_train, inputs_valid, targets_valid, inputs_test, targets_test = load_parsed_data()
    if '--display_graphs' in sys.argv:
        display_graphs = True
    print('using {} percent of all data in corpus'.format(PERCENTAGE_DATA_SET_TO_USE * 100))
    print('using {} most common words as features'.format(NUM_FEATURES))
    if not trained_model_exists() or '--retrain' in sys.argv:
        # Slice indices must be integers, so cast the scaled lengths.
        train_features, valid_features, test_features = extract_features(
            inputs_train[:int(len(inputs_train) * PERCENTAGE_DATA_SET_TO_USE)],
            targets_train[:int(len(targets_train) * PERCENTAGE_DATA_SET_TO_USE)],
            inputs_valid[:int(len(inputs_valid) * PERCENTAGE_DATA_SET_TO_USE)],
            targets_valid[:int(len(targets_valid) * PERCENTAGE_DATA_SET_TO_USE)],
            inputs_test[:int(len(inputs_test) * PERCENTAGE_DATA_SET_TO_USE)],
            targets_test[:int(len(targets_test) * PERCENTAGE_DATA_SET_TO_USE)]
        )
        save_features(train_features, valid_features, test_features)
        pca = RandomizedPCA(n_components=N_COMPONENTS, whiten=False).fit(train_features)
        save_pca(pca)
        print("Saved PCA")
        X_train = pca.transform(train_features)
        X_valid = pca.transform(valid_features)
        pca = None
        print("Created PCAd features")
        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample(X_valid[i], targets_valid[i])  # validation targets, not test targets
        valid_data._convertToOneOfMany()
        X_valid = None
        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample(X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None
        classifier = buildNetwork(train_data.indim, N_HIDDEN, train_data.outdim, outclass=SoftmaxLayer)
        trainer = BackpropTrainer(classifier, dataset=train_data, momentum=0.1, learningrate=0.01, verbose=True)
        train_model(train_data, valid_data)
        save_model(classifier)
        train_data = None
        valid_data = None
    else:
        train_features, valid_features, test_features = load_features()
        pca = load_pca()
        X_train = pca.transform(train_features)
        pca = None
        print("Created PCAd features")
        train_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_train)):
            train_data.addSample(X_train[i], targets_train[i])
        train_data._convertToOneOfMany()
        X_train = None
        classifier = load_trained_model()
        trainer = BackpropTrainer(classifier, dataset=train_data, momentum=0.1, learningrate=0.01, verbose=True)
    if '--test=validation_set' in sys.argv:
        print("Running against validation set")
        pca = load_pca()
        X_valid = pca.transform(valid_features)
        pca = None
        valid_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_valid)):
            valid_data.addSample(X_valid[i], targets_valid[i])  # validation targets, not test targets
        valid_data._convertToOneOfMany()
        X_valid = None
        make_prediction(valid_data)
    if '--test=test_set' in sys.argv:
        print("Running against test set")
        pca = load_pca()
        X_test = pca.transform(test_features)
        pca = None
        test_data = ClassificationDataSet(N_COMPONENTS, target=1, nb_classes=2)
        for i in range(len(X_test)):
            test_data.addSample(X_test[i], targets_test[i])
        test_data._convertToOneOfMany()
        y_pred = trainer.testOnClassData(dataset=test_data)
        plot_precision_and_recall(y_pred, targets_test[:int(len(targets_test) * PERCENTAGE_DATA_SET_TO_USE)])
        X_test = None
        make_prediction(test_data)
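# make_prediction, train_model, extract_features and the save/load helpers are
# defined elsewhere in this project. As a sketch only, a make_prediction that
# reports percent error on a dataset might look like this; the body is an
# assumption, not the project's actual implementation.
from pybrain.utilities import percentError

def make_prediction(data):
    y_pred = trainer.testOnClassData(dataset=data)  # uses the global trainer
    error = percentError(y_pred, data['class'])
    print('error: %5.2f%%  accuracy: %5.2f%%' % (error, 100 - error))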
class Brain():
    def __init__(self, hidden_nodes=30):
        """
        Constructor.
        Input: hidden_nodes - number of hidden nodes used in the neural network.

        The parameters to buildNetwork are inputs, hidden layer size, outputs;
        bias=True adds a bias unit to each layer, and hiddenclass sets the
        activation used by the hidden layer.
        """
        # Regression
        # self.classifier_neural_net = buildNetwork(12, hidden_nodes, 1, bias=True, hiddenclass=TanhLayer)
        # # Initializing dataset for supervised regression training
        # self.data_sets = SupervisedDataSet(12, 1)
        # # The trainer uses the backpropagation supervised training method.
        # self.classification_trainer = BackpropTrainer(self.classifier_neural_net, self.data_sets)

        # Classification
        self.classifier_neural_net = buildNetwork(12, hidden_nodes, 3, outclass=SoftmaxLayer, hiddenclass=TanhLayer)
        self.data_sets = ClassificationDataSet(12, 1, nb_classes=3)
        self.classification_trainer = BackpropTrainer(
            self.classifier_neural_net, self.data_sets,
            momentum=0.1, verbose=True, weightdecay=0.01)

    def add_image_to_train(self, image_file, group_id):
        """Add a sample image to the datasets for training the classifier."""
        tto = io.twelve_tone(image_file)
        print(tto)
        # regression
        # self.data_sets.addSample(tto, (group_id,))
        # classification
        self.data_sets.addSample(tto, [group_id])

    def train(self):
        # classification
        self.data_sets._convertToOneOfMany()
        # self.classification_trainer.trainEpochs(30)
        print("Converging...This is going to take long!")
        # trainUntilConvergence can take forever (possibly literally in the pathological case)
        self.classification_trainer.trainUntilConvergence()

    def save(self, file_name="classifier.brain"):
        with open(get_path(file_name), 'wb') as file_pointer:
            pickle.dump(self.classifier_neural_net, file_pointer)

    def load(self, file_name="classifier.brain"):
        with open(get_path(file_name), 'rb') as file_pointer:
            self.classifier_neural_net = pickle.load(file_pointer)

    def accuracy(self):
        if len(self.data_sets) == 0:
            print "No data_sets found. Maybe you loaded the classifier from a file?"
            return
        # regression
        # tstresult = self.classifier_neural_net.activateOnDataset(self.data_sets)
        # tstresult = mean_squared_error(self.data_sets['target'], tstresult)
        # classification
        tstresult = percentError(
            self.classification_trainer.testOnClassData(dataset=self.data_sets),
            self.data_sets['class'])
        print "epoch: %4d" % self.classification_trainer.totalepochs, \
            "trainer error: %5.2f%%" % tstresult, \
            "trainer accuracy: %5.2f%%" % (100 - tstresult)

    def classify(self, image_file):
        score = self.classifier_neural_net.activate(io.twelve_tone(image_file))
        print(score)
        # regression
        # score = round(score)
        # classification: index of the strongest output activation
        score = max(xrange(len(score)), key=score.__getitem__)
        print(score)
        if score == 0:
            return "chick-peas"
        elif score == 1:
            return "green-peas"
        else:
            return "rice"
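# A short usage sketch for Brain; the image paths below are hypothetical.
brain = Brain(hidden_nodes=30)
brain.add_image_to_train('samples/chickpeas_01.jpg', 0)  # hypothetical files
brain.add_image_to_train('samples/greenpeas_01.jpg', 1)
brain.add_image_to_train('samples/rice_01.jpg', 2)
brain.train()
brain.accuracy()
brain.save()
print(brain.classify('samples/unknown.jpg'))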
def ANN(X_train, Y_train, X_test, Y_test, *args):
    """
    An artificial neural network, based on the python library pybrain. In the
    future this function should be modified to use the SkyNet ANN code instead.

    INPUTS:
    X_train - An array containing the features of the training set, of size (N_samples, N_features)
    Y_train - An array containing the class labels of the training set, of size (N_samples,)
    X_test  - An array containing the features of the testing set, of size (N_samples, N_features)
    Y_test  - An array containing the class labels of the testing set, of size (N_samples,)
    *args   - Currently unused. In the future could specify the network
              architecture and activation functions at each node.

    OUTPUTS:
    probs - an array containing the probabilities for each class for each
            member of the testing set, of size (N_samples, N_classes)
    """
    Y_train_copy = Y_train.copy()
    Y_test_copy = Y_test.copy()

    # Convert class labels from 1,2,3 to 0,1,2 as _convertToOneOfMany requires this
    Y_train_copy[(Y_train_copy == 1)] = 0
    Y_train_copy[(Y_train_copy == 2)] = 1
    Y_train_copy[(Y_train_copy == 3)] = 2
    Y_test_copy[(Y_test_copy == 1)] = 0
    Y_test_copy[(Y_test_copy == 2)] = 1
    Y_test_copy[(Y_test_copy == 3)] = 2

    # Put all the data in datasets as required by pybrain
    Y_train_copy = np.expand_dims(Y_train_copy, axis=1)
    Y_test_copy = np.expand_dims(Y_test_copy, axis=1)
    traindata = ClassificationDataSet(X_train.shape[1], nb_classes=len(np.unique(Y_train_copy)))  # preallocate dataset
    traindata.setField('input', X_train)  # add named fields
    traindata.setField('target', Y_train_copy)
    traindata._convertToOneOfMany()  # convert classes 0, 1, 2 to 001, 010, 100
    testdata = ClassificationDataSet(X_test.shape[1], nb_classes=len(np.unique(Y_test_copy)))
    testdata.setField('input', X_test)
    testdata.setField('target', Y_test_copy)
    testdata._convertToOneOfMany()

    # Create an ANN with n_features inputs, n_classes outputs and HL_size nodes in each hidden layer
    N = pb.FeedForwardNetwork()
    HL_size1 = X_train.shape[1] * 2 + 2
    HL_size2 = X_train.shape[1] * 2 + 2

    # Create layers and connections
    in_layer = LinearLayer(X_train.shape[1])
    hidden_layer1 = SigmoidLayer(HL_size1)
    hidden_layer2 = SigmoidLayer(HL_size2)
    out_layer = SoftmaxLayer(len(np.unique(Y_test_copy)))  # normalizes the output so it sums to 1
    in_to_hidden1 = FullConnection(in_layer, hidden_layer1)
    hidden1_to_hidden2 = FullConnection(hidden_layer1, hidden_layer2)
    hidden2_to_out = FullConnection(hidden_layer2, out_layer)

    # Connect them up
    N.addInputModule(in_layer)
    N.addModule(hidden_layer1)
    N.addModule(hidden_layer2)
    N.addOutputModule(out_layer)
    N.addConnection(in_to_hidden1)
    N.addConnection(hidden1_to_hidden2)
    N.addConnection(hidden2_to_out)
    N.sortModules()

    # Create the backpropagation object
    trainer = BackpropTrainer(N, dataset=traindata, momentum=0.1, verbose=False, weightdecay=0.01)

    # Train the network on the data for some number of epochs
    for counter in np.arange(40):
        trainer.train()

    # Run the network on the testing data, one sample at a time
    probs = N.activate(X_test[0, :])
    probs = np.expand_dims(probs, axis=0)
    for counter in np.arange(X_test.shape[0] - 1):
        next_probs = N.activate(X_test[counter + 1, :])
        next_probs = np.expand_dims(next_probs, axis=0)
        probs = np.append(probs, next_probs, axis=0)

    return probs
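# The probability matrix returned by ANN maps back to the original 1/2/3
# labels via argmax; a usage sketch, assuming X_train, Y_train, X_test, Y_test
# already exist:
probs = ANN(X_train, Y_train, X_test, Y_test)
preds = probs.argmax(axis=1) + 1   # class indices 0,1,2 back to labels 1,2,3
print (preds == Y_test).mean()     # fraction of correct predictions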
ds.calculateStatistics()

# split of training and testing dataset
tstdata_temp, trndata_temp = ds.splitWithProportion(0.5)
tstdata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, tstdata_temp.getLength()):
    tstdata.appendLinked(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
trndata = ClassificationDataSet(30, 1, nb_classes=2)
for n in range(0, trndata_temp.getLength()):
    trndata.appendLinked(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

##### build net and training
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.utilities import percentError

n_hidden = 500
bp_nn = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(bp_nn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
features_train = features_pd.iloc[:train_count]
# print(features_train.describe())
features_test = features_pd.iloc[train_count:]
# print(features_test.describe())

x_train, x_test, y_train, y_test = train_test_split(features_train, labels, test_size=0.2, random_state=1)
X = (x_train, x_test, y_train, y_test)
print(y_train)

dsTrain = ClassificationDataSet(18, 1, nb_classes=2)
for row in range(len(x_train)):
    dsTrain.addSample(tuple(x_train.iloc[row]), y_train.iloc[row])
dsTrain._convertToOneOfMany()

dsTest = ClassificationDataSet(18, 1, nb_classes=2)
for row in range(len(x_test)):
    dsTest.addSample(tuple(x_test.iloc[row]), y_test.iloc[row])
dsTest._convertToOneOfMany()

fnn = buildNetwork(18, 20, 20, 2, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=dsTrain, momentum=0.1, verbose=True, weightdecay=0.01)
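# A training loop in the same style as the other snippets could follow; this
# sketch assumes percentError is imported from pybrain.utilities and an
# arbitrary ten epochs.
for epoch in range(10):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), dsTrain['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=dsTest), dsTest['class'])
    print("epoch: %4d  train error: %5.2f%%  test error: %5.2f%%"
          % (trainer.totalepochs, trnresult, tstresult))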
trnresult = percentError(trainer.testOnClassData(), trndata['class'])
tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
print "epoch: %4d" % trainer.totalepochs, \
    " train error: %5.2f%%" % trnresult, \
    " test error: %5.2f%%" % tstresult
if tstresult < previous_error:
    fnn = try_fnn
    previous_error = tstresult
    NetworkWriter.writeToFile(fnn, 'nn.xml')

log.warning('Activating NeuralNetwork...')
nginx_log = ClassificationDataSet(len(dictionary), 1, nb_classes=2)
add_samples_to_training_set(nginx_log, options.log_file, 0)
nginx_log._convertToOneOfMany()  # this is still needed to make the fnn feel comfy
out = fnn.activateOnDataset(nginx_log)
out = out.argmax(axis=1)  # the highest output activation gives the class

with open(options.log_file) as log_file:
    cnt = 0
    for line in log_file:
        try:
            entry = LogEntry(*nginx_log_re.match(line).groups())
            if out[cnt]:
                print "BOT: ",
            else:
                print "GOOD: ",
            print "{0}".format(entry)
            cnt += 1
        except AttributeError:
            # the regex did not match this line; skip it
            continue
def trainet2(data, nhide=8, nhide1=8, epo=10, wd=.1, fn=''):
    alldata = data
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.5)
    tstdata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
    trndata = ClassificationDataSet(alldata.indim, nb_classes=alldata.nClasses)
    for n in range(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    tstdata._convertToOneOfMany()
    trndata._convertToOneOfMany()

    net = FeedForwardNetwork()
    inLayer = LinearLayer(trndata.indim)
    hiddenLayer = TanhLayer(nhide)
    hiddenLayer1 = TanhLayer(nhide1)
    outLayer = LinearLayer(trndata.outdim)
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer)
    net.addModule(hiddenLayer1)
    net.addOutputModule(outLayer)
    in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_hidden = FullConnection(hiddenLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer1, outLayer)
    net.addConnection(in_to_hidden)
    net.addConnection(hidden_to_hidden)
    net.addConnection(hidden_to_out)
    net.sortModules()
    # Note: setting net.bias after construction has no effect; bias units must
    # be added as modules (e.g. BiasUnit) while the network is assembled.
    net.bias = True

    trainer = BackpropTrainer(net, dataset=trndata, verbose=True, weightdecay=wd, momentum=0.1)
    edata = []
    msedata = []
    for i in range(epo):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
        tod = trainer.testOnData(verbose=False)
        print("epoch: %4d" % trainer.totalepochs,
              " train error: %5.2f%%" % trnresult,
              " test error: %5.2f%%" % tstresult,
              " layers: ", nhide1,
              " N_tourn: ", alldata.indim / 2)
        edata.append([trnresult, tstresult])
        msedata.append([i, tod])
    with open(fn + ".dta", 'w') as fp:
        json.dump(edata, fp)
    with open(fn + ".mse", 'w') as fp:
        json.dump(msedata, fp)
    return net
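# Hypothetical usage: train on an existing ClassificationDataSet and reload
# the logged per-epoch errors; 'alldata' and the file stem are assumptions.
net = trainet2(alldata, nhide=8, nhide1=8, epo=10, wd=.1, fn='run1')
with open('run1.dta') as fp:
    errors = json.load(fp)  # list of [train_error, test_error] pairs, one per epoch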
lin_clf = svm.LinearSVC()  # create a linear svm
lin_clf.fit(x_train, y_train)  # train the svm
y_hat['svm'] = lin_clf.predict(x_test)

############## ANN ################
print '\nTraining Artificial Neural Network'
trndata = ClassificationDataSet(n_components)
for i in range(0, y_train.size):  # add data to the pybrain structure
    trndata.addSample(x_train[i], y_train[i])
tstdata = ClassificationDataSet(n_components)
for i in range(0, y_test.size):
    tstdata.addSample(x_test[i], y_test[i])
trndata._convertToOneOfMany()  # convert the label to a multidimensional label
tstdata._convertToOneOfMany()

n = FeedForwardNetwork()
inLayer = LinearLayer(trndata.indim)
hiddenLayer = SigmoidLayer(15)
outLayer = LinearLayer(trndata.outdim)
n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addOutputModule(outLayer)
in_to_hidden = FullConnection(inLayer, hiddenLayer)
hidden_to_out = FullConnection(hiddenLayer, outLayer)
n.addConnection(in_to_hidden)
n.addConnection(hidden_to_out)
n.sortModules()
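# The network would then be trained and queried like the SVM above; a sketch,
# where the 'ann' key and the epoch count are assumptions.
trainer = BackpropTrainer(n, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(10)
y_hat['ann'] = np.array(trainer.testOnClassData(dataset=tstdata))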
# build the network quickly from everything above with the buildNetwork shortcut
fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)

# set up the trainer
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)

# generate a grid of data points
ticks = arange(-3., 6., 0.2)
X, Y = meshgrid(ticks, ticks)
# the dataset needs a column vector, without pointers
griddata = ClassificationDataSet(2, 1, nb_classes=3)
for i in xrange(X.size):
    griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
griddata._convertToOneOfMany()  # keeps the fnn happy

# start the training iterations
for i in range(20):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult

out = fnn.activateOnDataset(griddata)
out = out.argmax(axis=1)  # the highest output activation gives the class
out = out.reshape(X.shape)
figure(1)
ioff()  # interactive graphics off
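# The grid output is typically drawn as a filled contour plot with the
# training points overlaid; a sketch, assuming the usual 'from pylab import *'
# that the bare figure()/ioff() calls above imply.
clf()
contourf(X, Y, out)  # shade the plane by predicted class
for c in [0, 1, 2]:
    here, _ = where(trndata['class'] == c)
    plot(trndata['input'][here, 0], trndata['input'][here, 1], 'o')
ion()
show()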
dataFile1.close()
dataFile2.close()
dataFile3.close()
dataFile4.close()
dataFile5.close()

# Stack the batches row-wise. (np.append with a nested list would flatten
# everything to 1-D and break the per-image indexing below.)
images = np.concatenate((data1['data'], data2['data'], data3['data'], data4['data'], data5['data']))
labels = np.concatenate((data1['labels'], data2['labels'], data3['labels'], data4['labels'], data5['labels']))

# Construct the classification data set for learning
print 'Constructing the Data Set'
dataSet = ClassificationDataSet(3072, 1, nb_classes=10)
for index in range(0, labels.size):
    dataSet.addSample(images[index], labels[index])
dataSet._convertToOneOfMany()

# Train the neural network
print 'Training the Neural Network'
trainer = BackpropTrainer(network, dataset=dataSet, momentum=0.1, verbose=True, weightdecay=0.01)
trainer.trainEpochs(5)

# Save the neural network to a file for later use (binary mode for pickling)
print 'Saving to File'
networkFile = open('trainedNet1.cpkl', 'wb')
cPickle.dump(network, networkFile)
networkFile.close()
print 'Finished Training Network'
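# The five batches above are presumably the standard CIFAR-10 python files,
# unpickled before this snippet; a sketch of that loading step (the path is
# an assumption):
import cPickle
dataFile1 = open('cifar-10-batches-py/data_batch_1', 'rb')
data1 = cPickle.load(dataFile1)  # dict with 'data' (10000x3072) and 'labels'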
variations = [0 for y in range(n_classes)]
# print(statistics.pvariance(points[x]))
# variations[x] = results[x][1] / results[x][0]
variations = [calculate_variance(point, center) for point, center in zip(points, centers)]
print(centers, variations)

entries = pseudo_samples(data)
train_data = ClassificationDataSet(n_classes, 1, nb_classes=n_output)
for n in range(0, len(entries)):
    train_data.addSample(entries[n], [data[n][-1]])
train_data._convertToOneOfMany()

for epochs in range(6):
    rights = 0
    cont = 0
    for i in range(len(train_data["input"])):
        # print("<")
        results = []
        for j in range(len(output_weights)):
            add_bias = [1]
            add_bias.extend(train_data["input"][i])
            # print(add_bias)
            total = 0
            for x in range(len(add_bias)):  # computing each output
tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)
tstdata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(0, tstdata_temp.getLength()):
    tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])
trndata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(0, trndata_temp.getLength()):
    trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]

fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
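# This mirrors the classic PyBrain classification tutorial, where 'alldata'
# is generated from three 2-D Gaussian clusters before the split above; a
# sketch of that generation step, using the tutorial's means and covariances:
from numpy import diag
from numpy.random import multivariate_normal
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)
for n in xrange(400):
    for klass in range(3):
        inp = multivariate_normal(means[klass], cov[klass])
        alldata.addSample(inp, [klass])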
for line in inputFile.readlines():
    data = [float(x) for x in line.strip().split() if x != '']
    indata = tuple(data[:7])
    outdata = tuple(data[7:])
    ds.addSample(indata, outdata)
    k += 1
    if k == size:
        testdata, traindata = ds.splitWithProportion(PorcDivTest)
        ds.clear()
        k = 0

for inp, targ in testdata:
    testSet.appendLinked(inp, targ - 1)
for inp, targ in traindata:
    trainSet.appendLinked(inp, targ - 1)
trainSet._convertToOneOfMany(bounds=[0, 1])
testSet._convertToOneOfMany(bounds=[0, 1])

if camada2 == 0:
    net = buildNetwork(trainSet.indim, camada1, trainSet.outdim, recurrent=True)
else:
    net = buildNetwork(trainSet.indim, camada1, camada2, trainSet.outdim, recurrent=True)

trainer = BackpropTrainer(net, dataset=trainSet, learningrate=Learning, momentum=Momentum, verbose=True)
trainer.trainOnDataset(trainSet, Ciclos)
out = net.activateOnDataset(testSet)
out = out.argmax(axis=1)

acerto = total = i = 0
for data in testSet:
    if data[1][0] == 1 and out[i] == 0:
class2vec2 = np.reshape(class2im2, np.size(class2im1))
class2vec3 = np.reshape(class2im3, np.size(class2im1))
class2vec4 = np.reshape(class2im4, np.size(class2im1))
class2vec5 = np.reshape(class2im5, np.size(class2im1))
trainData1 = np.array([class1vec1, class1vec2, class1vec3, class1vec4, class1vec5,
                       class2vec1, class2vec2, class2vec3, class2vec4, class2vec5])

ncomponents = 9
pca = PCA(n_components=ncomponents)
pca.fit(trainData1)
trainData = pca.transform(trainData1)
trainLabels = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2])

trnData = ClassificationDataSet(ncomponents, 1, nb_classes=2)
for i in range(len(trainLabels)):
    trnData.addSample(trainData[i, :], trainLabels[i] - 1)
tstdata, trndata = trnData.splitWithProportion(0.40)
trnData._convertToOneOfMany()

fnn = buildNetwork(trnData.indim, 20, trnData.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trnData, momentum=0.1, verbose=True, weightdecay=0.01)
for i in range(20):
    trainer.trainEpochs(5)
    trnresult = percentError(trainer.testOnClassData(), trnData['class'])
    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult

outTrain = fnn.activateOnDataset(trnData)
outTrainLabels = outTrain.argmax(axis=1) + 1
numErrTrain = sum(outTrainLabels != trainLabels)
accTrain = 1 - float(numErrTrain) / len(trainLabels)

# Imports belonging to the following snippet:
from __future__ import division
import numpy as np
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
# reconvert to fix class issue
testingData = ClassificationDataSet(64 * 64 * 3, nb_classes=2)
for n in xrange(0, testingDataTemp.getLength()):
    testingData.addSample(testingDataTemp.getSample(n)[0], testingDataTemp.getSample(n)[1])
trainingData = ClassificationDataSet(64 * 64 * 3, nb_classes=2)
for n in xrange(0, trainingDataTemp.getLength()):
    trainingData.addSample(trainingDataTemp.getSample(n)[0], trainingDataTemp.getSample(n)[1])

# re-encode outputs, necessary for training accurately
testingData._convertToOneOfMany()
trainingData._convertToOneOfMany()

##### BUILD ANN #####
# build feed-forward multi-layer perceptron ANN
fnn = FeedForwardNetwork()

# create layers: 64*64*3 input nodes (plus a bias unit), half as many hidden nodes, 2 output nodes
bias = BiasUnit(name='bias unit')
input_layer = LinearLayer(64 * 64 * 3, name='input layer')
hidden_layer = SigmoidLayer(64 * 64 * 3 / 2, name='hidden layer')
output_layer = SigmoidLayer(2, name='output layer')

# create connections with full connectivity between layers
bias_to_hidden = FullConnection(bias, hidden_layer, name='bias-hid')
bias_to_output = FullConnection(bias, output_layer, name='bias-out')
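# The snippet breaks off here. The remaining assembly would follow the usual
# FeedForwardNetwork pattern; a sketch, with the layer-to-layer connections
# and module registration assumed:
in_to_hidden = FullConnection(input_layer, hidden_layer, name='in-hid')
hidden_to_out = FullConnection(hidden_layer, output_layer, name='hid-out')
fnn.addInputModule(input_layer)
fnn.addModule(bias)
fnn.addModule(hidden_layer)
fnn.addOutputModule(output_layer)
fnn.addConnection(bias_to_hidden)
fnn.addConnection(bias_to_output)
fnn.addConnection(in_to_hidden)
fnn.addConnection(hidden_to_out)
fnn.sortModules()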