def handle(self, *args, **options): better_thans = BetterThan.objects.all() #.filter(pk__lte=50) ds = SupervisedDataSet(204960, 1) for better_than in better_thans: bt = imread(better_than.better_than.image.file) wt = imread(better_than.worse_than.image.file) better_than.better_than.image.file.close() better_than.worse_than.image.file.close() # bt = filters.sobel(bt) # wt = filters.sobel(wt) bt_input_array = np.reshape(bt, (bt.shape[0] * bt.shape[1])) wt_input_array = np.reshape(wt, (wt.shape[0] * wt.shape[1])) input_1 = np.append(bt_input_array, wt_input_array) input_2 = np.append(wt_input_array, bt_input_array) ds.addSample(np.append(bt_input_array, wt_input_array), [-1]) ds.addSample(np.append(wt_input_array, bt_input_array), [1]) net = buildNetwork(204960, 2, 1) train_ds, test_ds = ds.splitWithProportion(options['train_test_split']) _, test_ds = ds.splitWithProportion(options['test_split']) trainer = BackpropTrainer(net, ds) print 'Looking for -1: {0}'.format(net.activate(np.append(bt_input_array, wt_input_array))) print 'Looking for 1: {0}'.format(net.activate(np.append(wt_input_array, bt_input_array))) trainer.train() print 'Looking for -1: {0}'.format(net.activate(np.append(bt_input_array, wt_input_array))) print 'Looking for 1: {0}'.format(net.activate(np.append(wt_input_array, bt_input_array)))
def fnn3(frame,string):
    """Train a deep feed-forward regressor on one factor DataFrame and save
    a prediction plot (<string>tst1.png) and a convergence plot
    (<string>cov1.png).

    frame  -- DataFrame with 'gvkey'/'date'/'price' id columns and a 'ret'
              target column
    string -- basename for the two output figures

    NOTE(review): the bare ``return`` yields None; also testOnClassData /
    percentError are classification utilities applied here to a regression
    net -- confirm the reported "test error" is meaningful.
    """
    df=frame.dropna(axis=0,how='any')
    name=string
    nm1=name+"tst1.png"
    nm2=name+"cov1.png"
    # Features: everything except the identifier/price columns.
    x=df.drop(columns=['gvkey','date','price'])
    x=np.array(x)
    x=normalize(x, axis=0, norm='max')  # scale each feature by its max
    y=np.array(df['ret'])
    x=np.delete(x,1,axis=1)  # drop the second feature column
    xdim=x.shape[1]
    ydim=1
    DS=SupervisedDataSet(xdim,ydim)
    for i in range(len(x)):
        DS.addSample(x[i],y[i])
    dataTrain, dataTest = DS.splitWithProportion(0.8)
    dataPlot, datadrop =DS.splitWithProportion(0.002)  # tiny slice for plotting
    xTrain, yTrain = dataTrain['input'],dataTrain['target']
    xTest, yTest = dataTest['input'], dataTest['target']
    xPlot, yPlot= dataPlot['input'], dataPlot['target']
    # Four-layer net: xdim -> xdim+1 -> xdim+2 -> (xdim+1)/2 -> 1.
    fnn=buildNetwork(xdim,xdim+1,xdim+2,int(0.5*(xdim+1)),ydim,hiddenclass=TanhLayer,outclass=LinearLayer)
    trainer=BackpropTrainer(fnn,dataTrain,learningrate=0.000000001,verbose=True)
    err_train, err_valid =trainer.trainUntilConvergence(maxEpochs=100)
    tstresult = percentError( trainer.testOnClassData(), dataTest['target'] )
    print("epoch: %4d" % trainer.totalepochs, " test error: %5.2f%%" % tstresult)
    predict_resutl=[]
    for i in np.arange(len(xPlot)):
        predict_resutl.append(fnn.activate(xPlot[i])[0])
    print(predict_resutl)
    #yTest2=yTest([0:len(yTest):12])
    #pred2=predict_resutl([0:len(predict_resutl):12])
    plt.figure(figsize=(30,6), dpi=600)
    plt.xlabel("Test Timeline")
    plt.ylabel("Result")
    plt.plot(np.arange(0,len(xPlot)), yPlot,'ko-', label='true number')
    plt.plot(np.arange(0,len(xPlot)), predict_resutl,'ro--', label='predict number')
    lgnd1=plt.legend()
    # NOTE(review): bbox_extra_artists=(lgnd1) is not a tuple (missing
    # trailing comma) -- same on the second savefig below; confirm.
    plt.savefig(nm1,dpi=600, bbox_extra_artists=(lgnd1))
    plt.figure(figsize=(9,9), dpi=600)
    plt.plot(err_train,'b',label='train_err')
    plt.plot(err_valid,'r',label='valid_err')
    plt.xlabel("Training Times")
    plt.ylabel("Total Error")
    lgnd2=plt.legend()
    plt.savefig(nm2,dpi=600, bbox_extra_artists=(lgnd2))
    plt.show()
    return

# One run per factor-family DataFrame (defined elsewhere in the file).
fnn3(Apro,"Apro")
fnn3(Aval,"Aval")
fnn3(Amom,"Amom")
fnn3(Atra,"Atra")
fnn3(Afd,"Afd")
fnn3(Atec,"Atec")
def buildDataset(inpts, targets):
    """Pack parallel input/target sequences into a 12-in/1-out dataset.

    inpts   -- sequence of 12-element input vectors
    targets -- sequence of 1-element targets, parallel to ``inpts``
    Returns the (75%, 25%) pair produced by ``splitWithProportion(0.75)``.
    """
    ds = SupervisedDataSet(12, 1)
    # Idiomatic pairwise iteration instead of a manual while-counter loop.
    for inpt, target in zip(inpts, targets):
        ds.addSample(inpt, target)
    return ds.splitWithProportion(0.75)
def classicNeuralNetwork(self, features, labels, autoencoder=False):
    """Train a one-hidden-layer (20 sigmoid units) net on (features, labels).

    With autoencoder=True the labels are replaced by the features so the
    net learns to reconstruct its input. The trained net is stored on
    self.neuralNetwork.

    NOTE(review): the dataset is declared with target width 1, but
    ``setField('target', labels)`` installs labels of width
    labels.shape[1] -- confirm PyBrain tolerates the mismatch.
    """
    dataSet = SupervisedDataSet(features.shape[1], 1)
    dataSet.setField('input', features)
    if autoencoder:
        labels = features
    dataSet.setField('target', labels)
    # splitWithProportion(0.25): first set receives 25% -> used as test.
    tstdata, trndata = dataSet.splitWithProportion(0.25)
    print features.shape
    simpleNeuralNetwork = _buildNetwork(\
        (LinearLayer(features.shape[1],'in'),),\
        (SigmoidLayer(20,'hidden0'),),\
        (LinearLayer(labels.shape[1],'out'),),\
        bias=True)
    trainer = BackpropTrainer(simpleNeuralNetwork, dataset=trndata, verbose=True)  #, momentum=0.1)
    trainer.trainUntilConvergence(maxEpochs=15)
    trnresult = percentError(trainer.testOnData(dataset=trndata), trndata['target'])
    tstresult = percentError(trainer.testOnData(dataset=tstdata), tstdata['target'])
    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult
    self.neuralNetwork = simpleNeuralNetwork
def _buildDataset(self, inpts, targets):
    """Build a SupervisedDataSet from parallel input/target sequences.

    Dimensions are inferred from the first row of each sequence. The
    split proportion of 1 places every sample in the first returned set.
    """
    dataset = SupervisedDataSet(len(inpts[0]), len(targets[0]))
    for index in range(len(inpts)):
        dataset.addSample(inpts[index], targets[index])
    return dataset.splitWithProportion(1)
def dsBuild(data):
    """Split 7-column rows into a 6-input / 1-output 80/20 train-test pair.

    Each row contributes its first six values as the input vector and the
    seventh as the target.
    """
    ds = SupervisedDataSet(6, 1)
    for record in data:
        feature_vector = tuple(record[k] for k in range(6))
        ds.addSample(feature_vector, record[6])
    trainPart, testPart = ds.splitWithProportion(0.8)
    return trainPart, testPart
def makeNet(learning_rate):
    """Train a 20-20-20 spelling-correction net until its score drops below 5.

    Samples come from data/misspellingssmall.csv; ``convert``/``unconvert``
    (defined elsewhere) map words to/from 20-element vectors. The trained
    net is stashed in the module-level ``lastNet``.

    NOTE(review): ``return score`` returns the module-level score *function*,
    not the final ``myscore`` value -- confirm intended.
    """
    ds = SupervisedDataSet(20, 20)
    with open('data/misspellingssmall.csv', 'rbU') as f:
        reader = csv.reader(f)
        for row in reader:
            ds.addSample(convert(row[0]), convert(row[1]))
    # splitWithProportion(0.2): first set gets 20% (test), second 80% (train).
    testds, trainds = ds.splitWithProportion(0.2)
    net = buildNetwork(20, 20, 20)
    trainer = BackpropTrainer(net, dataset=trainds, learningrate=learning_rate)
    myscore = float("inf")
    i = 0
    while myscore > 5:
        i += 1
        trainer.train()
        #trainer.trainEpochs(5)
        #trainer.trainUntilConvergence(verbose=True)
        myscore = score(net, testds)
        # Show progress plus the net's current guess for a probe word.
        print "Epoch #" + str(i) + ": " + str(myscore) + " (" + unconvert(
            net.activate(convert("ecceptable"))) + ")"
    global lastNet
    lastNet = net
    print "Network done with score " + str(myscore)
    return score
def makeNet(learning_rate):
    """Duplicate of the makeNet above: train a 20-20-20 net on the
    misspellings CSV until the test score drops below 5.

    NOTE(review): this is a near-verbatim copy of the previous makeNet
    (only line wrapping differs) -- consider deleting one of them.
    NOTE(review): ``return score`` returns the score *function*, not the
    final ``myscore`` value -- confirm intended.
    """
    ds = SupervisedDataSet(20, 20)
    with open('data/misspellingssmall.csv', 'rbU') as f:
        reader = csv.reader(f)
        for row in reader:
            ds.addSample(convert(row[0]),convert(row[1]))
    # splitWithProportion(0.2): first set gets 20% (test), second 80% (train).
    testds, trainds = ds.splitWithProportion(0.2)
    net = buildNetwork(20, 20, 20)
    trainer = BackpropTrainer(net, dataset=trainds, learningrate=learning_rate)
    myscore = float("inf")
    i = 0
    while myscore > 5:
        i += 1
        trainer.train()
        #trainer.trainEpochs(5)
        #trainer.trainUntilConvergence(verbose=True)
        myscore = score(net, testds)
        print "Epoch #" + str(i) + ": " + str(myscore) + " (" + unconvert(net.activate(convert("ecceptable"))) + ")"
    global lastNet
    lastNet = net
    print "Network done with score " + str(myscore)
    return score
def get_portion(self, portion=1.00):
    """Deep-copy the first ``portion`` fraction of all_data into a fresh
    dataset, split it, and store the halves in self.portion.

    self.portion['training'] receives the larger half, self.portion['test']
    the first ``self.split_proportion`` fraction.
    """
    sample_count = int(self.tot_size * portion)
    subset = SupervisedDataSet(self.in_dim, self.out_dim)
    inputs = self.all_data['input']
    targets = self.all_data['target']
    for idx in xrange(sample_count):
        # Deep copies keep the subset independent of all_data.
        subset.addSample(copy.deepcopy(inputs[idx]),
                         copy.deepcopy(targets[idx]))
    test_part, train_part = subset.splitWithProportion(self.split_proportion)
    self.portion["training"] = train_part
    self.portion["test"] = test_part
def fnn_datasets(data_x, label_y, train_test_rate): input_demension = np.shape(data_x)[1] target_demension = np.shape(label_y)[1] print input_demension, target_demension DS = SupervisedDataSet(input_demension, target_demension) #定义数据集的格式是三维输入,一维输出 for i in range(np.shape(data_x)[0]): DS.addSample(data_x[i], label_y[i]) dataTrain, dataTest = DS.splitWithProportion(train_test_rate) #xTrain, yTrain = dataTrain['input'], dataTrain['target'] #xTest, yTest = dataTest['input'], dataTest['target'] return dataTrain, dataTest
def __init__(self, in_dim, out_dim, size=1000, means=None, covas=None, split_proportion=0.25):
    """Generate a synthetic regression dataset: inputs drawn from a
    multivariate normal, each output the sin() of a slice-sum of the input.

    in_dim, out_dim  -- input/output dimensionality
    size             -- number of samples to draw
    means, covas     -- optional mean vector / diagonal covariance entries;
                        randomized when either is omitted
    split_proportion -- fraction assigned to the first (test) split
    """
    # NOTE(review): prefer ``is None``; ``== None`` would misbehave if an
    # array-like were ever passed for means/covas.
    if means == None or covas == None:
        means = []
        covas = []
        for i in xrange(in_dim):
            ### randomMeans
            sign_value = random.choice([-1, 1])
            means.append(random.random() * 10 * sign_value)
            ### randomCovas
            size_value = 3
            covas.append(random.random() * size_value)
    self.in_dim = in_dim
    self.out_dim = out_dim
    means = tuple(means)
    covas = diag(covas)
    # log|Sigma| -- proportional to the Gaussian's differential entropy.
    entro = math.log(det(covas))
    self.means = means
    self.covas = covas
    self.entro = entro
    # TODO: investigate what the output function actually looks like
    all_data = SupervisedDataSet(in_dim, out_dim)
    for n in xrange(size):
        in_datum = multivariate_normal(means, covas)
        out_datum = []
        for z in xrange(out_dim):
            # Each output is sin(sum of a contiguous input slice).
            # NOTE(review): the trailing +1 makes adjacent slices overlap
            # by one element -- confirm this is intended.
            start_ind = z * (in_dim / out_dim)
            end_ind = (z + 1) * (in_dim / out_dim) + 1
            val = math.sin(sum(in_datum[start_ind:end_ind]))
            out_datum.append(val)
        all_data.addSample(in_datum, out_datum)
    tst_data, trn_data = all_data.splitWithProportion(split_proportion)
    self.tot_size = size
    self.all_data = all_data
    self.tst_data = tst_data
    self.trn_data = trn_data
    self.portion = {"training": None, "test": None}
    self.split_proportion = split_proportion
def buildDS(tag_num, data): print "Building data set..." # feature: 3 ds = SupervisedDataSet(tag_num * 3, tag_num) for ele in data: #ds.addSample((ele[4], ele[5], ele[6], ele[7], ele[8], ele[9], ele[10], ele[11], ele[12], ele[13], ele[14], ele[15]), (ele[0], ele[1], ele[2], ele[3])) # 4-tag_num * 3: prob of each feature (in), 0-3: tag info (out) ds.addSample(ele[tag_num:], ele[:tag_num]) # split to training and setting dsTrain, dsTest = ds.splitWithProportion(0.8) return dsTrain, dsTest
class NNet(object):
    """2-in/2-out feed-forward net (2-4-2, bias) with normalization helpers.

    Samples are normalized into [0, 1] by dividing by per-column maxima
    before training, and predictions are denormalized on the way out.
    """

    def __init__(self):
        self.net = buildNetwork(2, 4, 2, bias=True)
        self.net.randomize()
        print self.net
        self.ds = SupervisedDataSet(2,2)
        self.trainer = BackpropTrainer(self.net, self.ds, learningrate = 0.1, momentum=0.99)

    def addTrainDS(self, data1, data2, max):
        # NOTE(review): the outer ``for x in [1,2]`` adds every sample twice
        # and its loop variable is immediately shadowed by the inner loop;
        # ``max`` also shadows the builtin. Confirm the duplication is
        # intentional (e.g. oversampling).
        for x in [1,2]:
            norm1 = self.normalize(data1,max)
            norm2 = self.normalize(data2,max)
            for x in range(len(norm1)):
                self.ds.addSample(norm1[x], norm2[x])

    def train(self):
        # Split 25%/75%, train until convergence, then report test error.
        print "Training"
        # print self.trainer.train()
        trndata, tstdata = self.ds.splitWithProportion(.25)
        self.trainer.trainUntilConvergence(verbose=True,
                                           trainingData=trndata,
                                           validationData=tstdata,
                                           validationProportion=.3,
                                           maxEpochs=500)
        # self.trainer.trainOnDataset(trndata,500)
        self.trainer.testOnData(tstdata, verbose= True)

    def activate(self, data):
        # Feed each row through the net; results are discarded.
        for x in data:
            self.net.activate(x)

    def normalize(self, data, max):
        # Scale each column by its maximum (max[x]) into [0, 1].
        normData = np.zeros((len(data), 2))
        for x in [0,1]:
            for y in range(len(data)):
                val = data[y][x]
                normData[y][x] = (val)/(max[x])
        # print normData
        return normData

    def denormalize(self, data, max):
        # Inverse of normalize(): multiply each column back by max[x].
        deNorm = np.zeros((len(data), 2))
        for x in [0,1]:
            for y in range(len(data)):
                val = data[y][x]
                deNorm[y][x] = val*max[x]
        return deNorm

    def getOutput(self, mat, max):
        # Normalize inputs, run the net, and denormalize the predictions.
        norm = self.normalize(mat, max)
        out = []
        for val in norm:
            out.append(self.net.activate(val))
        return self.denormalize(out, max)
def getDataSet():
    """Load features/labels via getFeatures() and return a 70/30 split.

    Returns (TrainDS, TestDS) from splitWithProportion(0.7).
    """
    X, Y = getFeatures()
    features = len(X[0])
    cases = len(X)
    DS = SupervisedDataSet(features, 1)
    for case_index in range(cases):
        DS.addSample(X[case_index], Y[case_index])
    TrainDS, TestDS = DS.splitWithProportion(0.7)
    return TrainDS, TestDS
def learn(input, output):
    """Train a 13-in/4-out recurrent net and evaluate it.

    Splits the samples 80/20, trains for 75 iterations on the first part,
    and returns (predictions on the second part, its target field).
    """
    network = RecurrentNeuralNetwork(13, 4)
    samples = SupervisedDataSet(13, 4)
    for features, labels in zip(input, output):
        samples.addSample(features, labels)
    train_part, validation_part = samples.splitWithProportion(0.8)
    network.set_learning_data(train_part)
    network.train(75)
    predictions = network.calculate(validation_part)
    return predictions, validation_part['target']
def get_train_data():
    """Build the (u1, u2) -> y(t+1) dataset and split it 80/20.

    Uses the first 199 time steps of the generated series, pairing each
    input with the *next* step's output. Returns (dataTrain, dataTest).
    """
    # Two inputs, one output per sample.
    dataset = SupervisedDataSet(2, 1)
    u1, u2, y = _generate_data()
    for step in np.arange(199):
        # Target is shifted forward one step relative to the inputs.
        dataset.addSample([u1[step], u2[step]], [y[step + 1]])
    # You can read back the fields via dataset['input'] / dataset['target'].
    train_part, test_part = dataset.splitWithProportion(0.8)
    return train_part, test_part
def neural_network_converg(data, target, network):
    """Train a fresh 7-hidden-unit net on (data, target) and print test RMSE.

    data    -- sequence of input vectors
    target  -- parallel sequence of scalar targets
    network -- NOTE(review): unused; a new ``nn`` is built and trained
               instead -- confirm whether the argument should be used.
    """
    DS = SupervisedDataSet(len(data[0]), 1)
    nn = buildNetwork(len(data[0]), 7, 1, bias = True, hiddenclass = SigmoidLayer, outclass = LinearLayer)
    for d, t in zip(data, target):
        DS.addSample(d,t)
    # First split (90%) is used for training, the remaining 10% for testing.
    Train, Test = DS.splitWithProportion(0.9)
    #data_train = Train['input']
    data_test = Test['input']
    #target_train = Train['target']
    target_test = Test['target']
    bpTrain = BackpropTrainer(nn,Train, verbose = True)
    #bpTrain.train()
    bpTrain.trainUntilConvergence(maxEpochs = 10)
    p = []
    for d_test in data_test:
        p.append(nn.activate(d_test))
    # Root-mean-squared error over the held-out samples.
    rmse_nn = sqrt(np.mean((p - target_test)**2))
    print(rmse_nn)
def fit(self, x, y, params):
    """Fit a PyBrain feed-forward net (x_dim-4-16-y_dim, tanh hidden layers,
    linear output) on min-max scaled data.

    x, y   -- pandas objects (``.values`` is used for the raw arrays)
    params -- unused here
    Returns the trained model with ``dims``, ``scale_x`` and ``scale_y``
    attached so predictions can later be inverse-transformed.
    """
    from pybrain.datasets import SupervisedDataSet
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.supervised.trainers import BackpropTrainer
    from pybrain.structure import TanhLayer, LinearLayer
    from sklearn.preprocessing import MinMaxScaler
    # Normalize both the features X and the labels y into the same
    # [0, 1] range before training.
    scale_x = MinMaxScaler().fit(x.values.reshape(x.shape[0], -1))
    x_min_max = scale_x.transform(x.values.reshape(x.shape[0], -1))
    scale_y = MinMaxScaler().fit(y.values.reshape(y.shape[0], -1))
    y_min_max = scale_y.transform(y.values.reshape(y.shape[0], -1))
    num = x.shape[0]
    x_dim = x.shape[1]
    try:
        y_dim = y.shape[1]
    except:  # NOTE(review): bare except -- narrow (IndexError for 1-D y).
        y_dim = 1
    model = buildNetwork(x_dim, 4, 16, y_dim, bias=True, hiddenclass=TanhLayer, outclass=LinearLayer)
    data_set = SupervisedDataSet(x_dim, y_dim)
    for i in range(num):
        data_set.addSample(x_min_max[i], y_min_max[i])
    # First split (99%) trains; the remaining 1% is held out unused here.
    train, test = data_set.splitWithProportion(0.99)
    trainer = BackpropTrainer(model, dataset=train, learningrate=0.02, lrdecay=1.0, momentum=0, verbose=True)
    trainingErrors, validationErrors = trainer.trainUntilConvergence(
        maxEpochs=15)
    model.dims = (x_dim, y_dim)
    model.scale_x = scale_x
    model.scale_y = scale_y
    return model
def build_pybrain_dataset(self):
    """Build a SupervisedDataSet from self.data and split it 70/30 into
    self.training_dataset / self.testing_dataset.

    NOTE(review): ``len(dataset.fields)`` reads a module-level ``dataset``
    object, not ``self`` -- confirm this global exists and is intended.
    """
    field_count = len(dataset.fields)
    diagnoses_count = len(self.diagnoses)
    supervised_dataset = SupervisedDataSet(field_count, diagnoses_count)
    # supervised_dataset = ClassificationDataSet(field_count,
    #                                            diagnoses_count,
    #                                            nb_classes=diagnoses_count,
    #                                            class_labels=self.diagnoses)
    for sample in self.data:
        input = self.make_input(sample)
        # The key is the Russian word for "Diagnosis" (data schema -- keep).
        diagnosis = sample['Диагноз']
        target = self.make_target(diagnosis)
        supervised_dataset.addSample(input, target)
    self.supervised_dataset = supervised_dataset
    # self.training_dataset = supervised_dataset
    # self.testing_dataset = supervised_dataset
    self.training_dataset, self.testing_dataset = supervised_dataset.splitWithProportion(0.7)
def vali():
    """Cross-validate a 3-3-1 net on new_data1.txt and dump the per-epoch
    10-fold MSE curve to cross_validation.json.

    Inputs are min-max normalized with the (case, week, grid) ranges
    computed over the whole file before shuffling.
    """
    from pybrain.tools.validation import ModuleValidator
    from pybrain.tools.validation import CrossValidator
    with open('new_data1.txt') as data_file:
        data = json.load(data_file)
    m = [d[0] for d in data]
    # (min, range) pairs for each of the three input features.
    case = [min([a for a, s, d in m]), float(max([a for a, s, d in m])-min([a for a, s, d in m]))]
    week = [min([s for a, s, d in m]), float(max([s for a, s, d in m])-min([s for a, s, d in m]))]
    grid = [min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a, s, d in m]))]
    ds = SupervisedDataSet(3, 1)
    import random
    random.shuffle(data)
    print len(data)
    for i in xrange(0, len(data)):
        # print "Adding {}th data sample".format(i),
        x1 = float(data[i][0][0] - case[0])/case[1]
        x2 = float(data[i][0][1] - week[0])/week[1]
        x3 = float(data[i][0][2] - grid[0])/grid[1]
        input = (x1, x2, x3)
        output = data[i][1]
        ds.addSample(input, output)
        # print ":: Done"
    print "Train"
    net = buildNetwork(3, 3, 1, bias=True)
    tstdata, trndata = ds.splitWithProportion( 0.33 )
    trainer = BackpropTrainer(net, trndata)
    mse = []
    modval = ModuleValidator()
    for i in range(100):
        # NOTE(review): each iteration trains twice (trainEpochs(1) then
        # trainOnDataset) before cross-validating -- confirm intended.
        trainer.trainEpochs(1)
        trainer.trainOnDataset(dataset=trndata)
        cv = CrossValidator(trainer, trndata, n_folds=10, valfunc=modval.MSE)
        mse_val = cv.validate()
        print "MSE %f @ %i" % (mse_val, i)
        mse.append(mse_val)
    with open('cross_validation.json', 'w') as outfile:
        json.dump(mse, outfile, indent=4)
def classicNeuralNetwork(self,features,labels,autoencoder=False):
    """Train a one-hidden-layer (20 sigmoid units) net on (features, labels)
    and store it on self.neuralNetwork.

    With autoencoder=True the labels are replaced by the features so the
    net learns to reconstruct its input.

    NOTE(review): near-verbatim duplicate of the classicNeuralNetwork
    defined earlier in this file -- consider removing one copy. Also the
    dataset declares target width 1 but setField installs labels of width
    labels.shape[1]; confirm PyBrain tolerates the mismatch.
    """
    dataSet = SupervisedDataSet(features.shape[1], 1)
    dataSet.setField('input', features)
    if autoencoder:
        labels = features
    dataSet.setField('target', labels)
    # splitWithProportion(0.25): first set receives 25% -> used as test.
    tstdata, trndata = dataSet.splitWithProportion( 0.25 )
    print features.shape
    simpleNeuralNetwork = _buildNetwork(\
        (LinearLayer(features.shape[1],'in'),),\
        (SigmoidLayer(20,'hidden0'),),\
        (LinearLayer(labels.shape[1],'out'),),\
        bias=True)
    trainer = BackpropTrainer(simpleNeuralNetwork, dataset=trndata, verbose=True)#, momentum=0.1)
    trainer.trainUntilConvergence(maxEpochs=15)
    trnresult = percentError( trainer.testOnData( dataset=trndata ), trndata['target'] )
    tstresult = percentError( trainer.testOnData( dataset=tstdata ), tstdata['target'] )
    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult
    self.neuralNetwork = simpleNeuralNetwork
class training:
    """End-to-end PyBrain regression workflow: load descriptors/targets,
    normalize and split, train (single run, best-of-N, or k-fold CV),
    evaluate, plot, and persist the network."""
    network_name=None;
    network=None;
    '''a network instance'''
    '''input data'''
    descriptors=None        # feature matrix X
    target=None             # target vector Y
    data_set=None           # raw SupervisedDataSet
    data_setNormed = None   # normalized copy of data_set
    tstdata = None
    trndata = None
    '''these for PCA'''
    scalar_X = None
    '''important outputs'''
    r_squared=None
    median_error=None
    predicted_value=None

    def __init__(self,X_filename,Y_filename=None):
        '''some important factors'''
        '''loading data'''
        self._load_training_data(X_filename, Y_filename)

    def _load_training_data(self,X_filename,Y_filename):
        ''' X and Y values will be loaded here '''
        # NOTE(review): despite the *_filename names, the arguments are
        # stored directly as the data arrays, not read from disk -- confirm.
        self.descriptors=X_filename
        print "Descriptors loaded!"
        if Y_filename is not None:
            self.target=Y_filename
            print "Target for training loaded!"

    def do_pca(self,user_input=0,threshold_input=0):
        '''PCA will be doen here'''
        # Placeholder: no implementation yet.

    def set_dataset(self,splitProtion = 0.15,featureNorm = True, Y_log = True):
        '''put training data into pybrain object'''
        '''Feature Selection doing here'''
        # self.descriptors = self.featureReduction(self.descriptors, threshold_input = 10)
        num_row=self.descriptors.shape[0]
        num_col=self.descriptors.shape[1]
        '''Pack data '''
        self.data_set = SupervisedDataSet(num_col , 1)
        for num_data in range(num_row):
            inputs=self.descriptors[num_data,:]
            outputs=self.target[num_data]
            self.data_set.addSample(inputs, outputs)
        print self.data_set.indim
        if featureNorm:
            '''split data'''
            self.tstdata, self.trndata = self.split_data(self.data_set,splitProtion)
            '''get the scalar for the trndata'''
            '''and normalize the tstdata with this scalar'''
            # Scaler is fitted on training inputs only, then applied to
            # train, test, and the full dataset alike.
            trn_scalar = self._getScalar(self.trndata['input'])
            self.trndata = self.featureNorm(self.trndata,trn_scalar,Y_log = True)
            self.tstdata = self.featureNorm(self.tstdata, trn_scalar, Y_log = True)
            self.data_setNormed = self.featureNorm(self.data_set, trn_scalar, Y_log = True)
            print 'Feature Normed'
        else:
            self.tstdata, self.trndata = self.split_data(self.data_set,splitProtion)
            print 'Feature not Normed'
        # NOTE(review): the Y_log parameter is ignored -- featureNorm is
        # always called with Y_log=True above.
        raw_input("Pybrain data object has been set up.")

    def featureReduction(self, data,threshold_input = 0.99):
        ''' feature reduction that only keep variables that the variance
        is greater than threshold. '''
        selector = VarianceThreshold(threshold = threshold_input)
        data = selector.fit_transform(data)
        print 'Feature Selected with threshold ', threshold_input, data.shape
        return data

    def _getScalar(self, data):
        '''For Normalization '''
        '''get the scalar of the input data and return '''
        thisScalar = preprocessing.StandardScaler().fit(data)
        return thisScalar

    def featureNorm(self,data,scalar,Y_log = True):
        ''' feature Normalization, deal with self.data_set,
        return self.data_setNormed '''
        descs = data['input']
        target = data['target']
        num_col = descs.shape[1]
        data_setNormed = SupervisedDataSet(num_col,1)
        data_setNormed.setField('input', scalar.transform(descs))
        '''feature norm for Y'''
        if Y_log:
            print 'Using log value of target'
            data_setNormed.setField('target',np.log(target))
        else:
            print 'Using the original value of target'
            data_setNormed.setField('target',target)
        return data_setNormed

    def split_data(self,dataset,proportion = 0.15):
        ''' split the data to self.tstdata and self.trndata. '''
        # First returned set receives ``proportion`` of the samples.
        tstdata,trndata = dataset.splitWithProportion(proportion)
        return tstdata, trndata

    def train_net(self,training_times_input=100,num_neroun=200,learning_rate_input=0.1,weight_decay=0.1,momentum_in = 0,verbose_input=True):
        ''' The main function to train the network:
        train epoch-by-epoch, printing train/test median relative error. '''
        print self.trndata['input'].shape
        raw_input()
        self.network=buildNetwork(self.trndata.indim, num_neroun,self.trndata.outdim, bias=True, hiddenclass=SigmoidLayer, outclass = LinearLayer)
        self.trainer=BackpropTrainer(self.network, dataset=self.trndata, learningrate=learning_rate_input, momentum=momentum_in, verbose=True, weightdecay=weight_decay )
        for iter in range(training_times_input):
            print "Training", iter+1,"times"
            self.trainer.trainEpochs(1)
            trn_error = self._net_performance(self.network, self.trndata)
            tst_error = self._net_performance(self.network, self.tstdata)
            print "the trn error is: ", trn_error
            print "the test error is: ",tst_error
        '''prediction on all data:'''
        # self.predicted_value = self.predict(self.network,self.data_setNormed['input'])

    def train_best_converge(self,training_times_input=5,num_neuron=120,learning_rate_input=0.1,weightdecay_input = 0.01,maxEpochs_input=1200,verbose_input=True):
        '''Train ``training_times_input`` independent nets on random splits
        and keep the one with the best validation R^2.'''
        '''pass values'''
        self.training_time=training_times_input
        self.learning_rate=learning_rate_input
        self.maxEpo=maxEpochs_input
        self.verbose=verbose_input
        self.r_squared=np.empty([self.training_time])
        self.median_error=np.empty([self.training_time])
        test_data,training_data=self.data_set.splitWithProportion(0.15)
        #train the network 30 times
        for iter in range(self.training_time):
            print "Training", iter+1,"times"
            '''randomly split the dataset to have 20% to be test data'''
            valid_data_this,train_data_this=training_data.splitWithProportion(0.1)
            net=buildNetwork(self.data_set.indim,num_neuron,self.data_set.outdim,bias=True,outputbias=True,hiddenclass=SigmoidLayer)
            t=BackpropTrainer(net,train_data_this,learningrate=self.learning_rate,weightdecay=weightdecay_input,momentum=0.,verbose=self.verbose)
            t.trainUntilConvergence(train_data_this,maxEpochs=self.maxEpo, validationProportion=0.1,verbose=self.verbose)
            '''validate the model with validation dataset'''
            self.r_squared[iter],self.median_error[iter]=self.do_regression(net, valid_data_this.getField("input"),valid_data_this.getField("target")[:,0])
            # NOTE(review): writing into locals() has no defined effect in
            # CPython -- the reads below may not see these values; a dict
            # keyed by iter would be reliable. Confirm this ever worked.
            locals()['net'+str(iter)]=net
            locals()['train_data' + str(iter)]=train_data_this
            locals()['valid_data' + str(iter)]=valid_data_this
            locals()['train' + str(iter)]=t
            print "Training",iter+1,"has done!"
        r_max = np.amax(self.r_squared)
        max_index=self.r_squared.argmax()
        print "Model ", max_index+1, "has been selected"
        self.network=locals()['net'+str(max_index)]
        self.train_best=locals()['train_data' + str(max_index)]
        self.valid_best=locals()['valid_data' + str(max_index)]
        '''run the best network on the test data'''
        print "The performance on test data........."
        descriptors_test=test_data.getField("input")
        Y_test=test_data.getField("target")[:,0]
        r2_all=self.do_regression(self.network, descriptors_test, Y_test)
        raw_input("Paused!")
        '''run the best network on the all data'''
        print "The performance on all data........."
        # NOTE(review): self.test_data / self.descriptors_pca are not
        # defined anywhere in this class -- confirm they exist elsewhere.
        self.predicted_value=self.test_data(self.network, self.descriptors_pca)
        r2_test=self.do_regression(self.network, self.descriptors_pca, self.target)

    def train_CV(self,n_folds=5,num_neuron = 50,learning_rate_input=0.01,decay=0.01,maxEpochs_input=1200,verbose_input=True):
        '''call the class in model validators'''
        '''and do cross validation'''
        '''pass values'''
        # Manual k-fold CV: average median relative error across folds.
        dataset = self.data_set
        l = dataset.getLength()
        indim = dataset.indim
        outdim = dataset.outdim
        inp = dataset.getField("input")
        out = dataset.getField("target")
        perms = np.array_split(permutation(l), n_folds)
        perf = 0
        for i in range(n_folds):
            # All folds except i form the training indices.
            train_perms_idxs = list(range(n_folds))
            train_perms_idxs.pop(i)
            temp_list = []
            for train_perms_idx in train_perms_idxs:
                temp_list.append(perms[ train_perms_idx ])
            train_idxs = np.concatenate(temp_list)
            #this is the test set:
            test_idxs = perms[i]
            #train:
            print "Training on part: ", i
            train_ds = SupervisedDataSet(indim,outdim)
            train_ds.setField("input", inp[train_idxs])
            train_ds.setField("target",out[train_idxs])
            net_this = buildNetwork(indim,num_neuron,outdim,bias=True,hiddenclass = SigmoidLayer)
            t_this = BackpropTrainer(net_this,train_ds,learningrate = learning_rate_input,weightdecay=decay, momentum=0.,verbose=verbose_input)
            #train asked times:
            t_this.trainEpochs(maxEpochs_input)
            #test on testset.
            test_ds = SupervisedDataSet(indim,outdim)
            test_ds.setField("input", inp[test_idxs])
            test_ds.setField("target",out[test_idxs])
            perf_this = self._net_performance(net_this, test_ds)
            perf = perf + perf_this
        perf /=n_folds
        print perf
        return perf

    def do_CV(self,):
        ''' call CV '''
        # Sweep hidden-layer sizes 20..195 and record CV performance to CSV.
        data_set_this = self.data_set
        perf_all=[]
        for num_neuron in np.arange(20,200,5):
            print "Training with number of neuron :", num_neuron
            perf_this = self.train_CV(n_folds=5, num_neuron=num_neuron, learning_rate_input=0.001, maxEpochs_input=50, verbose_input=False)
            perf_all.append(perf_this)
        print "All of the performance: ", perf_all
        # NOTE(review): the file handle is never closed -- consider `with`.
        output=open("CV_results_20to200.csv",'wb')
        filewriter=csv.writer(output)
        filewriter.writerow(perf_all)

    def _net_performance(self,net,test_data):
        """ calculate the median relatively error (mre) """
        input = test_data.getField("input")
        target = test_data.getField("target")
        outputs = self.predict(net, input)
        abs_error = np.absolute(outputs - target)
        rel_error = np.divide(abs_error,np.absolute(target))
        mre = np.median(rel_error)
        return mre

    def predict(self,net,X):
        ''' run the prediction of the given data (descriptors) on the
        given network. '''
        num_row=X.shape[0]
        num_col=X.shape[1]
        results=np.empty([num_row])
        for line in range(num_row):
            results[line]=net.activate(X[line])[0]
        # if self.scalar_Y is not None:
        #     results = self.scalar_Y.inverse_transform(results)
        return results

    def do_regression(self,net,X_pca,Y):
        ''' run the network prediction on descriptor X
        do regression on Y
        return R_squred value '''
        # NOTE(review): self.test_data is not defined in this class (it is
        # a local name in train_best_converge) -- confirm it exists.
        test_result=self.test_data(net, X_pca)
        slope,intercept,r_value,p_value,std_err =stats.linregress(Y, test_result)
        median_error_this=self.calc_Diff(Y, test_result)
        print "The R squared of this time is: ",r_value**2
        print "The median relatively error of this time is:", median_error_this
        return r_value**2,median_error_this

    def calc_Diff(self,real_value,predicted_value):
        '''
        this function calculate the median and average
        absolute error and relatively error
        between the real_value and the predicted value
        '''
        diff_between_abs=np.absolute(predicted_value-real_value)
        diff_between_abs_relatively=diff_between_abs/real_value
        mean_rel_error=np.mean(diff_between_abs_relatively)
        median_rel_error=np.median(diff_between_abs_relatively)
        mean_abs_error=np.mean(diff_between_abs)
        median_abs_error=np.median(diff_between_abs)
        # Only the median relative error is returned; the other three
        # statistics are computed but discarded.
        return median_rel_error

    def plot_diff(self,real_value,predicted_value,xlab,ylab,title):
        '''
        plot the line of real value and estimated value
        and plot the difference bar on the same graph
        '''
        num_row=real_value.shape[0]
        #this is the length of x axis
        data_all=np.array((real_value,predicted_value))
        data_all=np.transpose(data_all)
        # Sort sample pairs by the reported (real) value for a clean curve.
        data_all_sorted=data_all[data_all[:,0].argsort()]
        diff=data_all_sorted[:,1]-data_all_sorted[:,0]
        y_value=np.arange(num_row)
        fig=plt.figure()
        ax=fig.gca()
        ax.plot(y_value,data_all_sorted[:,1],label="Estimated Values")
        ax.plot(y_value,data_all_sorted[:,0],label="Reported Values")
        plt.xlabel(xlab, fontsize = 16)
        plt.ylabel(ylab, fontsize = 16)
        plt.title(title)
        ax.legend(loc = 2)
        ax.bar(y_value,diff)
        plt.show()

    def save_toFile(self,filename,pred):
        '''this function save the Numpy object
        array of prediction results to csv file'''
        # NOTE(review): 'filename' is a literal string -- the ``filename``
        # parameter is ignored and the file is always written as
        # ./filename; confirm and drop the quotes if unintended.
        np.savetxt('filename', pred, delimiter=',')

    def save_network(self,name_of_the_net):
        # Persist the trained network with PyBrain's NetworkWriter.
        print "Saving the trained network to file"
        if self.network is None:
            print "Network has not been trained!!"
        else:
            NetworkWriter.writeToFile(self.network, name_of_the_net)
            print "Saving Finished"

    def load_network(self,name_of_the_net):
        # Restore a previously saved network from its XML file.
        print "load existing trained network"
        self.network=NetworkReader.readFrom(name_of_the_net)
        print "Succeed!"
def run_data1():
    """Evaluate the saved dengue network on normalized (case, week, grid)
    inputs from new_data1.txt at two classification thresholds, printing
    precision / recall / F-measure / accuracy for each.

    NOTE(review): a BackpropTrainer is constructed but never trained (all
    training code is commented out) -- the loaded network is used as-is.
    """
    with open('new_data1.txt') as data_file:
        data = json.load(data_file)
    output = set([i[2] for i in [d[0] for d in data if d[1] == 1]])
    print output
    m = [d[0] for d in data]
    print (max([d for a, s, d in m]), min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a,s,d in m])))
    # (min, range) pairs used for min-max normalization of each feature.
    case = [min([a for a, s, d in m]), float(max([a for a, s, d in m])-min([a for a,s,d in m]))]
    week = [min([s for a, s, d in m]), float(max([s for a, s, d in m])-min([s for a,s,d in m]))]
    grid = [min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a,s,d in m]))]
    ds = SupervisedDataSet(3, 1)
    import random
    random.shuffle(data)
    print len(data)
    for i in xrange(0, len(data)):
        # print "Adding {}th data sample".format(i),
        x1 = float(data[i][0][0] - case[0])/case[1]
        x2 = float(data[i][0][1] - week[0])/week[1]
        x3 = float(data[i][0][2] - grid[0])/grid[1]
        input = (x1, x2, x3)
        output = data[i][1]
        ds.addSample(input, output)
        # print ":: Done"
    print "Train"
    # net = buildNetwork(3, 3, 1, bias=True)\
    net = NetworkReader.readFrom('dengue_network.xml')  # pre-trained net
    tstdata, trndata = ds.splitWithProportion( 0.33 )
    trainer = BackpropTrainer(net, trndata)
    # terrors = trainer.trainUntilConvergence(verbose = True, validationProportion = 0.33, maxEpochs = 100, continueEpochs = 10 )
    # mse = [0]
    # acceptable_error = .00001
    # for i in xrange(0,1000):
    #     print i," ",
    #     mse_c = trainer.train()
    #     if (mse_c < acceptable_error):
    #         break
    #     mse.append(mse_c)
    #     print mse_c
    threshold = [0.25, 0.30]
    for t in threshold:
        print "Testing threshold :", t
        true_positive = 0.0
        true_negative = 0.0
        false_positive = 0.0
        false_negative = 0.0
        data_to_write = []
        data_to_write_input = []
        for input, expectedOutput in tstdata:
            o = net.activate(input)
            # Binarize the net's continuous output at threshold t.
            output = 1.0 if o[0] > t else 0.0
            # De-normalize inputs for the human-readable report row.
            data_to_write.append((int((input[0]*case[1]) + case[0]), int((input[1]*week[1]) + week[0]),int((input[2]*grid[1]) + grid[0]), output))
            if (output == expectedOutput):
                if output == 1.0:
                    true_positive += 1.0
                else:
                    true_negative += 1.0
            else:
                if output == 1.0:
                    false_positive += 1.0
                else:
                    false_negative += 1.0
        # NetworkWriter.writeToFile(net, 'dengue_network1.xml')
        # NOTE(review): these divisions raise ZeroDivisionError when a
        # class is absent at this threshold -- confirm acceptable.
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / (true_positive + false_negative)
        f = (2 * precision * recall)/(precision + recall)
        accuracy = (true_positive + true_negative) / (true_positive + true_negative + false_positive + false_negative)
        def getKey(item):
            return item[1]
        data_to_write = sorted(data_to_write, key=getKey)
        counts = {
            # "MSE" : mse,
            # "DATA": data_to_write,
            "Threshold": t,
            "Precision": precision,
            "Recall": recall,
            "F-Measure": f,
            "Accuracy": accuracy,
            "Values": {
                "True Positive": true_positive,
                "True Negative": true_negative,
                "False Positive": false_positive,
                "False Negative": false_negative
            }
        }
        print "Accuracy :", accuracy
        print "Precision :", precision
        print "Recall :", recall
        print "F-Measure :", f
        print counts
        # errors = {
        #     "terrors" : terrors
        # }
        # with open('data8.json', 'w') as outfile:
        #     json.dump(counts, outfile, indent=4)
    exit()
# --- command-line hyperparameters (sys.argv[1] presumably consumed
# earlier in this script; ds / inputFile / testSet / trainSet /
# PorcDivTest are also defined before this point) ---
Ciclos = int(sys.argv[2])       # training cycles (used later)
Learning = float(sys.argv[3])   # learning rate
Momentum = float(sys.argv[4])
camada1 = int(sys.argv[5])      # neurons in first hidden layer
camada2 = int(sys.argv[6])      # neurons in second hidden layer (0 = none)
k = 0
size = 70
for line in inputFile.readlines():
    # Whitespace-separated floats: first 7 are inputs, the rest targets.
    data = [float(x) for x in line.strip().split() if x != '']
    indata = tuple(data[:7])
    outdata = tuple(data[7:])
    ds.addSample(indata,outdata)
    k +=1
    # Every ``size`` samples, split the buffer into test/train sets and
    # reset it. Targets are shifted by -1 (class labels start at 1).
    if (k == size):
        testdata, traindata = ds.splitWithProportion( PorcDivTest )
        ds.clear()
        k = 0
        for inp,targ in testdata:
            testSet.appendLinked(inp,targ-1)
        for inp,targ in traindata:
            trainSet.appendLinked(inp,targ-1)
# One-hot encode the class targets for both sets.
trainSet._convertToOneOfMany(bounds=[0, 1])
testSet._convertToOneOfMany(bounds=[0, 1])
# camada2 == 0 selects a single-hidden-layer topology.
if(camada2==0):
    net = buildNetwork(trainSet.indim,camada1,trainSet.outdim, recurrent = True)
else :
    net = buildNetwork(trainSet.indim,camada1,camada2,trainSet.outdim, recurrent = True)
trainer = BackpropTrainer(net,dataset = trainSet,learningrate = Learning,momentum = Momentum, verbose = True)
def calc():
    """Fit a tiny regression net to one pressure column of a .skl file.

    Python 2 code.  Reads asc_gyro_l.skl, builds a 1-in/1-out dataset from
    column 2 (offset by 32920.0), trains a 1-hidden-node network with plain
    backprop, and prints the MSE of the evaluated split.
    """
    filePath = 'asc_gyro_l.skl'
    f = open(filePath, 'r')  # NOTE(review): never closed -- consider `with`
    headers = f.readline().split()
    indices = [2]
    numOfFeatures = len(indices)#len(ancestorMap)
    ds = SupervisedDataSet(numOfFeatures, 1)
    press0 = []
    press1 = []
    for line in f:
        splited = line.split()
        output = [float(splited[2]) - 32920.0]#, splited[3]]
        press0.append(float(output[0]))
        #press1.append(float(output[1]))
        input = np.array(splited)
        input = input[indices]#getAnccestorRelativePos(splited)#splited[7:]#
        # NOTE(review): the computed `input` is unused -- the target value is
        # passed as the sample's input as well; confirm this is intentional.
        ds.appendLinked(output[0], output)
    tstdata, trndata = ds.splitWithProportion( 0.25 )
    #for n in range(5):
    numOfHidden = 1#15*n + 1
    net = buildNetwork(numOfFeatures, numOfHidden, 1, bias=True)
    #net = FeedForwardNetwork()
    """
    inLayer = LinearLayer(numOfFeatures)
    hiddenLayer0 = SigmoidLayer(numOfHidden)
    #hiddenLayer1 = SigmoidLayer(numOfHidden)
    #hiddenLayer2 = SigmoidLayer(numOfHidden)
    outLayer = LinearLayer(1)
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer0)
    #net.addModule(hiddenLayer1)
    #net.addModule(hiddenLayer2)
    net.addOutputModule(outLayer)
    in_to_hidden = FullConnection(inLayer, hiddenLayer0)
    #zero2one = FullConnection(hiddenLayer0, hiddenLayer1)
    #one2two = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden_to_out = FullConnection(hiddenLayer0, outLayer)
    net.addConnection(in_to_hidden)
    #net.addConnection(zero2one)
    #net.addConnection(one2two)
    net.addConnection(hidden_to_out)
    net.sortModules()
    """
    # NOTE(review): the trainer is given `tstdata` (the 25% split) and the
    # same split is evaluated below -- looks like train/test are swapped.
    trainer = BackpropTrainer(net, tstdata)
    print 'numOfHidden: ' + str(numOfHidden)
    #res = trainer.trainUntilConvergence()
    for i in range(100):
        res = trainer.train()
    evaluatedData = tstdata
    press0 = []
    press1 = []
    expectedPress0 = []
    expectedPress1 = []
    for input, expectedOutput in evaluatedData:
        output = net.activate(input)
        press0.append(output)
        #press1.append(output[1])
        expectedPress0.append(expectedOutput)
        #expectedPress1.append(expectedOutput[1])
        #errorSum0+=abs(output[0]-expectedOutput[0])
        #errorSum1+=abs(output[1]-expectedOutput[1])
    #print errorSum0/len(evaluatedData)
    #print errorSum1/len(evaluatedData)
    print mean_squared_error(press0, expectedPress0)
    print np.mean(expectedPress0)
    #print mean_squared_error(press1, expectedPress1)
    """
    arr = np.array(press0)
    print np.std(arr, axis=0)
    arr = np.array(press1)
    print np.std(arr, axis=0)
    print 'end'
    """
#calc()
class NaturalLanguageNetwork:
    """Trains a feed-forward net to mark which words of a POS-tagged sentence
    belong to a "concept" phrase (Python 2 code).

    Inputs and targets are fixed-length vectors of `self.ios` slots: inputs
    are numeric POS tags padded with -1, targets are 0/1 inclusion flags
    padded with 0.  Training data is read from `file` as alternating lines of
    ';'-terminated POS tags and inclusion flags.
    """

    def __init__(self,file):
        self.file = file
        self.ios = 30      # input/output vector size (max sentence length)
        self.hns = 25      # hidden-layer size
        self.epochs = 300  # total training epochs
        self.ds = SupervisedDataSet(self.ios,self.ios)
        self.nn = None     # built in parse_and_train()

    def get_concepts(self,inp_pos):
        """Return the concept phrases the trained network finds in `inp_pos`,
        a list of (word, pos) pairs."""
        inp = []
        ret = []
        for (word,pos) in inp_pos:
            inp.append(get_pos_tag(pos))
        inp = self.pad(inp,self.ios,-1)
        res = self.nn.activate(inp)
        print res
        cur_str = ''
        # Words whose output rounds to 1 extend the current phrase; a
        # non-match flushes the accumulated phrase into `ret`.
        # NOTE(review): a phrase still open when the loop ends is dropped.
        for i in range(len(res)):
            if round(res[i]) == 1:
                print 'Matched a node'
                if i < len(inp_pos):
                    print 'Appending ' + str(inp_pos[i][0])
                    cur_str += inp_pos[i][0] + ' '
            elif cur_str != '':
                ret.append(cur_str.strip())
                cur_str = ''
        return ret

    def parse_and_train(self):
        """Read training pairs from self.file, build the network, and train
        it, printing train/test miss rates every 50 epochs after the first
        150."""
        f = open(self.file,'r')
        learn_lines = []
        for line in f:
            if line.strip() != '':
                learn_lines.append(line)
        i = 0
        f.close()
        # Lines come in (POS line, inclusion line) pairs.
        while i < len(learn_lines):
            ins, outs = self.convert_to_tuple(learn_lines[i],learn_lines[i+1])
            i += 2
            self.ds.addSample(ins,outs)
        self.nn = buildNetwork(self.ios,self.hns,self.ios)
        self.train_dat, self.test_dat = self.ds.splitWithProportion(0.75)
        trnr = BackpropTrainer(self.nn,dataset=self.train_dat,momentum=0.1,verbose=False,weightdecay=0.01)
        # First 150 epochs silently, then 50 at a time with progress reports.
        i = 150
        trnr.trainEpochs(150)
        while i < self.epochs:
            trnr.trainEpochs(50)
            i += 50
            print 'For epoch ' + str(i)
            print 'For train:'
            self.print_current_error()
            print 'For test:'
            self.print_validation()
        self.nn.sortModules()
        #trnr.trainEpochs(self.epochs)
        #trnr.trainUntilConvergence()
        #for i in range(self.epochs):
        #    trnr.trainEpochs(1)

    def print_validation(self):
        """Print per-sample and average miss counts on the held-out split."""
        res = self.nn.activateOnDataset(self.test_dat)
        ttl_misses = 0
        for i in range(len(res)):
            resul = []
            for v in res[i]:
                resul.append(round(v))
            print 'Misses: ' + str(self.num_misses(resul,self.test_dat['target'][i]))
            ttl_misses += self.num_misses(resul,self.test_dat['target'][i])
        print 'Average Misses: ' + str(float(ttl_misses)/float(len(self.test_dat['target'])))

    def print_current_error(self):
        """Print the average miss count on the training split."""
        res = self.nn.activateOnDataset(self.train_dat)
        ttl_misses = 0
        for i in range(len(res)):
            resul = []
            for v in res[i]:
                resul.append(round(v))
            #print 'Misses: ' + str(self.num_misses(resul,self.train_dat['target'][i]))
            ttl_misses += self.num_misses(resul,self.train_dat['target'][i])
        print 'Average Misses: ' + str(float(ttl_misses)/float(len(self.train_dat['target'])))

    def num_misses(self,first,second):
        """Count positions where the rounded prediction and the target
        disagree on membership (1 vs. non-1)."""
        num = 0
        for i in range(len(first)):
            if first[i] != 1 and second[i] == 1:
                num += 1
            elif first[i] == 1 and second[i] != 1:
                num += 1
        return num

    def convert_to_tuple(self,poses,incls):
        """Convert a POS-tag line and an inclusion line into padded
        input/target tuples of length self.ios."""
        #I'm chopping off the last thing here because the strings being parsed
        #are ; terminated, resulting in an empty entry
        pos_list = poses.split(';')[:-1]
        incl_list = incls.split(';')[:-1]
        pos_vals = []
        incl_vals = []
        for p in pos_list:
            pos_vals.append(get_pos_tag(p))
        for i in incl_list:
            incl_vals.append(int(i))
        pos_vals = self.pad(pos_vals,self.ios,-1)
        incl_vals = self.pad(incl_vals,self.ios,0)
        return (tuple(pos_vals), tuple(incl_vals))

    def pad(self,ls,sz,pd_val):
        """Right-pad list `ls` in place with `pd_val` up to length `sz` and
        return it."""
        while len(ls) < sz:
            ls.append(pd_val)
        return ls
for attr in attributelist ] inputs.extend([ Normalize(metadata['maxmins'][attr], game[attr][1]) for attr in attributelist ]) if game['points'][0] > game['points'][1]: outputs = [1, 0] else: outputs = [0, 1] print outputs alldata.addSample(inputs, outputs) testdata, traindata = alldata.splitWithProportion(0.70) print "IMPORTANT ", traindata.outdim n = buildNetwork(traindata.indim, 5, traindata.outdim, outclass=SoftmaxLayer) print "Number of training patterns: ", len(traindata) trainer = BackpropTrainer(n, dataset=traindata, momentum=0.1, verbose=True, weightdecay=0.01) trainer.trainEpochs(200) # trainer.trainUntilConvergence() totalcount = 0 rightcount = 0 sumerrors = 0.0
inp2_vec = np.zeros((1, num_words)) out_vec = np.zeros((1, num_words)) for temp_list in sorted_list: inp1 = word_list.index(temp_list[0]) inp2 = word_list.index(temp_list[1]) out = word_list.index(temp_list[2]) inp1_vec = np.concatenate((inp1_vec, [inp[inp1, :]]), axis=0) inp2_vec = np.concatenate((inp2_vec, [inp[inp2, :]]), axis=0) out_vec = np.concatenate((out_vec, [inp[out, :]]), axis=0) inp_vec = np.concatenate((inp1_vec, inp2_vec), axis=1) #building the dataset dataset = SupervisedDataSet(2 * num_words, num_words) for i in range(len(sorted_list) + 1): dataset.addSample(inp_vec[i, :], out_vec[i, :]) tstdata, trndata = dataset.splitWithProportion(0.25) #building the network net = FeedForwardNetwork() input_layer = LinearLayer(2 * num_words, name='input_layer') hidden_layer = TanhLayer(num_words, name='hidden') output_layer = SigmoidLayer(num_words, name='output_layer') net.addInputModule(input_layer) net.addModule(hidden_layer) net.addOutputModule(output_layer) net.addConnection(FullConnection(input_layer, hidden_layer, name='in_to_hidden')) net.addConnection(FullConnection(hidden_layer, output_layer, name='hidden_to_out'))
# create a dataset object, make output Y a softmax matrix allData = SupervisedDataSet(n, numLabels) Y2 = convertToOneOfMany(Y) # add data samples to dataset object, both ways are correct '''for i in range(m): inData = X[i,:] outData = Y2[i, :] allData.addSample(inData, outData) ''' allData.setField('input', X) allData.setField('target', Y2) #separate training and testing data dataTrain, dataTest = allData.splitWithProportion(.9) # create object for training train = BackpropTrainer(net, dataset=dataTrain, learningrate=0.03, momentum=0.3) #train.trainUntilConvergence(dataset=dataTrain) # evaluate correct output for trainer trueTrain = dataTrain['target'].argmax(axis=1) trueTest = dataTest['target'].argmax(axis=1) # train step by step EPOCHS = 60 for i in range(EPOCHS): train.trainEpochs(1)
targetData.pop() #Pop off last element because it should be the next day's closing price. #print targetData myNet = buildNetwork(2, 2, 1, hiddenclass=SoftmaxLayer) # Build network with 2 input neurons, 2 hidden neurons, and 1 output neuron print myNet['in'], myNet['hidden0'], myNet['out'] # Debug message to confirm network setup myDS = SupervisedDataSet(2, 1) # Create dataset with two dimensional input, one dimensional target. for i in enumerate(myList): #Pop off each element, then added to dataset myDataSetAdder = myDS.appendLinked(myList.pop(), targetData.pop()) for inpt, target in myDS: #display dataset structure print inpt, target myTrainer = BackpropTrainer(myNet, myDS, verbose=True) print """This may take awhile...""" myTrainData, myTestData = myDS.splitWithProportion(0.25) print "Number of training patterns: ", len(myTestData) print "Input and output dimensions: ", myTestData.indim, myTrainData.outdim print "First sample (input, target):" print myTrainData['input'][0], myTrainData['target'][0] #import pdb; pdb.set_trace() # Debugger, uncomment to run python debugger for i in range(20): myTrainer.trainEpochs( 5 ) # Run the network for 5 epochs... trnresult = percentError (myTestData, myTrainData) print "epoch: %4d" % myTrainer.totalepochs, \ " train error: %5.2f%%" % trnresult, \ # " test error: %5.2f%%" % tstresult
def brescia_nn(train, test, max_epochs=None, verbose=False): trainval_ds = SupervisedDataSet(5, 1) test_ds = SupervisedDataSet(5, 1) for datum in train: trainval_ds.addSample(datum[:5], (datum[5], )) for datum in test: test_ds.addSample(datum[:5], (datum[5], )) train_ds, val_ds = trainval_ds.splitWithProportion(0.75) if verbose: print "Train, validation, test:", len(train_ds), len(val_ds), len( test_ds) ns = {} min_error = -1 min_h = -1 # use validation to form 4-layer network with two hidden layers, # with (2n + 1) nodes in the first hidden layer and somewhere from # 1 to (n - 1) in the second hidden layer for h2 in range(1, 5): if verbose: start = time.time() print "h2 nodes:", h2 # create the network if verbose: print "building network" n = FeedForwardNetwork() inLayer = LinearLayer(5) hiddenLayer1 = SigmoidLayer(11) hiddenLayer2 = SigmoidLayer(h2) outLayer = LinearLayer(1) n.addInputModule(inLayer) n.addModule(hiddenLayer1) n.addModule(hiddenLayer2) n.addOutputModule(outLayer) in_to_hidden = FullConnection(inLayer, hiddenLayer1) hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2) hidden_to_out = FullConnection(hiddenLayer2, outLayer) n.addConnection(in_to_hidden) n.addConnection(hidden_to_hidden) n.addConnection(hidden_to_out) n.sortModules() # training if verbose: print "beginning training" trainer = BackpropTrainer(n, train_ds) trainer.trainUntilConvergence(maxEpochs=max_epochs) ns[h2] = n # validation if verbose: print "beginning validation" out = n.activateOnDataset(val_ds) actual = val_ds['target'] error = np.sqrt(np.sum((out - actual)**2) / len(val_ds)) if verbose: print "RMSE:", error if min_error == -1 or error < min_error: min_error = error min_h = h2 if verbose: stop = time.time() print "Time:", stop - start # iterate through if verbose: print "best number of h2 nodes:", min_h out_test = ns[min_h].activateOnDataset(test_ds) return ns[h2], out_test
from pybrain.datasets import SupervisedDataSet from pybrain.supervised.trainers import BackpropTrainer from pybrain.structure.modules import SigmoidLayer from random import shuffle print('hello world') with open('latencias_normalizado.txt') as f: lines = f.readlines() ds = SupervisedDataSet(48, 1) for input in lines: input = input.split(",") [float(i) for i in input if i != ''] ds.addSample(input[1:], input[0]) train, test = ds.splitWithProportion(0.25) nn = buildNetwork(48, 10, 1, bias=True, outclass=SigmoidLayer) nn.reset() trainer = BackpropTrainer(nn, train, momentum=0.7) for i in xrange(3000): print("%s %s" % (trainer.train(), i)) trainer.testOnData(test, verbose=True) # for inp, targ in test: # mytarg = nn.activate(inp) # print(mytarg) # print(targ)
from os.path import isfile, join mypath = "transcripts" files = ["transcripts/" + f for f in listdir(mypath) if isfile(join(mypath, f))] net = buildNetwork(775, 2, 1, bias=True, hiddenclass=SoftmaxLayer,) ds = SupervisedDataSet(775, 1) f = open('dataset') for line in f.readlines(): case = json.loads(line) print case if case["output"] == "petitioner": output = 1 else: output = 0 ds.addSample(case["inputs"], output) test_data, training_data = ds.splitWithProportion(0.25) trainer = BackpropTrainer(net, training_data) print trainer.trainUntilConvergence(verbose=True) NetworkWriter.writeToFile(net, "saved_network.xml") for data in test_data: print "Network says: ", net.activate(data[0]) print "Actual answer: ", data[1]
def load(): print "Loading dataset..." with open('data/misspellings.csv', 'rbU') as f: reader = csv.reader(f) for row in reader: ds.addSample(convert(row[0]), convert(row[1])) print len(ds), "items in dataset." print "Load of dataset finished." load() timeit("Loading the data") testds, trainds = ds.splitWithProportion(0.2) #trainds._convertToOneOfMany() #testds._convertToOneOfMany() net = buildNetwork(20, 20, 20, 20, 20) trainer = BackpropTrainer(net, trainds) #trainer.train() trainer.trainEpochs(10) timeit("Training") ''' trnresult = percentError( trainer.testOnClassData(), trainds['target'] ) tstresult = percentError( trainer.testOnClassData( dataset=testds ), testds['target'] )
def ffn(nodesNum, trainingTime):
    """Build and train a 6-in/1-out deep MLP on index rise/fall-rate rows
    read from MySQL, then return its prediction for the most recent row.

    nodesNum     -- nodes per hidden layer (10 sigmoid hidden layers).
    trainingTime -- number of training epochs.

    Relies on a module-level DB connection `conn`.
    """
    # Build the neural network.
    n = FeedForwardNetwork()
    inLayer = LinearLayer(6)  # the network's layers
    hiddenLayer1 = SigmoidLayer(nodesNum)
    hiddenLayer2 = SigmoidLayer(nodesNum)
    hiddenLayer3 = SigmoidLayer(nodesNum)
    hiddenLayer4 = SigmoidLayer(nodesNum)
    hiddenLayer5 = SigmoidLayer(nodesNum)
    hiddenLayer6 = SigmoidLayer(nodesNum)
    hiddenLayer7 = SigmoidLayer(nodesNum)
    hiddenLayer8 = SigmoidLayer(nodesNum)
    hiddenLayer9 = SigmoidLayer(nodesNum)
    hiddenLayer10 = SigmoidLayer(nodesNum)
    outLayer = LinearLayer(1)
    n.addInputModule(inLayer)  # add the layers to the network
    n.addModule(hiddenLayer1)
    n.addModule(hiddenLayer2)
    n.addModule(hiddenLayer3)
    n.addModule(hiddenLayer4)
    n.addModule(hiddenLayer5)
    n.addModule(hiddenLayer6)
    n.addModule(hiddenLayer7)
    n.addModule(hiddenLayer8)
    n.addModule(hiddenLayer9)
    n.addModule(hiddenLayer10)
    n.addOutputModule(outLayer)
    in_to_hidden = FullConnection(inLayer, hiddenLayer1)  # wire the layers in a chain
    hidden_to_hidden1 = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden_to_hidden2 = FullConnection(hiddenLayer2, hiddenLayer3)
    hidden_to_hidden3 = FullConnection(hiddenLayer3, hiddenLayer4)
    hidden_to_hidden4 = FullConnection(hiddenLayer4, hiddenLayer5)
    hidden_to_hidden5 = FullConnection(hiddenLayer5, hiddenLayer6)
    hidden_to_hidden6 = FullConnection(hiddenLayer6, hiddenLayer7)
    hidden_to_hidden7 = FullConnection(hiddenLayer7, hiddenLayer8)
    hidden_to_hidden8 = FullConnection(hiddenLayer8, hiddenLayer9)
    hidden_to_hidden9 = FullConnection(hiddenLayer9, hiddenLayer10)
    hidden_to_out = FullConnection(hiddenLayer10, outLayer)
    n.addConnection(in_to_hidden)  # add the connections to the network
    n.addConnection(hidden_to_hidden1)
    n.addConnection(hidden_to_hidden2)
    n.addConnection(hidden_to_hidden3)
    n.addConnection(hidden_to_hidden4)
    n.addConnection(hidden_to_hidden5)
    n.addConnection(hidden_to_hidden6)
    n.addConnection(hidden_to_hidden7)
    n.addConnection(hidden_to_hidden8)
    n.addConnection(hidden_to_hidden9)
    n.addConnection(hidden_to_out)
    n.sortModules()  # finalise the network so it can be used
    print(n)
    # Build the dataset: six inputs, one output.
    ds = SupervisedDataSet(6, 1)
    # Table queries.
    cur1 = conn.cursor()
    cur1.execute(
        'select * from szzs_rise_and_fall_rate limit 2,9999999999999999;')
    result1 = cur1.fetchall()
    fv = []  # feature vectors
    for res in result1:
        a = []
        a.append(float(list(res)[1]))
        a.append(float(list(res)[2]))
        a.append(float(list(res)[3]))
        a.append(float(list(res)[4]))
        a.append(float(list(res)[5]))
        a.append(float(list(res)[6]))
        fv.append(a)
    cur2 = conn.cursor()
    cur2.execute(
        'select rise_fall_rate_next from szzs_rise_and_fall_rate limit 2,9999999999999999;'
    )
    result2 = cur2.fetchall()
    cla = []  # targets (next-day rise/fall rate)
    for res in result2:
        cla.append(float(list(res)[0]))
    cur3 = conn.cursor()
    cur3.execute('select * from szzs_rise_and_fall_rate order by date desc;')
    result3 = cur3.fetchmany(1)
    test = []  # test input: the most recent row
    for res in result3:
        test.append(float(list(res)[1]))
        test.append(float(list(res)[2]))
        test.append(float(list(res)[3]))
        test.append(float(list(res)[4]))
        test.append(float(list(res)[5]))
        test.append(float(list(res)[6]))
    for i in range(0, len(fv)):
        ds.addSample(fv[i], cla[i])
    dataTrain, dataTest = ds.splitWithProportion(
        0.8)  # 80% of the data for training, 20% for testing
    # Train the neural network.
    trainer = BackpropTrainer(n, dataset=dataTrain)  # network plus dataset
    trainer.trainEpochs(trainingTime)  # number of training epochs
    return n.activate(test)
Y = preprocessing.scale(y_np) ###################################### #setup the dataset (supervised classification training) for neural network ###################################### from pybrain.utilities import percentError from pybrain.tools.shortcuts import buildNetwork from pybrain.supervised.trainers import BackpropTrainer from pybrain.structure.modules import SoftmaxLayer from pybrain.datasets.classification import ClassificationDataSet from pybrain.datasets import SupervisedDataSet ds = SupervisedDataSet(4, 1) for i in range(len(X)): ds.addSample(X[i], Y[i]) # #split the dataset trainData, testData = ds.splitWithProportion(0.60) # ################################### # #Creating a Neural Network # ################################### # # build nerual net with 4 inputs, 5 hidden neuron and 1 output neuron net = buildNetwork(4, 5, 1, bias=True) trainer = BackpropTrainer(net, trainData) train_error = trainer.trainUntilConvergence(dataset=trainData, maxEpochs=50) # #evaluate the error rate on training data from sklearn.metrics import accuracy_score from sklearn.metrics import mean_squared_error train_out = net.activateOnDataset(trainData) #return the output
alldata = SupervisedDataSet(len(attributelist)*2, 2) #home, away for gameid, game in games.iteritems(): inputs = [Normalize(metadata['maxmins'][attr], game[attr][0]) for attr in attributelist] inputs.extend( [Normalize(metadata['maxmins'][attr], game[attr][1]) for attr in attributelist] ) if game['points'][0] > game['points'][1]: outputs = [1, 0] else: outputs = [0, 1] print outputs alldata.addSample(inputs, outputs) testdata, traindata = alldata.splitWithProportion(0.70) print "IMPORTANT ", traindata.outdim n = buildNetwork(traindata.indim, 5, traindata.outdim, outclass=SoftmaxLayer) print "Number of training patterns: ", len(traindata) trainer = BackpropTrainer( n, dataset=traindata, momentum=0.1, verbose=True, weightdecay=0.01) trainer.trainEpochs(200) # trainer.trainUntilConvergence() totalcount = 0 rightcount = 0 sumerrors = 0.0 for data in testdata: inputvalues = [] for attr in attributelist:
result_r = rev_map(result) # buying vhigh, high, med, low # maint vhigh, high, med, low # doors 2, 3, 4, 5more # persons 2, 4, more # lug_boot small, med, big # safety low, med, high with open(fname, "r") as f: reader = csv.reader(f) for row in reader: sample = (price[row[0]], price[row[1]], doors[row[2]], persons[row[3]], lug_boot[row[4]], safety[row[5]]) ds.addSample(sample, result[row[6]]) tst_ds, trn_ds = ds.splitWithProportion(0.2) # print "train data" # for inpt, target in trn_ds: # print inpt, target # print "test data" # for inpt, target in tst_ds: # print inpt, target # More information about trainers: http://pybrain.org/docs/api/supervised/trainers.html print "Training started" trainer.trainOnDataset(trn_ds, 10)
print 'reducing data' sample = np.random.randint(len(data), size=5000) data = data.ix[sample] X = data[[0, 1, 2, 3, 4]].values y = data[['outcome']].values print 'building net' net = buildNetwork(5, 20, 1, recurrent=False) dataset = SupervisedDataSet(5, 1) dataset.setField('input', X) dataset.setField('target', y) trainset, testset = dataset.splitWithProportion(0.75) trainer = BackpropTrainer(net, dataset=trainset, learningrate=0.1, momentum=0.5, verbose=False) epochs = np.array([]) train_errors = np.array([]) test_errors = np.array([]) for i in range(10): print 'round', i + 1 trainer.trainEpochs(5) train_error = trainer.testOnData(trainset) test_error = trainer.testOnData(testset) epochs = np.append(epochs, trainer.totalepochs) train_errors = np.append(train_errors, train_error)
def brescia_nn(train, test, max_epochs=None, verbose=False): trainval_ds = SupervisedDataSet(5, 1) test_ds = SupervisedDataSet(5, 1) for datum in train: trainval_ds.addSample(datum[:5], (datum[5],)) for datum in test: test_ds.addSample(datum[:5], (datum[5],)) train_ds, val_ds = trainval_ds.splitWithProportion(0.75) if verbose: print "Train, validation, test:", len(train_ds), len(val_ds), len(test_ds) ns = {} min_error = -1 min_h = -1 # use validation to form 4-layer network with two hidden layers, # with (2n + 1) nodes in the first hidden layer and somewhere from # 1 to (n - 1) in the second hidden layer for h2 in range(1, 5): if verbose: start = time.time() print "h2 nodes:", h2 # create the network if verbose: print "building network" n = FeedForwardNetwork() inLayer = LinearLayer(5) hiddenLayer1 = SigmoidLayer(11) hiddenLayer2 = SigmoidLayer(h2) outLayer = LinearLayer(1) n.addInputModule(inLayer) n.addModule(hiddenLayer1) n.addModule(hiddenLayer2) n.addOutputModule(outLayer) in_to_hidden = FullConnection(inLayer, hiddenLayer1) hidden_to_hidden = FullConnection(hiddenLayer1, hiddenLayer2) hidden_to_out = FullConnection(hiddenLayer2, outLayer) n.addConnection(in_to_hidden) n.addConnection(hidden_to_hidden) n.addConnection(hidden_to_out) n.sortModules() # training if verbose: print "beginning training" trainer = BackpropTrainer(n, train_ds) trainer.trainUntilConvergence(maxEpochs=max_epochs) ns[h2] = n # validation if verbose: print "beginning validation" out = n.activateOnDataset(val_ds) actual = val_ds['target'] error = np.sqrt(np.sum((out - actual)**2) / len(val_ds)) if verbose: print "RMSE:", error if min_error == -1 or error < min_error: min_error = error min_h = h2 if verbose: stop = time.time() print "Time:", stop - start # iterate through if verbose: print "best number of h2 nodes:", min_h out_test = ns[min_h].activateOnDataset(test_ds) return ns[h2], out_test
TD.gen() print(TD.X.shape) print(TD.y.shape) X_train = TD.X y_train = TD.y X_test = TD.X_test y_test = TD.y_test ds = SupervisedDataSet(TD.indim, 2) for i in range(TD.X.shape[0]): inp, outp = TD.X.iloc[i], TD.y.iloc[i] if outp == 1: outp = [1, 0] else: outp = [0, 1] ds.addSample(inp, outp) training_dataset, testing_dataset = ds.splitWithProportion(0.9) print(X_train.shape) print(X_test.shape) mmm = MMModel(name="model_test", n_pca=20, C_svr=1.0) #svc分类 fnn = buildNetwork(HISTORY, 200, 100, 50, 2, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer) trainer = make_trainer(fnn, training_dataset, 0.005) s_time = time.time()
inp2_vec = np.zeros((1, num_words)) out_vec = np.zeros((1, num_words)) for temp_list in sorted_list: inp1 = word_list.index(temp_list[0]) inp2 = word_list.index(temp_list[1]) out = word_list.index(temp_list[2]) inp1_vec = np.concatenate((inp1_vec, [inp[inp1, :]]), axis=0) inp2_vec = np.concatenate((inp2_vec, [inp[inp2, :]]), axis=0) out_vec = np.concatenate((out_vec, [inp[out, :]]), axis=0) inp_vec = np.concatenate((inp1_vec, inp2_vec), axis=1) #building the dataset dataset = SupervisedDataSet(2 * num_words, num_words) for i in range(len(sorted_list) + 1): dataset.addSample(inp_vec[i, :], out_vec[i, :]) tstdata, trndata = dataset.splitWithProportion(0.25) #building the network net = FeedForwardNetwork() input_layer = LinearLayer(2 * num_words, name='input_layer') hidden_layer = TanhLayer(num_words, name='hidden') output_layer = SigmoidLayer(num_words, name='output_layer') net.addInputModule(input_layer) net.addModule(hidden_layer) net.addOutputModule(output_layer) net.addConnection( FullConnection(input_layer, hidden_layer, name='in_to_hidden')) net.addConnection( FullConnection(hidden_layer, output_layer, name='hidden_to_out')) net.sortModules()
# create a dataset object, make output Y a softmax matrix allData = SupervisedDataSet(n, numLabels) Y2 = convertToOneOfMany(Y) # add data samples to dataset object, both ways are correct '''for i in range(m): inData = X[i,:] outData = Y2[i, :] allData.addSample(inData, outData) ''' allData.setField('input', X) allData.setField('target', Y2) #separate training and testing data dataTrain, dataTest = allData.splitWithProportion(.9) # create object for training train = BackpropTrainer(net, dataset=dataTrain, learningrate=0.03, momentum=0.3) #train.trainUntilConvergence(dataset=dataTrain) # evaluate correct output for trainer trueTrain = dataTrain['target'].argmax(axis=1) trueTest = dataTest['target'].argmax(axis=1) # train step by step EPOCHS = 60
# make neural network come into effect fnn.sortModules() # definite the dataset as two input , one output DS = SupervisedDataSet(1, 1) # add data element to the dataset for i in np.arange(199): DS.addSample(u[i], y[i]) # you can get your input/output this way X = DS['input'] Y = DS['target'] # split the dataset into train dataset and test dataset dataTrain, dataTest = DS.splitWithProportion(0.8) xTrain, yTrain = dataTrain['input'], dataTrain['target'] xTest, yTest = dataTest['input'], dataTest['target'] # train the NN # we use BP Algorithm # verbose = True means print th total error trainer = BackpropTrainer(fnn, dataTrain, verbose=True, learningrate=0.0001) # set the epoch times to make the NN fit trainer.trainUntilConvergence(maxEpochs=10000) # prediction = fnn.activate(xTest[1]) # print("the prediction number is :",prediction," the real number is: ",yTest[1]) predict_resutl = [] for i in np.arange(len(xTest)): predict_resutl.append(fnn.activate(xTest[i])[0])
# If the left-most bit of player_slot is set, # this player is on dire, so push the index accordingly player_slot = player['player_slot'] if player_slot < 128: hero_id += NUM_HEROES x[hero_id] = 1.0 ds.addSample(x, y) pbar.update(i) pbar.finish() print "Dataset built" print "Radiant {0}; Dire {1}".format(r, d) train_ds, test_ds = ds.splitWithProportion(1 - VALIDATION_RATIO - TEST_RATIO) valid_ds, test_ds = test_ds.splitWithProportion(VALIDATION_RATIO / (VALIDATION_RATIO + TEST_RATIO)) test_ds.saveToFile(TEST_FILE_NAME) valid_ds.saveToFile(VALIDATION_FILE_NAME) train_ds.saveToFile(TRAIN_FILE_NAME) print "Training, validation and test dataset built" # Network manipulation if isfile(NETWORK_TEMP_FILE_NAME) and isfile(NETWORK_VAL_FILE_NAME): net = NetworkReader.readFrom(NETWORK_TEMP_FILE_NAME) trainer = BackpropTrainer(net, train_ds, learningrate = 0.05) with open(NETWORK_VAL_FILE_NAME, "rb") as f: epoch, additional_left, best = load(f) print "Network loaded with best averge validation error {0}".format(best) else: net = buildNetwork(NUM_FEATURES, NUM_FEATURES + 100, 100, 20, 10, 1, \
def run():
    """Experiment script (Python 2): load fitted model data from a pickle,
    build a small net with an RBF input connection, gradient-check it, and
    -- currently unreachable -- train/evaluate it against validation data.
    """
    import scipy
    from scipy import linalg
    f = open("modelfitDatabase1.dat", "rb")  # NOTE(review): never closed
    import pickle
    dd = pickle.load(f)
    node = dd.children[13]
    rfs = node.children[0].data["ReversCorrelationRFs"]
    pred_act = numpy.array(node.children[0].data["ReversCorrelationPredictedActivities"])
    pred_val_act = numpy.array(node.children[0].data["ReversCorrelationPredictedValidationActivities"])
    training_set = node.data["training_set"]
    validation_set = node.data["validation_set"]
    training_inputs = node.data["training_inputs"]
    validation_inputs = node.data["validation_inputs"]
    # Fit output nonlinearities and transform the predicted activities.
    ofs = contrib.modelfit.fit_sigmoids_to_of(numpy.mat(training_set), numpy.mat(pred_act))
    pred_act_t = contrib.modelfit.apply_sigmoid_output_function(numpy.mat(pred_act), ofs)
    pred_val_act_t = contrib.modelfit.apply_sigmoid_output_function(numpy.mat(pred_val_act), ofs)
    (sx, sy) = numpy.shape(rfs[0])
    print sx, sy
    # Network: (sx*sy) linear inputs -> 4 sigmoid hidden -> 1 sigmoid output,
    # with a custom RBF connection on the input side.
    n = FeedForwardNetwork()
    inLayer = LinearLayer(sx * sy)
    hiddenLayer = SigmoidLayer(4)
    outputLayer = SigmoidLayer(1)
    n.addInputModule(inLayer)
    n.addModule(hiddenLayer)
    n.addOutputModule(outputLayer)
    in_to_hidden = RBFConnection(sx, sy, inLayer, hiddenLayer)
    # in_to_hidden = FullConnection(inLayer, hiddenLayer)
    hidden_to_out = FullConnection(hiddenLayer, outputLayer)
    n.addConnection(in_to_hidden)
    n.addConnection(hidden_to_out)
    n.sortModules()
    gradientCheck(n)
    # NOTE(review): this early return makes everything below unreachable --
    # it reads as a temporary cut-off so only the gradient check runs.
    return
    from pybrain.datasets import SupervisedDataSet
    ds = SupervisedDataSet(sx * sy, 1)
    val = SupervisedDataSet(sx * sy, 1)
    for i in xrange(0, len(training_inputs)):
        ds.addSample(training_inputs[i], training_set[i, 0])
    for i in xrange(0, len(validation_inputs)):
        val.addSample(validation_inputs[i], validation_set[i, 0])
    tstdata, trndata = ds.splitWithProportion(0.1)
    from pybrain.supervised.trainers import BackpropTrainer
    trainer = BackpropTrainer(n, trndata, momentum=0.1, verbose=True, learningrate=0.002)
    # Keep only the first response column of each set.
    training_set = numpy.array(numpy.mat(training_set)[:, 0])
    validation_set = numpy.array(numpy.mat(validation_set)[:, 0])
    pred_val_act_t = numpy.array(numpy.mat(pred_val_act_t)[:, 0])
    # Baseline performance before any training.
    out = n.activateOnDataset(val)
    (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, out)
    print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(numpy.power(validation_set - out, 2))
    print "Start training"
    for i in range(50):
        trnresult = percentError(trainer.testOnData(), trndata)
        tstresult = percentError(trainer.testOnData(dataset=tstdata), tstdata)
        print "epoch: %4d" % trainer.totalepochs, " train error: %5.2f%%" % trnresult, " test error: %5.2f%%" % tstresult
        trainer.trainEpochs(1)
        out = n.activateOnDataset(val)
        (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, out)
        print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(
            numpy.power(validation_set - out, 2)
        )
    out = n.activateOnDataset(val)
    print numpy.shape(out)
    print numpy.shape(validation_set)
    (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, out)
    print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(numpy.power(validation_set - out, 2))
    # Compare against the sigmoid-transformed reverse-correlation predictions.
    (ranks, correct, pred) = contrib.modelfit.performIdentification(validation_set, pred_val_act_t)
    print "Correct:", correct, "Mean rank:", numpy.mean(ranks), "MSE", numpy.mean(
        numpy.power(validation_set - pred_val_act_t, 2)
    )
    return n
data_file = fitsio.FITS('dr7qso.fit')[1].read() alldata = SupervisedDataSet(5, 1) length = len(data_file['UMAG']) #for i in range(NUM_DATA): for i in range(length): umag = data_file['UMAG'][i] gmag = data_file['GMAG'][i] rmag = data_file['RMAG'][i] imag = data_file['IMAG'][i] zmag = data_file['ZMAG'][i] redshift = data_file['z'][i] alldata.addSample((umag, gmag, rmag, imag, zmag), (redshift, )) trainval_ds, test_ds = alldata.splitWithProportion(0.8) train_ds, val_ds = trainval_ds.splitWithProportion(0.75) print "Train, validation, test:", len(train_ds), len(val_ds), len(test_ds) ns = {} min_error = -1 min_h = -1 # use validation to form 4-layer network with two hidden layers, # with (2n + 1) nodes in the first if not CACHED: for h2 in range(1, 5): start = time.time() print "h2 nodes:", h2
fonet = open('net', 'w') #load mat from matlab mat = sio.loadmat('Features.mat') #print(mat) X = mat['X'] y = mat['y'] length = X.shape[0] #set data alldata = SupervisedDataSet(14, 7) for n in arange(0, length): alldata.appendLinked(X[n], y[n]) #split data into test data and training data tstdata, trndata = alldata.splitWithProportion(0.25) #build network fnn = buildNetwork(trndata.indim, 100, trndata.outdim, outclass=SigmoidLayer, fast=True) #print fnn #build trainer trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.0, verbose=True, weightdecay=0.0)
trainIn = [] for x in row[:numberOfInputs]: trainIn.append(x) trainOut = [] for x in row[numberOfInputs:]: trainOut.append(x) d.appendLinked(trainIn, trainOut) # build a neural network with the second parameter being the number of hidden layers n = buildNetwork(d.indim, 3, d.outdim, recurrent=True) # configure the trainer t = BackpropTrainer(n, learningrate=0.01, momentum=0.99, verbose=True) # split the data randomly into 75% training - 25% testing train, test = d.splitWithProportion(0.75) print "{} - {}".format(len(train), len(test)) # train the data with n number of epochs t.trainOnDataset(train, 10) # test the data with the remaining data t.testOnData(test, verbose=True) # try the same test but with a different method net = buildNetwork(d.indim, 3, d.outdim, bias=True, hiddenclass=TanhLayer) trainer = BackpropTrainer(net, d) trainer.trainUntilConvergence(verbose=True)
x2=[(x-min(x2))/(max(x2)-min(x2)) for x in x2] x3=[(x-min(x3))/(max(x3)-min(x3)) for x in x3] y=[(x-min(y))/(max(y)-min(y)) for x in y] # transform x y be array format x=np.array([x1,x2,x3]).T y=np.array(y) xdim=x.shape[1] #input dimention ydim=1 #output dimention # create supervise dataset DS=SupervisedDataSet(xdim,ydim); for i in range(num): DS.addSample(x[i],y[i]) train,test=DS.splitWithProportion(0.75) # DS['input'] value of input x # DS['target'] value of output y # DS.clear() clear data # create nerve net ann=buildNetwork(xdim,10,5,ydim,hiddenclass=TanhLayer,outclass=LinearLayer) # BP train trainer=BackpropTrainer(ann,dataset=train,learningrate=0.1,momentum=0.1,verbose=True) # trainer.trainEpochs(epochs=20) #times of training trainer.trainUntilConvergence(maxEpochs=50) #times of training # forecast and draw # forecast test output=ann.activateOnDataset(test); # ann.activate(onedata) can only test one
from pybrain.structure import RecurrentNetwork
from pybrain.structure import LinearLayer, LSTMLayer, SoftmaxLayer, SigmoidLayer
from pybrain.structure import FullConnection
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.utilities import percentError
import data_parsing
from scipy import array, where

# Build the dataset: one-hot encode the raw data, then convert it to a
# pybrain SupervisedDataSet (28 inputs, 39 targets).  The original
# pre-initialised training_data/ds only to overwrite them immediately;
# those dead assignments are removed.
training_data = data_parsing.conversion_to_one_hot_representation()
ds = data_parsing.conversion_to_pybrain_dataset_format(training_data)
# splitWithProportion(0.25) returns the 25% share first -> test set.
test, train = ds.splitWithProportion(0.25)

# Deep recurrent architecture: linear input, three stacked LSTM layers,
# sigmoid and linear 39-unit output layers (wiring continues past this chunk).
n = RecurrentNetwork()
input1 = LinearLayer(28)
hidden1 = LSTMLayer(512)
hidden2 = LSTMLayer(512)
hidden3 = LSTMLayer(128)
output1 = SigmoidLayer(39)
output2 = LinearLayer(39)
n.addInputModule(input1)
n.addModule(hidden1)
n.addModule(hidden2)
n.addModule(hidden3)
n.addModule(output1)
# Merge the LTC frames and derive compound-return ("CRT") targets for the
# next period; "PRICES" would yield raw price movements instead.
ltc = d.ltc.combine("LTC")
dataset, tgt = dtools.gen_ds(ltc, 1, ltc_opts, "CRT")

# Copy every (features, target) pair into a pybrain dataset.
DS = SupervisedDataSet(len(dataset.values[0]), np.size(tgt.values[0]))
for i, row in enumerate(dataset.values):
    DS.appendLinked(row, [tgt.values[i]])

# 70% training / 30% testing.
train_set, test_set = DS.splitWithProportion(0.7)

# Recurrent net: linear in -> 10 tanh hidden -> linear out, plus a single
# recurrent connection feeding the output back into the hidden layer.
hidden_neurodes = 10
in_size = len(train_set["input"][0])
out_size = len(train_set["target"][0])
net = RecurrentNetwork()
net.addInputModule(LinearLayer(in_size, name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(out_size, name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(FullConnection(net["out"], net["hidden1"], name="cout"))
net.sortModules()
net.randomize()
num_trials, num_features = X_successful.shape

# Supervised datasets: one for the ordinary trials, one for the stim trials.
alldata = SupervisedDataSet(num_features, 1)
stimalldata = SupervisedDataSet(num_features, 1)

# Features + class labels for the ordinary trials.
for xnum in xrange(num_trials):
    alldata.addSample(X_successful[xnum, :], y_successful[xnum])

# Features + dummy class labels for the stim trials.
for xnum in xrange(len(ind_successful_stress_stim)):
    stimalldata.addSample(X_successful_stim[xnum, :], y_successful_stim[xnum])

# splitWithProportion(0.15) returns the 15% share first -> test set.
tstdata_temp, trndata_temp = alldata.splitWithProportion(0.15)

def _to_classification(src):
    # Re-wrap a dataset as a 2-class ClassificationDataSet (works around a
    # small bug in the _convertToOneOfMany function).
    dst = ClassificationDataSet(num_features, 1, nb_classes=2)
    for n in xrange(src.getLength()):
        sample = src.getSample(n)
        dst.addSample(sample[0], sample[1])
    return dst

tstdata = _to_classification(tstdata_temp)
trndata = _to_classification(trndata_temp)
valdata = _to_classification(stimalldata)
# Combine the LTC data and generate compound-return ("CRT") targets for the
# next period ("PRICES" would give raw price movements).
ltc = d.ltc.combine("LTC")
dataset, tgt = dtools.gen_ds(ltc, 1, ltc_opts, "CRT")

# Populate the pybrain dataset one linked (input, target) row at a time.
n_inputs = len(dataset.values[0])
n_targets = np.size(tgt.values[0])
DS = SupervisedDataSet(n_inputs, n_targets)
for i in xrange(len(dataset)):
    DS.appendLinked(dataset.values[i], [tgt.values[i]])

# Hold out 30% of the samples for testing.
train_set, test_set = DS.splitWithProportion(.7)

# Tanh-hidden recurrent network with one feedback connection from the
# output layer back into the hidden layer.
net = RecurrentNetwork()
hidden_neurodes = 10
net.addInputModule(LinearLayer(len(train_set["input"][0]), name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(len(train_set["target"][0]), name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(FullConnection(net["out"], net["hidden1"], name="cout"))
net.sortModules()
net.randomize()
# buying vhigh, high, med, low # maint vhigh, high, med, low # doors 2, 3, 4, 5more # persons 2, 4, more # lug_boot small, med, big # safety low, med, high with open(fname, "r") as f: reader = csv.reader(f) for row in reader: sample = (price[row[0]], price[row[1]], doors[row[2]], persons[row[3]], lug_boot[row[4]], safety[row[5]]) ds.addSample(sample, result[row[6]]) tst_ds, trn_ds = ds.splitWithProportion(0.2) # print "train data" # for inpt, target in trn_ds: # print inpt, target # print "test data" # for inpt, target in tst_ds: # print inpt, target # More information about trainers: http://pybrain.org/docs/api/supervised/trainers.html print "Training started" trainer.trainOnDataset(trn_ds, 10)
# center the data around the origin to # make poses in different locations look the same sittingData = centrePose(sittingData) uprightData = centrePose(uprightData) dataSet = SupervisedDataSet(18,1) # 0 target is sitting # 1 target is upright for i in xrange(sittingData.shape[2]): dataSet.addSample((sittingData[:,:,i].flatten()),(0,)) for i in xrange(uprightData.shape[2]): dataSet.addSample((uprightData[:,:,i].flatten()),(1,)) testSet, trainingSet = dataSet.splitWithProportion(0.25) testSet.saveToFile(rootPath+"/data/testSet") trainingSet.saveToFile(rootPath+"/data/trainingSet") for i in xrange(2,19): print "training network with " + str(i) + " neurons" network = buildNetwork(18,i,1) trainer = BackpropTrainer(network,dataset=trainingSet, momentum=0.1, verbose=True, weightdecay=0.01) trainer.trainUntilConvergence(dataset=None,maxEpochs = 40, verbose = True, continueEpochs=10,validationProportion=0.25) # save the network networkOutFile = open(rootPath+"/networks/network"+str(i)+".pkl","w") pickle.dump(network, networkOutFile)
#To have a nice dataset for visualization, we produce a set of # points in 2D belonging to three different classes. You could # also read in your data from a file, e.g. using pylab.load(). # means = [(-1,0),(2,4),(3,1)] # cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])] # alldata = ClassificationDataSet(2, 1, nb_classes=3) # for n in xrange(400): # for klass in range(3): # input = multivariate_normal(means[klass],cov[klass]) # alldata.addSample(input, [klass]) #Randomly split the dataset into 75% training and 25% test # data sets. Of course, we could also have created two different # datasets to begin with. tstdata, trndata = alldata.splitWithProportion( 0.25 ) #For neural network classification, it is highly advisable # to encode classes with one output neuron per class. # Note that this operation duplicates the original targets # and stores them in an (integer) field named 'class' trndata._convertToOneOfMany() tstdata._convertToOneOfMany() #Test our dataset by printing a little information about it. print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] print alldata
from pybrain.structure import FullConnection

# Topology: 4 sigmoid inputs -> 3 sigmoid hidden -> 1 linear output,
# fully connected layer to layer.
n.addInputModule(SigmoidLayer(4, name='in'))
n.addModule(SigmoidLayer(3, name='hidden'))
n.addOutputModule(LinearLayer(1, name='out'))
n.addConnection(FullConnection(n['in'], n['hidden'], name='c1'))
n.addConnection(FullConnection(n['hidden'], n['out'], name='c2'))
n.sortModules()  # finalise/initialise the network

## ----------------------- Trainer ---------------------------- ##
from pybrain.supervised.trainers import BackpropTrainer

# splitWithProportion(0.25) returns the 25% share first -> test set.
tstdata, trndata = DS.splitWithProportion(0.25)
# print len(tstdata)
# print len(trndata)

# NOTE(review): the trainer is given the FULL dataset DS, so the
# tstdata/trndata split above is never used -- trndata was probably
# intended; confirm before changing behavior.
trainer = BackpropTrainer(n, DS, learningrate=0.1, momentum=0.5, weightdecay=0.0001)
trainer.trainUntilConvergence(verbose=True, maxEpochs=100)
# print trainer.trainUntilConvergence()
# trainer.trainOnDataset(trndata, 100)
#print n.activate((2, 1, 3, 0))
#print n.activate((2, 1, 3, 90))

## ----------------------- Results & Performance measurements ---------------------------- ##
        # (The enclosing loop over i and the guard condition for this
        # 'continue' start before this chunk; samples failing that unseen
        # condition are skipped.)
        continue
    # Target: the next 'oo' consecutive values of Y1 starting at i.
    purchase_Y = []
    for j in range(oo):
        purchase_Y.append(Y1[i + j])
    # 11 features: the 7 immediately preceding values plus 4 weekly-lagged
    # values -- presumably daily purchase amounts; TODO confirm upstream.
    X = [0 for k in range(11)]
    for j in range(7):
        X[j] = Y1[i - j - 1]
    for j in range(4):
        X[7 + j] = Y1[i - j*7 - 14]
    DS.addSample(X, purchase_Y)

X = DS['input']
Y = DS['target']
# splitWithProportion(0.8) returns the 80% share first -> training set.
dataTrain, dataTest = DS.splitWithProportion(0.8)
xTrain, yTrain = dataTrain['input'], dataTrain['target']
xTest, yTest = dataTest['input'], dataTest['target']
# Backprop until validation error converges (at most 100 epochs).
trainer = BackpropTrainer(fnn, dataTrain, verbose = True, learningrate = 0.01)
trnerr, valerr = trainer.trainUntilConvergence(maxEpochs = 100)
out = fnn.activateOnDataset(dataTest)

# Build the final forecasting sample from the tail of the series.
i = 0
# NOTE(review): 'out' is immediately rebound here, discarding the
# activation result above -- verify this is intended.
out = SupervisedDataSet(11, oo)
temp = [0 for j in range(oo)]
d = len(purchase)
test = [0 for j in range(11)]
for j in range(5):
    test[j] = Y1[d - j - 1]