def fit(self, X, y):
    """Train the network on features X and 1-D targets y.

    Builds a SupervisedDataSet, a bias-enabled feed-forward net with
    self.h_size hidden units, and backprop-trains it for self.epo epochs.
    Returns self for chaining.
    """
    y_train = np.array([[yn] for yn in y])  # ensure 2-D column target
    _, self.in_size = X.shape
    _, self.out_size = y_train.shape
    ds = SDS(self.in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y_train)
    self.net = buildNetwork(self.in_size, self.h_size, self.out_size, bias=True)
    trainer = BP(self.net, ds)
    print("start training ...")
    # range instead of xrange: the original was Python-2-only
    for n in range(self.epo):
        mse = trainer.train()
        rmse = sqrt(mse)
        if self.verbose:
            print("RMSE = %8.3f epoch = %d" % (rmse, n))
    return self
def Predict(self, ticker, day):
    """Predict a closing price for `ticker` on `day`.

    Fetches self.trainingPeriod days of history ending the day before
    `day`, trains a small sigmoid network on sliding windows, and returns
    the activation for the most recent window. Returns [0] when the data
    download fails (best-effort fallback preserved from the original).
    """
    endDay = day - datetime.timedelta(1)
    startDay = endDay - datetime.timedelta(self.trainingPeriod)
    try:
        stockData = data.DataReader(ticker, 'yahoo', startDay, endDay)
    except Exception:  # narrow from bare except: still best-effort on fetch failure
        return [0]
    rawTrainFeatures = []
    rawTrainResponses = []
    for currentDay in range(self.windowLength, len(stockData)):
        window = stockData[currentDay - self.windowLength:currentDay]
        response = stockData.iloc[currentDay]['Close']
        rawTrainFeatures.append(self.GetFeature(window))
        rawTrainResponses.append(response)
    rawTestFeatures = self.GetFeature(stockData[len(stockData) - self.windowLength:len(stockData)])
    alldata = SupervisedDataSet(len(rawTrainFeatures[0]), 1)
    for index in range(len(rawTrainFeatures)):
        alldata.addSample(rawTrainFeatures[index], [rawTrainResponses[index]])
    # // so the hidden-layer size is an int (plain / yields a float on Python 3)
    self.network = buildNetwork(alldata.indim, (alldata.indim + alldata.outdim) // 2,
                                alldata.outdim, hiddenclass=SigmoidLayer,
                                outclass=LinearLayer)
    trainer = BackpropTrainer(self.network, dataset=alldata)
    for _ in range(250):  # same 50 x 5 training passes as the original
        trainer.train()
    return float(self.network.activate(rawTestFeatures))
def NN_data(ts, max_lag):
    '''Build a normalized lag-feature dataset from a pandas Series for
    PyBrain network training.

    Each sample has `max_lag` lagged prices as input and the current price
    as target; leading rows without a full lag window are dropped.

    Returns: (dataset suitable for neural net training,
              max value of dataset for denormalization purposes)
    '''
    ds = SupervisedDataSet(max_lag, 1)
    prices = [item for item in normalize(ts.values)[0]]
    target = list(prices)
    input_cols = []
    for i in range(1, max_lag + 1):
        # shift the series by i steps, padding the front with 'nan' markers
        col = prices[:-i]
        while len(col) < len(prices):
            col = ['nan'] + list(col)
        input_cols.append(col)
    # convert input columns to input rows; list(...) so the zip result can be
    # sliced (zip returns an iterator on Python 3)
    input_rows = list(zip(*input_cols))
    # drop rows containing 'nan' padding
    input_rows = input_rows[max_lag:]
    target = target[max_lag:]
    for i in range(len(target)):
        ds.appendLinked(input_rows[i], target[i])
    return ds, normalize(ts.values)[1]
def createXORData(self, inputdim, outputdim):
    """Fill self.data with the four samples of the XOR truth table."""
    self.data = SupervisedDataSet(inputdim, outputdim)
    for a, b in ((1, 1), (1, 0), (0, 1), (0, 0)):
        self.data.addSample([a, b], [a ^ b])
def create_dataset():
    """Sample sin(x) on [0, 4*pi) at pi/30 steps into a 1-in/1-out dataset."""
    samples = SupervisedDataSet(1, 1)
    for point in arange(0, 4 * pi, pi / 30):
        samples.addSample(point, sin(point))
    return samples
def ANN(trainFeature, trainLabel, testFeature, testLabel, netStructure, para_rate, para_momentum):
    # netStructure is a list [in, hidden, out], momentum is a parameter in SGD
    """Train a sigmoid feed-forward network with backprop until the dataset
    error change falls below 1e-4, printing predictions at the end and
    returning the list of recorded errors.

    NOTE(review): netStructure is indexed 0..3 below, so it actually needs
    FOUR entries (in, hidden1, hidden2, out) — the comment above looks stale.
    NOTE(review): testFeature and testLabel are accepted but never used.
    """
    sampleNum = trainFeature.shape[0]
    featureNum = trainFeature.shape[1]
    Dataset = SupervisedDataSet(featureNum, 1)
    i = 0
    while i < sampleNum:
        print(i)  # progress marker: one line per sample added
        Dataset.addSample(list(trainFeature[i]), [trainLabel[i]])
        i += 1
    Network = buildNetwork(
        netStructure[0],
        netStructure[1],
        netStructure[2],
        netStructure[3],
        hiddenclass=SigmoidLayer,
        outclass=SigmoidLayer,
    )
    T = BackpropTrainer(Network, Dataset, learningrate=para_rate, momentum=para_momentum, verbose=True)
    # print(Dataset['input'])
    # record the error before training and after each of two warm-up epochs
    errorList = []
    errorList.append(T.testOnData(Dataset))
    T.trainOnDataset(Dataset)
    errorList.append(T.testOnData(Dataset))
    T.trainOnDataset(Dataset)
    # NOTE(review): the guard re-runs testOnData on each check, so every
    # iteration evaluates twice; loop stops when the error change versus the
    # last recorded value drops under 1e-4.
    while abs(T.testOnData(Dataset) - errorList[-1]) > 0.0001:
        T.trainOnDataset(Dataset)
        errorList.append(T.testOnData(Dataset))
        pass
    # this step is for the output of predictedLabel
    print(np.array([Network.activate(x) for x in trainFeature]))
    # print(testLabel)
    print(Network.activate([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    return errorList
def fit(self, X, y):
    """Train self.net on features X and 2-D targets y for self.epo epochs.

    Prints the training RMSE each epoch and returns self for chaining.
    """
    _, self.in_size = X.shape
    _, self.out_size = y.shape
    ds = SDS(self.in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y)
    self.net = buildNetwork(self.in_size, self.h_size, self.out_size, bias=True)
    trainer = BP(self.net, ds)
    print("start training ...")
    # range instead of xrange: the original was Python-2-only
    for n in range(self.epo):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("RMSE = %8.3f epoch = %d" % (rmse, n))
    return self
def train(
    train,
    label,
    custom_net=None,
    training_mse_threshold=0.40,
    testing_mse_threshold=0.60,
    epoch_threshold=10,
    epochs=100,
    hidden_size=20,
):
    """Train a regression network with RProp- on the head of train/label,
    evaluating RMSE on the tail each epoch and checkpointing via pickle.

    Relies on module-level `split_at` and `model_file`. Python 2 code
    (print statements, __getslice__). The training_mse_threshold,
    testing_mse_threshold and epoch_threshold parameters are accepted
    but never used in this body.
    """
    # Test Set.
    x_train = train[0:split_at, :]
    # NOTE(review): __getslice__ exists only on Python 2; label[0:split_at]
    # would be the portable spelling.
    y_train_slice = label.__getslice__(0, split_at)
    y_train = y_train_slice.reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test_slice = label.__getslice__(split_at, label.shape[0])
    y_test = y_test_slice.reshape(-1, 1)
    # Shape.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField("input", x_train)
    ds.setField("target", y_train)
    # prepare dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField("input", x_test)
    ds_test.setField("target", y_test)
    min_mse = 1000000  # sentinel: any real test error will beat this
    # init and train
    if custom_net == None:
        net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    else:
        print "Picking up the custom network"
        net = custom_net
    trainer = RPropMinusTrainer(net, dataset=ds, verbose=False, weightdecay=0.01, batchlearning=True)
    print "training for {} epochs...".format(epochs)
    for i in range(epochs):
        mse = trainer.train()
        print "training mse, epoch {}: {}".format(i + 1, math.sqrt(mse))
        p = net.activateOnDataset(ds_test)
        # testing error is reported as RMSE despite the 'mse' name
        mse = math.sqrt(MSE(y_test, p))
        print "-- testing mse, epoch {}: {}".format(i + 1, mse)
        # checkpoint every epoch, plus a separate file for the best epoch
        pickle.dump(net, open("current_run", "wb"))
        if min_mse > mse:
            print "Current minimum found at ", i
            pickle.dump(net, open("current_min_epoch_" + model_file, "wb"))
            min_mse = mse
    pickle.dump(net, open(model_file, "wb"))
    return net
def __init__(self, domain, mode, iters, ensemble_size, trial_number):
    """Set up an experiment run: seed RNGs, build the network ensemble,
    and prepare the evaluation and training datasets from `domain`.
    """
    self.domain = domain
    self.mode = mode
    self.iters = iters
    self.ensemble_size = ensemble_size
    self.trial_number = trial_number
    self.iteration = 0  # advanced as the experiment progresses
    # Seed both numpy and stdlib RNGs from this object's hash, then re-seed
    # from a value drawn out of that stream — presumably so distinct
    # instances/trials diverge; TODO confirm the double seeding is intended.
    seed = abs(hash(self))
    numpy.random.seed(seed)
    random.seed(seed)
    seed = abs(hash(random.random()))
    numpy.random.seed(seed)
    random.seed(seed)
    print 'Seeding %d' % seed
    self.ensemble = Ensemble(self.ensemble_size, domain.inputdim, self.NUM_HIDDEN1, self.NUM_HIDDEN2, domain.outputdim)
    (self.eval_dataset, self.eval_costset) = self.domain.make_evaluation_datasets()  # used in run()
    self.train_dataset = SupervisedDataSet(domain.inputdim, domain.outputdim)
    # running metrics, updated during training
    self.current_error = 0.0
    self.current_avg_cost = 0.0
    self.current_error_times_avg_cost = 0.0
def readFromExcel(inCols, targetCols, numRows, fileName, offset=0, sheet=0, dataSet=None, conversionFun=None):
    """Populate a dataset (or create a new SupervisedDataSet) from an Excel file.

    inCols = array of column numbers containing the input data columns, indexed from 0
    targetCols = array of column numbers containing the target data columns, indexed from 0
    numRows = the number of rows of data
    fileName = the name of the excel file
    offset = the row the valid data starts at
    sheet = the sheet of the workbook the data is on, indexed from 0 as in xlrd
    dataSet = the dataset to be populated; a SupervisedDataSet is created if None
    conversionFun = used to preprocess data; rows it rejects are skipped
    """
    book = open_workbook(fileName)
    sheet = book.sheet_by_index(sheet)
    if dataSet is None:
        dataSet = SupervisedDataSet(len(inCols), len(targetCols))
    for r in range(offset, (offset + numRows)):
        # row_input: renamed from `input`, which shadowed the builtin
        row_input = []
        target = []
        for inC in inCols:
            row_input.append(sheet.cell_value(r, inC))
        for tC in targetCols:
            target.append(sheet.cell_value(r, tC))
        try:
            if conversionFun:
                row_input = [conversionFun(i) for i in row_input]
                target = [conversionFun(t) for t in target]
            print(row_input, target)
            dataSet.addSample(row_input, target)
        except Exception:
            # best-effort: a failed conversion skips the row (behavior kept)
            print('rejected row {}'.format(r))
    return dataSet
def test(self, arr):
    """Run the pickled model on `arr` and decode the predictions to DNA.

    Loads (net, scaler) from self.model_file, standardizes the features,
    activates the net over the dataset and returns self.convert_to_dna(p).
    """
    # 'with' so the model file is closed (the original leaked the handle)
    with open(self.model_file, 'rb') as fh:
        net, std_scale = pickle.load(fh)
    print('Finish loading model')
    # Load test data
    x_test, y_test = load_data(arr)
    x_test_scaled = std_scale.transform(x_test)  # normalize to standard normal
    y_test_dummy = np.zeros(y_test.shape)  # labels are unused at predict time
    input_size = x_test_scaled.shape[1]
    target_size = y_test.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test_scaled)
    ds.setField('target', y_test_dummy)
    # predict
    print('Activating ds')
    p = net.activateOnDataset(ds)
    print('debug')
    dna = self.convert_to_dna(p)
    return dna
def train_fn(trainfile, hiddennodes, output_model_file):
    """Train a sigmoid-in/sigmoid-out network on `trainfile` until
    convergence and pickle it to `output_model_file`.
    """
    hidden_size = hiddennodes
    print('Loading data..')
    x_train, y_train = load_data(trainfile)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                       hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)
    print('Training..')
    trainer.trainUntilConvergence(validationProportion=0.15, maxEpochs=1000, continueEpochs=10)
    print('Finish training. Serializing model...')
    # 'with' closes the output file (the original leaked the handle)
    with open(output_model_file, 'wb') as fh:
        pickle.dump(net, fh)
def validate(X, y, net):
    """Evaluate `net` on the held-out tail of X/y (from module-level
    split_at), print the test MSE and save predictions to
    output_predictions_file.
    """
    # Test Set.
    x_test = X[split_at:, :]
    # plain slicing: __getslice__ was removed in Python 3
    y_test = y[split_at:y.shape[0]]
    y_test = y_test.reshape(-1, 1)
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)
    # predict
    p = net.activateOnDataset(ds)
    mse = MSE(y_test, p)
    print("testing MSE:", mse)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
def test_train(self, epochs=1):
    """Train on the first 70% of self.samples and score the rest.

    Prints each test prediction versus its label and returns the number
    of misclassified samples (rounding the net output to the nearest int).
    """
    print("Training...")
    split = int(len(self.samples) * 0.7)
    train_samples = self.samples[0:split]
    train_labels = self.labels[0:split]
    test_samples = self.samples[split:]
    test_labels = self.labels[split:]
    net = buildNetwork(300, 300, 1)
    ds = SupervisedDataSet(300, 1)
    for i in range(len(train_samples)):
        ds.addSample(tuple(np.array(train_samples[i], dtype='float64')), (train_labels[i],))
    trainer = BackpropTrainer(net, ds, verbose=True)
    trainer.trainEpochs(epochs)
    self.totalEpochs = epochs
    error = 0
    counter = 0
    # min() guards against fewer than 100 held-out samples (the original
    # hard-coded range(0, 100) and crashed on smaller test sets)
    for i in range(min(100, len(test_samples))):
        output = net.activate(tuple(np.array(test_samples[i], dtype='float64')))
        counter += 1
        # both branches printed identically before; only the error count differs
        print(counter, " : output : ", output[0], " real answer : ", test_labels[i])
        if round(output[0]) != test_labels[i]:
            error += 1
    print("Trained with " + str(epochs) + " epochs; Total: " + str(self.totalEpochs) + ";")
    return error
def predict(isGroup):
    """Load the pickled model for `isGroup`, run it over the matching test
    CSV (last column = label, unused here) and write the predictions to
    predictions_file.txt.
    """
    data_path = '/home/rodolfo/Projetos/NeuralNetwork/data/test_groups_%s_file.csv' % isGroup
    model_path = 'model_groups_%s.pkl' % isGroup
    predictions_path = 'predictions_file.txt'
    # load model
    net = pickle.load(open(model_path, 'rb'))
    # load data
    raw = np.loadtxt(data_path, delimiter=',')
    features = raw[:, 0:-1]
    labels = raw[:, -1].reshape(-1, 1)
    # targets are not needed for prediction, so a zero placeholder suffices
    placeholder = np.zeros(labels.shape)
    n_in = features.shape[1]
    n_out = labels.shape[1]
    assert (net.indim == n_in)
    assert (net.outdim == n_out)
    # prepare dataset and predict
    ds = SDS(n_in, n_out)
    ds.setField('input', features)
    ds.setField('target', placeholder)
    predictions = net.activateOnDataset(ds)
    np.savetxt(predictions_path, predictions, fmt='%.6f')
def predict(X, net):
    """Run `net` over every row of X and persist the predictions.

    Writes the raw array to "1_" + output_predictions_file and an
    ID-indexed CSV to neural_prediction_3.csv.
    """
    # Test Set.
    x_test = X[:, :]
    # labels are unused when predicting; a zero placeholder satisfies the API
    y_test_dummy = np.zeros((X.shape[0], 1))
    input_size = x_test.shape[1]
    target_size = y_test_dummy.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)
    p = net.activateOnDataset(ds)
    print(p.shape)  # was a Python-2 print statement
    np.savetxt("1_" + output_predictions_file, p, fmt='%.6f')
    s = pd.Series(p[:, 0])
    s.index += 1  # 1-based IDs for the submission file
    s.to_csv('neural_prediction_3.csv', header=['Prediction'], index=True, index_label='ID')
def createDataSet(self, trainInput, trainOut):
    """Pack paired (input, output) training rows into a SupervisedDataSet
    with a single target value per sample.
    """
    dataset = SupervisedDataSet(trainInput.shape[1], 1)
    sample_count = len(trainInput)
    for idx in range(sample_count):
        dataset.addSample(trainInput[idx], trainOut[idx])
    return dataset
def __init__(self, indim, targetdim): SupervisedDataSet.__init__(self, indim, targetdim) # add field that stores the beginning of a new episode self.addField('sequence_index', 1) self.append('sequence_index', 0) #self.data['sequence_index'] = zeros((0, 1), int) self.currentSeq = 0
def train_net(data_set, n, epochs=1):
    """Build and backprop-train a bias/sigmoid/tanh network with 2 outputs.

    Each data_set entry contributes (entry[0][n]) as input and
    (entry[1], entry[2]) as the 2-value target; malformed entries are
    skipped best-effort. Returns the trained network.
    """
    num_inputs = len(data_set[0][0][n])
    ds = SupervisedDataSet(num_inputs, 2)
    for i in range(len(data_set)):
        try:
            ds.appendLinked(data_set[i][0][n], (data_set[i][1], data_set[i][2]))
        except Exception:  # narrowed from bare except; still skip bad rows
            continue
    print(str(len(ds)) + ' points successfully aquired')
    net = FeedForwardNetwork()
    net.addInputModule(LinearLayer(num_inputs, name='input'))
    net.addInputModule(BiasUnit(name='bias'))
    net.addOutputModule(LinearLayer(2, name='output'))
    net.addModule(SigmoidLayer(int((num_inputs + 2) / 2.), name='sigmoid'))
    net.addModule(TanhLayer(10, name='tanh'))
    net.addConnection(FullConnection(net['bias'], net['sigmoid']))
    net.addConnection(FullConnection(net['bias'], net['tanh']))
    net.addConnection(FullConnection(net['input'], net['sigmoid']))
    net.addConnection(FullConnection(net['sigmoid'], net['tanh']))
    net.addConnection(FullConnection(net['tanh'], net['output']))
    net.sortModules()
    trainer = BackpropTrainer(net, learningrate=0.01, momentum=0.1, verbose=True)
    trainer.trainOnDataset(ds)
    trainer.trainEpochs(epochs)
    return net
def main():
    """Train a small 2-input/1-output network on the dataset produced by
    criandoDataset and evaluate it on a freshly built copy of the data.
    """
    # dataset: inputs are 2-vectors, the target is a scalar
    dataset = SupervisedDataSet(2, 1)
    criandoDataset(dataset)
    # network: 2 inputs -> 4 hidden neurons -> 1 output, with bias units
    network = buildNetwork(dataset.indim, 4, dataset.outdim, bias=True)
    # high momentum speeds convergence on this small problem
    trainer = BackpropTrainer(network, dataset, learningrate=0.01, momentum=0.99)
    # training loop
    for _ in range(1000):
        trainer.train()
    # evaluation on an identically constructed dataset
    datasetTeste = SupervisedDataSet(2, 1)
    criandoDataset(datasetTeste)
    trainer.testOnData(datasetTeste, verbose=True)
def anntrain(xdata, ydata):
    """Train a minimal feed-forward net (one sigmoid hidden unit, linear
    output) on xdata/ydata with 40 backprop epochs; returns the network.
    (Dead commented-out experiments from the original were removed.)
    """
    ds = SupervisedDataSet(len(xdata[0]), 1)
    for i, algo in enumerate(xdata):
        ds.addSample(algo, ydata[i])
    net = FeedForwardNetwork()
    inp = LinearLayer(len(xdata[0]))
    h1 = SigmoidLayer(1)
    outp = LinearLayer(1)
    net.addOutputModule(outp)
    net.addInputModule(inp)
    net.addModule(h1)
    net.addConnection(FullConnection(inp, h1))
    net.addConnection(FullConnection(h1, outp))
    net.sortModules()
    trainer = BackpropTrainer(net, ds)
    trainer.trainOnDataset(ds, 40)  # 40 training epochs
    trainer.testOnData()
    return net
def retrain(N, dataset, net):
    """Run N additional backprop epochs over (input, target) pairs against
    an already-built 20/20 network and return it.
    """
    training_set = SupervisedDataSet(20, 20)
    for sample in dataset:
        training_set.addSample(sample[0], sample[1])
    trainer = BackpropTrainer(net, training_set)
    for _ in range(N):
        trainer.train()
    return net
def make_ds_with_samples(sample_subset):
    """Build a dataset from (features, target, label) triples.

    Returns (dataset, labels) where labels keeps the third element of each
    triple in sample order.
    """
    dataset = SupervisedDataSet(len(features.word_list), len(features.class_list))
    labels = []
    for feature_vec, target, label in sample_subset:
        dataset.addSample(feature_vec, target)
        labels.append(label)
    return (dataset, labels)
def main():
    """Resume training the network stored in model.xml on data/train.csv
    (3 input columns, 2 output columns) and write it back out.

    Training stops early if ../stopfile.txt appears.
    """
    train_file = 'data/train.csv'
    output_model_file = 'model.xml'
    epochs = 500
    # load data: space-separated, 3 inputs then 2 outputs per row
    train = np.loadtxt(train_file, delimiter=' ')
    Input = train[0:, 0:3]
    Output = train[0:, 3:5]
    # SupervisedDataSet accepts the sample arrays directly
    ds = SDS(Input, Output)
    # NOTE: the original also built a fresh 3-4-2 network here and then
    # immediately discarded it by loading model.xml; only the load is kept.
    net = NetworkReader.readFrom('model.xml')
    for i, o in zip(Input, Output):
        ds.addSample(i, o)
        print(i, o)
    trainer = BackpropTrainer(net, ds)
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
        # external kill-switch: drop a stopfile to end training early
        if os.path.isfile("../stopfile.txt") == True:
            break
    NetworkWriter.writeToFile(net, output_model_file)
def buildTrainingSet(gydataset):
    """Convert rows of 16 values into a training set: the first 15 entries
    are the features, the 16th is the target.
    """
    trainingset = SupervisedDataSet(15, 1)
    for row in gydataset:
        trainingset.addSample(tuple(row[:15]), row[15])
    return trainingset
def train():
    """Train a 50-hidden-unit regression network on module-level train_file
    (CSV, target in the last column) for 600 epochs.

    Pickles the network to module-level output_model_file and returns the
    per-epoch RMSE list.
    """
    print("-------------------------------------------------")
    print("loading data...")
    print("file to be loaded: ", train_file)
    # regresa un ndarray de numpy
    train = np.loadtxt(train_file, delimiter=',')
    print("data loaded to a ", type(train), " of size: ", train.shape, " and type:", train.dtype)
    print("Spliting inputs and output for training...")
    inputs_train = train[:, 0:-1]
    output_train = train[:, -1]
    output_train = output_train.reshape(-1, 1)
    print("inputs in a ", type(inputs_train), " of size: ", inputs_train.shape, " and type:", inputs_train.dtype)
    print("output in a ", type(output_train), " of size: ", output_train.shape, " and type:", output_train.dtype)
    print("-------------------------------------------------")
    print("Setting up supervised dataset por pyBrain training...")
    input_size = inputs_train.shape[1]
    target_size = output_train.shape[1]
    dataset = SDS(input_size, target_size)
    dataset.setField('input', inputs_train)
    dataset.setField('target', output_train)
    print("-------------------------------------------------")
    print("Setting up supervised dataset por pyBrain training...")
    hidden_size = 50
    epochs = 600
    crime_network = buildNetwork(input_size, hidden_size, target_size, bias=True,
                                 hiddenclass=SigmoidLayer, outclass=LinearLayer)
    trainer = BackpropTrainer(crime_network, dataset)
    print("-------------------------------------------------")
    rmse_vector = []
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
        rmse_vector.append(rmse)
    print("-------------------------------------------------")
    # 'with' closes the model file (the original leaked the handle)
    with open(output_model_file, 'wb') as fh:
        pickle.dump(crime_network, fh)
    print("Training done!")
    print("-------------------------------------------------")
    return rmse_vector
def CV_NN(X_train, Y, N_CV=1, test_sze=0.3, n_middle = 14):
    """Stratified shuffle-split cross-validation of a softmax classifier.

    Labels in Y are assumed to be 1..7 (one-hot encoded into 7 columns —
    TODO confirm). For each of N_CV folds, a sigmoid/softmax network is
    trained with a decreasing learning-rate schedule, then scored on the
    held-out block. Prints the averaged confusion matrix, mean accuracy
    and its standard error. Python 2 code (print statements).
    """
    hidden_size = n_middle
    sss = cross_validation.StratifiedShuffleSplit(
        Y, N_CV, test_size=test_sze, random_state=0)
    overall_accuracy = 0
    overall_error = 0
    # NOTE(review): np.int was removed in modern NumPy; plain int works.
    confusion_matrix = np.zeros((7, 7), dtype=np.int)
    for train_block, test_block in sss:
        # NOTE(review): DataFrame.as_matrix() was removed in pandas >= 1.0.
        x_train = X_train.as_matrix()[train_block]
        input_size = x_train.shape[1]
        y_vals = Y[train_block]
        # one-hot encode labels 1..7 into 7 target columns
        y_train = np.zeros((len(y_vals), 7))
        for i, y in enumerate(y_vals):
            y_train[i][y - 1] = 1
        target_size = y_train.shape[1]
        # print x_train.shape, y_train.shape
        ds = SDS(input_size, target_size)
        ds.setField('input', x_train)
        ds.setField('target', y_train)
        net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                           hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)
        # annealing schedule: retrain to convergence with shrinking rates
        trainer = BackpropTrainer(net, ds, learningrate=0.1, verbose=True)
        trainer.trainUntilConvergence(verbose=False, validationProportion=0.2, maxEpochs=64, continueEpochs=4)
        trainer = BackpropTrainer(net, ds, learningrate=0.05, verbose=True)
        trainer.trainUntilConvergence(verbose=False, validationProportion=0.2, maxEpochs=64, continueEpochs=8)
        trainer = BackpropTrainer(net, ds, learningrate=0.01, verbose=True)
        trainer.trainUntilConvergence(verbose=False, validationProportion=0.2, maxEpochs=512, continueEpochs=16)
        trainer = BackpropTrainer(net, ds, learningrate=0.005, verbose=True)
        trainer.trainUntilConvergence(verbose=False, validationProportion=0.2, maxEpochs=1024, continueEpochs=64)
        # evaluate on the held-out block
        y_vals = Y[test_block]
        y_test = np.zeros((len(y_vals), 7))
        for i, y in enumerate(y_vals):
            y_test[i][y - 1] = 1
        x_test = X_train.as_matrix()[test_block]
        ds = SDS(input_size, target_size)
        ds.setField('input', x_test)
        ds.setField('target', y_test)
        Y_predict = net.activateOnDataset(ds)
        y_predict = Y_predict.argmax(axis=1)  # predicted class index, 0-based
        y_test = y_vals - 1  # true class index, 0-based
        accuracy = (y_test == y_predict).mean()
        # NOTE(review): y_test/y_predict are already 0-based here, so the
        # extra -1 below shifts the confusion matrix by one class — confirm.
        for x, y in zip(y_test, y_predict):
            confusion_matrix[x - 1, y - 1] += 1
        overall_accuracy += accuracy
        overall_error += accuracy * accuracy  # accumulates E[acc^2] for the variance
    # average across folds
    # NOTE(review): in-place float scaling of an int array raises on modern NumPy.
    confusion_matrix *= 1.0 / N_CV
    print confusion_matrix
    overall_accuracy *= 1.0 / N_CV
    # standard error of the mean accuracy across folds
    overall_error = np.sqrt(
        (overall_error / N_CV - overall_accuracy ** 2) / N_CV)
    print overall_accuracy, overall_error
def train_cross_validate(train, label, custom_net=None, training_mse_threshold=0.40, testing_mse_threshold=0.60, epoch_threshold=10, epochs=100, hidden_size=50):
    """Train an RProp- regression network on the head of train/label and
    report raw test MSE on the tail every epoch, checkpointing via pickle.

    Variant with a tanh hidden layer and non-sqrt MSE reporting. Relies on
    module-level `split_at` and `model_file`. The *_threshold parameters
    are accepted but unused. Python 2 code (print statements, __getslice__).
    """
    # Test Set.
    x_train = train[0:split_at, :]
    # NOTE(review): __getslice__ exists only on Python 2.
    y_train_slice = label.__getslice__(0, split_at)
    y_train = y_train_slice.reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test_slice = label.__getslice__(split_at, label.shape[0])
    y_test = y_test_slice.reshape(-1, 1)
    # Shape.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    input_size_test = x_test.shape[1]
    target_size_test = y_test.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # prepare dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField('input', x_test)
    ds_test.setField('target', y_test)
    min_mse = 1000000  # sentinel: any real test error will beat this
    # init and train
    if custom_net == None:
        net = buildNetwork(input_size, hidden_size, target_size, bias=True, hiddenclass=TanhLayer)
    else:
        print "Picking up the custom network"
        net = custom_net
    trainer = RPropMinusTrainer(net, dataset=ds, verbose=True, weightdecay=0.01, batchlearning=True)
    print "training for {} epochs...".format(epochs)
    for i in range(epochs):
        mse = trainer.train()
        print "training mse, epoch {}: {}".format(i + 1, mse)
        p = net.activateOnDataset(ds_test)
        mse = MSE(y_test, p)
        print "-- testing mse, epoch {}: {}".format(i + 1, mse)
        # checkpoint every epoch, plus a separate file for the best epoch
        pickle.dump(net, open("current_run", 'wb'))
        if min_mse > mse:
            print "Current minimum found at ", i
            pickle.dump(net, open("current_min_epoch_" + model_file, 'wb'))
            min_mse = mse
    pickle.dump(net, open(model_file, 'wb'))
    return net
def train(N, dataset):
    """Train a fresh 20-20-20 tanh network for N epochs on the supplied
    (input, target) pairs, printing a one-line progress indicator.
    """
    training_set = SupervisedDataSet(20, 20)
    for sample in dataset:
        training_set.addSample(sample[0], sample[1])
    net = buildNetwork(20, 20, 20, bias=True, hiddenclass=TanhLayer)
    trainer = BackpropTrainer(net, training_set)
    for epoch in range(N):
        # '\r' rewrites the same console line each epoch
        sys.stdout.write("Progress: %d/%d \r" % (epoch, N))
        sys.stdout.flush()
        trainer.train()
    return net
def train(train_select, validate_select, aggregate_ttrss):
    """Train a 20-hidden-unit network on the combined train+validation data
    (target in the last column), pickle it to model.pkl and return it.
    """
    train = pd_to_numpy(train_select, aggregate_ttrss)
    validation = pd_to_numpy(validate_select, aggregate_ttrss)
    output_model_file = 'model.pkl'
    hidden_size = 20
    epochs = 10
    # train on the concatenation of both splits
    train = np.vstack((train, validation))
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)  # was reshaped twice; once suffices
    print(x_train, y_train)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # init and train
    net = buildNetwork(
        input_size,
        hidden_size,
        target_size,
        bias=True,
    )
    trainer = BackpropTrainer(net, ds, verbose=True, weightdecay=0.01)
    print("training for {} epochs...".format(epochs))
    print(input_size, target_size, x_train, y_train)
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
    # 'with' closes the model file (the original leaked the handle)
    with open(output_model_file, 'wb') as fh:
        pickle.dump(net, fh)
    return net
def update_neural_network(self, old_state, old_value, new_state, action, reward):
    """TD-style value update: nudge the network's output for
    (old_state, action) toward reward + discounted best next-state value.
    """
    best_next_value = self.get_best_action(new_state)[1]
    td_target = reward + self.discount_factor * best_next_value
    desired_value = old_value + self.learning_rate * (td_target - old_value)
    # single-sample dataset for one backprop step
    sample = SupervisedDataSet(self.states_and_actions_num, 1)
    sample.addSample(old_state + action, desired_value)
    BackpropTrainer(self.neural_network, sample).train()
def createTrainingData(self, filename, inputdim, outputdim):
    """Load comma-separated log rows into self.data.

    The first `inputdim` columns of each row are the inputs and the last
    `outputdim` columns are the targets.
    """
    self.data = SupervisedDataSet(inputdim, outputdim)
    for row in loadtxt(filename, delimiter=","):
        self.data.addSample(row[:inputdim], row[-outputdim:])
def nn(train_source, test_source, validation=False, v_size=0.5):
    """Train a 100-hidden-unit regression net on train_source (column 0 is
    the target, the rest are features) for 600 epochs, printing RMSE.

    With validation=True, v_size of the training data is held out instead
    of reading test_source (the held-out arrays are prepared but unused in
    this body, matching the original).
    """
    hidden_size = 100
    epochs = 600
    # load data
    train = read_csv(train_source)
    # count columns from the file's first line; 'with' closes the handle
    # (the original opened it and never closed it)
    with open(train_source) as tmp:
        feature_count = len(next(tmp).split(","))
    trainX = np.asarray(train[range(1, feature_count)])
    trainY = np.asarray(train[[0]]).ravel()
    testX = None
    testY = None
    if validation:
        # --- CROSS VALIDATION ---
        trainX, testX, trainY, testY = cross_validation.train_test_split(
            trainX, trainY, test_size=v_size, random_state=0)
    else:
        # --- TEST DATA ---
        test = read_csv(test_source)
        testX = np.asarray(test[range(1, feature_count)])
        testY = np.asarray(test[[0]]).ravel()
    input_size = len(trainX[0])
    target_size = 1
    print(input_size)
    print(target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', trainX)
    ds.setField('target', [[item] for item in trainY])
    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    trainer = BackpropTrainer(net, ds)
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
def neuralNetwork(X, Y):
    """Train a single-hidden-layer regression network on X/Y (at most 10
    epochs of trainUntilConvergence) and return the network.
    """
    print("Creating dataset...")
    ds = SupervisedDataSet(len(X[0]), 1)
    for x, y in zip(X, Y):
        ds.addSample(x, y)
    print("Creating neural network...")
    # hidden layer sized to the input dimension
    n = buildNetwork(ds.indim, int(ds.indim), ds.outdim)
    print("Training neural network...")
    t = BackpropTrainer(n, ds, verbose=True)
    # train for the side effect; the returned error lists were never used
    t.trainUntilConvergence(maxEpochs=10)
    return n
def montaRede(dadosEntrada, dadosSaida):
    """Split a 150-sample, 3-class dataset into train/test slices (35 per
    class for training, the rest for testing), train a small network and
    return (trained trainer, test dataset).

    :param dadosEntrada: network input samples
    :param dadosSaida: network target samples
    :return: (BackpropTrainer after training, SupervisedDataSet of test data)
    """
    entradaTreino = np.concatenate(
        (dadosEntrada[:35], dadosEntrada[50:85], dadosEntrada[100:135]))
    saidaTreino = np.concatenate(
        (dadosSaida[:35], dadosSaida[50:85], dadosSaida[100:135]))
    entradaTeste = np.concatenate(
        (dadosEntrada[35:50], dadosEntrada[85:100], dadosEntrada[135:]))
    saidaTeste = np.concatenate(
        (dadosSaida[35:50], dadosSaida[85:100], dadosSaida[135:]))
    # build the supervised training dataset once (the original called
    # treinaRede twice and discarded the first result)
    treinamento = treinaRede(entradaTreino, saidaTreino)
    # network sized from the dataset, 2 hidden neurons, with bias
    redeNeural = buildNetwork(treinamento.indim, 2, treinamento.outdim, bias=True)
    redeNeuralTreinada = BackpropTrainer(redeNeural, treinamento, learningrate=0.3, momentum=0.9)
    for _ in range(10000):
        redeNeuralTreinada.train()
    # assemble the held-out test dataset (4 inputs, 1 output)
    teste = SupervisedDataSet(4, 1)
    for i in range(len(entradaTeste)):
        teste.addSample(entradaTeste[i], saidaTeste[i])
    return redeNeuralTreinada, teste
def make_evaluation_datasets(self):
    """Read simdata/evalset.txt and build the evaluation value and cost
    datasets, also dumping plain-text copies for external tooling.

    Each line holds: x y dist angle. A negative dist marks a failed sample
    and is assigned COST_HIGH; otherwise COST_LOW.
    Returns (eval_dataset, eval_costset).

    NOTE(review): eval_costset is sized with self.outputdim targets but
    receives 1-element samples — confirm outputdim handling.
    """
    eval_dataset = SupervisedDataSet(self.inputdim, self.outputdim)
    eval_costset = SupervisedDataSet(self.inputdim, self.outputdim)
    # 'with' guarantees all three files are closed (f_sim used to leak)
    with open('simdata/evalset.txt') as f_sim, \
            open('../data/funcvalue.txt', 'w') as f_input, \
            open('../data/funccost.txt', 'w') as f_input_cost:
        for line in f_sim:
            line_segs = line.split()
            x = line_segs[0]
            y = line_segs[1]
            dist = float(line_segs[2])
            angle = line_segs[3]
            if dist < 0:
                cost = self.COST_HIGH
            else:
                cost = self.COST_LOW
            eval_dataset.addSample([x, y], [dist, angle])
            eval_costset.addSample([x, y], [cost])
            f_input.write('%s %s %f\n' % (x, y, dist))
            f_input_cost.write('%s %s %f\n' % (x, y, cost))
    return (eval_dataset, eval_costset)
def absorb(self, winner, **kwargs):
    """Fold the finished game's black-player observations into the net.

    For each observed (s0, s1) transition by black, the current activation
    is rescaled by the simulation count into (wins, plays), incremented for
    the winner, and the batch is trained in one pass.
    """
    self.total_sim += 1
    batch = SupervisedDataSet(self.features_num, 2)
    for player, s0, s1 in self.observation:
        if player != Board.STONE_BLACK:
            continue  # only black's moves feed the update
        feats = self.get_input_values(s0, s1, player)
        prediction = self.net.activate(feats)
        plays = prediction[1] * self.total_sim + 1
        wins = prediction[0] * self.total_sim
        if player == winner:
            wins += 1
        batch.addSample(feats, (wins, plays))
    self.trainer.trainOnDataset(batch)
def predict_proba(self, X):
    """Activate the trained network over every row of X and return the raw
    output array (one row of self.out_size values per sample).
    """
    n_rows, n_features = X.shape
    # targets are irrelevant at predict time; zeros satisfy the dataset API
    dummy_targets = np.zeros([n_rows, self.out_size])
    assert (self.net.indim == n_features)
    ds = SDS(n_features, self.out_size)
    ds.setField('input', X)
    ds.setField('target', dummy_targets)
    return self.net.activateOnDataset(ds)
def buildDataset(filenames,
                 history=2,  # how many snapshots into the past?
                 ):
    """Build a supervised dataset from saved run files.

    Each sample's input is the run settings plus `history` consecutive
    snapshots; the target is the relative change between the next two
    snapshot targets. Relies on module-level set_feats, snap_feats,
    num_targ, quickload, parseFeatures and parseTarget.
    """
    D = SupervisedDataSet(set_feats + history * snap_feats, num_targ)
    for fname in filenames:
        rundata = quickload(fname)
        snapshots = rundata['snapshots']
        settings = rundata['setting']
        for i in range(len(snapshots) - history - 1):
            inp = parseFeatures(settings, snapshots[i:i + history])
            prevtarget = parseTarget(snapshots[i + history - 1])
            nexttarget = parseTarget(snapshots[i + history])
            # percentage gain
            # NOTE(review): this evaluates (prev - next) / (next + prev) / 2,
            # i.e. half the relative DROP — the sign and the /2 look odd for
            # a "gain"; confirm the formula is intended.
            target = (-nexttarget + prevtarget) / (nexttarget + prevtarget) / 2.
            D.addSample(inp, [target])
    return D
def createTrainingData(self, filename, inputdim, outputdim):
    """Load a ';'-separated, TIMESTAMP-indexed CSV, normalize each value
    column by its first entry, split randomly into train/test by
    self.train_percent, and fill self.data with training samples.

    Relies on module-level `paths`, `inputparams` and `outputparams`.
    Does nothing when filename is None.
    """
    if filename is not None:
        finaldf = pd.read_csv(paths + filename, parse_dates=[0], delimiter=";", index_col=0)
        finaldf = finaldf.reset_index()
        finaldf['hour'] = pd.DatetimeIndex(finaldf['TIMESTAMP']).hour
        for col in finaldf:
            if col not in ['TIMESTAMP', 'hour']:
                print(col)
                print("hhhhhhhhhhhhhhhhhhh")  # leftover debug marker (kept)
                # scale every value column relative to its first row
                finaldf[col] /= finaldf[col].iloc[0].astype(np.float64)
        print(finaldf.head(10))
        # random row-level train/test split
        msk = np.random.rand(len(finaldf)) < self.train_percent
        train = finaldf[msk].copy()
        test = finaldf[~msk].copy()
        test = test.reset_index()
        train = train.reset_index()
        self.train_input = train[inputparams]
        self.train_output = train[outputparams]
        self.test_input = test[inputparams]
        self.test_output = test[outputparams]
        self.data = SupervisedDataSet(inputdim, outputdim)
        totalLength = len(self.train_input)
        # range instead of xrange: the original was Python-2-only.
        # NOTE(review): stopping at totalLength-1 skips the final training
        # row — confirm whether that off-by-one is intentional.
        for line in range(0, totalLength - 1):
            self.data.addSample(self.train_input.values[line], self.train_output.values[:, 0][line])
        print("data loaded...")
def NetworkTrain(trainDataSet, mnetwork=None, file='NetworkDump.pkl', maxEpochs=100):
    """Train a network on trainDataSet with backprop and pickle the result.

    trainDataSet -- iterable of (input_vector, target_vector) pairs
    mnetwork     -- (network, spec_dict) pair as produced by
                    NetworkBuild(); built fresh when omitted.
                    BUGFIX: the argument used to be unconditionally
                    overwritten with NetworkBuild(new=True), so a
                    caller-supplied network was silently ignored, and the
                    old default NetworkBuild() ran at import time.
    file         -- path the trained (network, spec) pair is pickled to
    maxEpochs    -- cap passed to trainUntilConvergence
    Returns the trained (network, spec) pair.
    """
    if mnetwork is None:
        mnetwork = NetworkBuild(new=True)
    assert len(mnetwork[0].inmodules) == len(mnetwork[1].keys())
    print('DEBUG')
    print("lens " + str(len(trainDataSet[0][0])) + " " + str(len(mnetwork[0].inmodules)))
    # Dataset dimensions are taken from the first sample.
    DS = SupervisedDataSet(len(trainDataSet[0][0]), len(trainDataSet[0][1]))
    for indata, outdata in trainDataSet:
        DS.addSample(indata, outdata)
    mnetwork[0].sortModules()
    # verbose=True prints the total error each epoch; PyBrain internally
    # splits the data 4:1 into training/validation sets.
    trainer = BackpropTrainer(mnetwork[0], DS, verbose=True, learningrate=0.01)
    # Train until convergence, bounded by maxEpochs iterations.
    trainer.trainUntilConvergence(maxEpochs=maxEpochs)
    # BUGFIX: use a context manager so the dump file is always closed.
    with open(file, 'wb') as fh:
        pickle.dump(mnetwork, fh)
    return mnetwork
def treinaRede(entradaTreino, saidaTreino):
    """Create the training set for the network.

    entradaTreino -- training input rows (4 values each)
    saidaTreino   -- training output rows (1 value each)
    Returns the SupervisedDataSet that pairs every input with its output.
    """
    # 4 input features, 1 output value per sample.
    conjunto = SupervisedDataSet(4, 1)
    for indice, entrada in enumerate(entradaTreino):
        conjunto.addSample(entrada, saidaTreino[indice])
    return conjunto
def buildTrainingSet(dataset):
    """Convert raw rows into a SupervisedDataSet with 2-bit class targets.

    Each row holds 15 feature values followed by a class label; labels
    0..3 are encoded as their 2-bit binary pairs.  Rows with any other
    label are skipped, matching the original elif chain.
    """
    # Class label -> 2-bit target encoding.
    targets = {0: (0, 0), 1: (0, 1), 2: (1, 0), 3: (1, 1)}
    trainingset = SupervisedDataSet(15, 2)
    for line in dataset:
        label = line[-1]
        if label in targets:
            # The 15 features were previously spelled out one by one in
            # four duplicated branches; slicing removes the duplication.
            trainingset.addSample(tuple(line[:15]), targets[label])
    return trainingset
def apply_updates(self):
    """Fold the accumulated per-(state, action) updates into the network.

    Builds a one-shot SupervisedDataSet whose targets are the current
    network outputs plus the pending deltas in self.updates, then trains
    the shared trainer on it for NTD_TRAIN_EPOCHS epochs.
    """
    dataset = SupervisedDataSet(self.inputdim, self.outputdim)
    # Keys are (state, action) pairs; presumably hashable identifiers —
    # TODO(review): confirm against the code that fills self.updates.
    for (si, ai) in self.updates.iterkeys():
        si_ai = '%s-%s' % (si, ai)
        network_in = self.network_inputs[si_ai]
        # Target = current network output + accumulated delta, component-wise.
        current_value = self.get_network_value(None, None, si_ai)
        new_value = [ a + b for a, b in zip(current_value, self.updates[(si, ai)]) ]
        dataset.addSample(network_in, new_value)
        if PRINT_GAME_RESULTS:
            print 'updating (%s, %s) from %s to %s' % (
                si, ai, map(PrettyFloat, current_value), map(PrettyFloat, new_value))
    # import pdb; pdb.set_trace()
    # Only retrain when at least one update was queued.
    if dataset:  # len(dataset) > 0:
        self.trainer.setData(dataset)
        self.trainer.trainEpochs(NTD_TRAIN_EPOCHS)
def test_train(self, epochs=1):
    """Train on 70% of the stored images and evaluate on the remainder.

    epochs -- number of backprop epochs to run
    Returns the number of misclassified test images.
    """
    print("Training...")
    # split the array in a way that the net will be
    # trained with 70% of the images and
    # tested with the rest
    split = int(len(self.samples) * 0.7)
    train_samples = self.samples[0:split]
    train_labels = self.labels[0:split]
    test_samples = self.samples[split:]
    test_labels = self.labels[split:]
    # build the net with 300 input values representing
    # each pixel of the 10x10 image (100 values)
    # and its Red,Green,Blue values (3 values)
    net = buildNetwork(300, 300, 1)
    ds = SupervisedDataSet(300, 1)
    for i in range(len(train_samples)):
        ds.addSample(tuple(np.array(train_samples[i], dtype='float64')),
                     (train_labels[i], ))
    trainer = BackpropTrainer(net, ds, verbose=True)
    trainer.trainEpochs(epochs)
    self.totalEpochs = epochs
    error = 0
    counter = 0
    # BUGFIX: iterate over the actual test set instead of a hard-coded
    # 100 samples (which raised IndexError on smaller test sets).  The
    # two original branches printed the same line; only the error count
    # differed.
    for i in range(len(test_samples)):
        output = net.activate(
            tuple(np.array(test_samples[i], dtype='float64')))
        counter += 1
        print(counter, " : output : ", output[0], " real answer : ",
              test_labels[i])
        if round(output[0]) != test_labels[i]:
            error += 1
    print("Trained with " + str(epochs) + " epochs; Total: " +
          str(self.totalEpochs) + ";")
    return error
def train_net():
    """Build and train the global net on every .jpg under INIT_FOLDER.

    Yields a progress string after each training iteration until the
    epoch error reaches 0.1 or below.
    """
    global net
    # A probe image fixes the network's input width.
    probe = load_image_arr('example.jpg')
    net = buildNetwork(len(probe), len(probe), 1)
    ds = SupervisedDataSet(len(probe), 1)
    for class_dir in get_folders_in(INIT_FOLDER, full=True):
        # The folder name doubles as the sample's class/target.
        label = os.path.basename(class_dir)
        for picture in get_list_of_files(class_dir, '.jpg'):
            ds.addSample(load_image_arr(picture), (label, ))
    trainer = BackpropTrainer(net, ds)
    error = 10
    iteration = 0
    while error > 0.1:
        error = trainer.train()
        iteration += 1
        yield 'Iteration: {0}. Error: {1}'.format(iteration, error)
class NeuralNet:
    """Thin wrapper around a PyBrain feed-forward network.

    Handles topology construction, dataset population, backprop training
    to a fixed error threshold, and pickling the trained network.
    """

    def __init__(self):
        # All collaborators are created lazily by build()/create_data_set().
        self.net = None
        self.data_set = None
        self.trainer = None
        self.inputs = None
        self.targets = None

    def build(self, inputs, hidden, output):
        """Create the network topology (with bias units)."""
        self.inputs = inputs
        self.targets = output
        self.net = buildNetwork(inputs, hidden, output, bias=True)

    def create_data_set(self):
        """Allocate an empty dataset matching the built topology."""
        self.data_set = SupervisedDataSet(self.inputs, self.targets)

    def add_list_of_data(self, list_of_data, data_class):
        """Add every sample in list_of_data with the same target class."""
        for dt in list_of_data:
            self.data_set.addSample(dt, data_class)

    def train(self):
        """Backprop-train until the per-epoch error drops below 0.001."""
        self.trainer = BackpropTrainer(self.net, self.data_set,
                                       learningrate=0.01)
        error = 10000
        iteration = 0
        while error > 0.001:
            error = self.trainer.train()
            # print() form is valid on both Python 2 and 3.
            print("Iteration: {0} Error {1}".format(iteration, error))
            iteration += 1

    def save_to_file(self, filename):
        """Pickle the network to filename.

        BUGFIX: pickle requires a binary-mode file; 'w' corrupted dumps
        on some platforms and fails outright under Python 3.
        """
        with open(filename, 'wb') as f:
            pickle.dump(self.net, f)

    def load_from_file(self, filename):
        """Unpickle a previously saved network (binary mode to match)."""
        with open(filename, 'rb') as f:
            self.net = pickle.load(f)

    def apply_over_data(self, data):
        """Run the network on a single input vector."""
        return self.net.activate(data)
def test_train(self, epochs=1):
    """Train on 70% of the stored images and evaluate on the remainder.

    epochs -- number of backprop epochs to run
    Returns the number of misclassified test images.
    """
    print("Training...")
    # split the array in a way that the net will be
    # trained with 70% of the images and
    # tested with the rest
    split = int(len(self.samples) * 0.7)
    train_samples = self.samples[0:split]
    train_labels = self.labels[0:split]
    test_samples = self.samples[split:]
    test_labels = self.labels[split:]
    # build the net with 300 input values representing
    # each pixel of the 10x10 image (100 values)
    # and its Red,Green,Blue values (3 values)
    net = buildNetwork(300, 300, 1)
    ds = SupervisedDataSet(300, 1)
    for i in range(len(train_samples)):
        ds.addSample(tuple(np.array(train_samples[i], dtype='float64')),
                     (train_labels[i],))
    trainer = BackpropTrainer(net, ds, verbose=True)
    trainer.trainEpochs(epochs)
    self.totalEpochs = epochs
    error = 0
    counter = 0
    # BUGFIX: iterate over the actual test set instead of a hard-coded
    # 100 samples (which raised IndexError on smaller test sets).  The
    # two original branches printed the same line; only the error count
    # differed.
    for i in range(len(test_samples)):
        output = net.activate(tuple(np.array(test_samples[i], dtype='float64')))
        counter += 1
        print(counter, " : output : ", output[0], " real answer : ",
              test_labels[i])
        if round(output[0]) != test_labels[i]:
            error += 1
    print("Trained with " + str(epochs) + " epochs; Total: " +
          str(self.totalEpochs) + ";")
    return error
def sim(self, board):
    """Play out one simulated game from `board` and train on the result.

    Runs up to self.max_moves plies, always taking get_best()'s choice,
    records every (player, state, new_state, value) transition, then
    feeds (wins, plays) targets for each transition to the trainer.
    """
    visited_path = []
    state = board
    winner = Board.STONE_EMPTY
    for _ in range(1, self.max_moves + 1):
        moves, player, _ = Game.possible_moves(state)
        # Greedy rollout: get_best picks the move with the best value.
        state_new, state_new_val = self.get_best(state, moves, player)
        visited_path.append((player, state, state_new, state_new_val))
        over, winner, _ = state_new.is_over(state)
        if over:
            break
        state = state_new
    # Count this rollout before rescaling the stored values below.
    self.total_sim += 1
    ds = SupervisedDataSet(self.features_num, 2)
    for player, state, new, val in visited_path:
        # val presumably holds (win_rate, play_rate) fractions that are
        # scaled back to counts by total_sim — TODO(review): confirm the
        # semantics of val against get_best()'s return value.
        plays = val[1] * self.total_sim + 1
        wins = val[0] * self.total_sim
        if player == winner:
            wins += 1
        ds.addSample(self.get_input_values(state, new, player), (wins, plays))
    self.trainer.trainOnDataset(ds)
def import_network(self, filename):
    """Restore a saved network from `filename` and rebuild its trainer.

    The stored samples and labels are shuffled in unison (identical
    seeds before each shuffle keep them aligned) before being loaded
    into the shared 300-input dataset.
    """
    samples = self.samples
    labels = self.labels
    # Re-seeding with the same value makes both shuffles identical.
    np.random.seed(0)
    np.random.shuffle(samples)
    np.random.seed(0)
    np.random.shuffle(labels)
    self.net_shared = NetworkReader.readFrom(filename)
    self.ds_shared = SupervisedDataSet(300, 1)
    for idx in range(len(samples)):
        sample = tuple(np.array(samples[idx], dtype='float64'))
        self.ds_shared.addSample(sample, (labels[idx],))
    self.trainer_shared = BackpropTrainer(self.net_shared, self.ds_shared,
                                          verbose=True)
def train_clean(self, epochs=1):
    """Train the shared network on ALL stored samples (no test split).

    epochs -- number of backprop epochs to run; also recorded in
              self.totalEpochs.
    """
    print("Training...")
    self.totalEpochs = epochs
    samples = self.samples
    labels = self.labels
    # 300 inputs: one per RGB channel of every 10x10 image pixel.
    self.net_shared = buildNetwork(300, 300, 1)
    self.ds_shared = SupervisedDataSet(300, 1)
    for idx in range(len(samples)):
        vec = tuple(np.array(samples[idx], dtype='float64'))
        self.ds_shared.addSample(vec, (labels[idx],))
    self.trainer_shared = BackpropTrainer(self.net_shared, self.ds_shared,
                                          verbose=True)
    self.trainer_shared.trainEpochs(epochs)
    print("Trained with " + str(epochs) + " epochs; Total: " +
          str(self.totalEpochs) + ";")
def ready_supervised_dataset(self, dataset):
    """Ready the supervised dataset for training.

    Builds self.network_dataset from the log-normalized training columns
    of dataset.data_frame; the target column is the prediction column
    shifted back by self.prediction_window rows, so each sample's target
    is a future value.

    @TODO: Need to randomize the data being fed to the network. See
    randomBatches() here:
    http://pybrain.org/docs/api/datasets/superviseddataset.html
    """
    self.network_dataset = SupervisedDataSet(len(self.train_data), 1)
    # Currently only supports log function for normalizing data
    training_values = np.log(dataset.data_frame[self.train_data])
    results = np.log(dataset.data_frame[self.prediction_data].shift(-self.prediction_window))
    training_values['PREDICTION_%s' % self.prediction_data[0]] = results
    # The shift introduces NaNs at the tail; drop those rows.
    training_values = training_values.dropna()
    # FIX: iterrows() already yields (index, row) pairs — the old
    # enumerate() wrapper was redundant and its counter was discarded.
    for _, data in training_values.iterrows():
        sample = list(data[:-1])
        result = [data[-1]]
        self.network_dataset.addSample(sample, result)