def entrenarSomnolencia(red):
    # Initialize the dataset
    ds = SupervisedDataSet(4096, 1)
    """The dataset is built by processing each image to extract the face,
    then assigning the desired target value for the neural network output."""
    print "Drowsiness - face"
    for i, c in enumerate(os.listdir(os.path.dirname('/home/taberu/Imágenes/img_tesis/somnoliento/'))):
        try:
            im = cv2.imread('/home/taberu/Imágenes/img_tesis/somnoliento/' + c)
            pim = pi.procesarImagen(im)
            cara = d.deteccionFacial(pim)
            if cara is None:
                print "No face found"
            else:
                print i
                ds.appendLinked(cara.flatten(), 10)
        except:
            pass
    trainer = BackpropTrainer(red, ds)
    print "Training until convergence"
    trainer.trainUntilConvergence()
    NetworkWriter.writeToFile(red, 'rna_somnolencia.xml')
class dataset:
    # Initialize the dataset with input and label size
    def __init__(self, inputsize, labelsize):
        self.inputsize = inputsize
        self.labelsize = labelsize
        self.DS = SupervisedDataSet(self.inputsize, self.labelsize)

    # Adds data to the existing training dataset
    def addTrainingData(self, inputdata, labeldata):
        try:
            if inputdata.size == self.inputsize and labeldata.size == self.labelsize:
                self.DS.appendLinked(inputdata, labeldata)
                return 1
        except AttributeError:
            print "Input error."
        return 0

    def getTrainingDataset(self):
        return self.DS

    def generateDataSet(self):
        for line in fileinput.input(['data/inputdata']):
            x = line.split(':')
            self.addTrainingData(ft.feature.getImageFeatureVector(x[0]), np.array([int(x[1])]))
        return 1
class dataset:
    # Initialize the dataset with input and label size
    def __init__(self, inputsize, labelsize):
        self.inputsize = inputsize
        self.labelsize = labelsize
        self.DS = SupervisedDataSet(self.inputsize, self.labelsize)

    # Adds data to the existing training dataset
    def addTrainingData(self, inputdata, labeldata):
        try:
            if inputdata.size == self.inputsize and labeldata.size == self.labelsize:
                self.DS.appendLinked(inputdata, labeldata)
                return 1
        except AttributeError:
            print "Input error."
        return 0

    def getTrainingDataset(self):
        return self.DS

    def generateDataSet(self):
        for line in fileinput.input(['data/inputdata3.txt']):
            x = line.split(':')
            # print ft.feature.getImageFeatureVector(x[0]), np.array([int(x[1])])
            self.addTrainingData(ft.feature.getImageFeatureVector(x[0]), np.array([int(x[1])]))
        return 1
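# A minimal driver for the wrapper above (a sketch, not from the original
# source). It assumes the input file holds "path:label" lines and that
# ft.feature.getImageFeatureVector returns a numpy vector whose size matches
# the declared input size; both are project-specific helpers.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

d = dataset(4096, 1)                # e.g. 64x64 flattened images, one label
d.generateDataSet()                 # fills d.DS from the input file
net = buildNetwork(4096, 32, 1, bias=True)
trainer = BackpropTrainer(net, d.getTrainingDataset())
print trainer.train()               # one backprop epoch, returns its error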
def neuralNetwork_eval_func(self, chromosome):
    node_num, learning_rate, window_size = self.decode_chromosome(chromosome)
    if self.check_log(node_num, learning_rate, window_size):
        return self.get_means_from_log(node_num, learning_rate, window_size)[0]
    folded_dataset = self.create_folded_dataset(window_size)
    indim = 21 * (2 * window_size + 1)
    mean_AUC = 0
    mean_decision_value = 0
    mean_mcc = 0
    sample_size_over_thousand_flag = False
    for test_fold in xrange(self.fold):
        test_labels, test_dataset, train_labels, train_dataset = folded_dataset.get_test_and_training_dataset(test_fold)
        if len(test_labels) + len(train_labels) > 1000:
            sample_size_over_thousand_flag = True
        ds = SupervisedDataSet(indim, 1)
        for i in xrange(len(train_labels)):
            ds.appendLinked(train_dataset[i], [train_labels[i]])
        net = buildNetwork(indim, node_num, 1, outclass=SigmoidLayer, bias=True)
        trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
        trainer.trainUntilConvergence(maxEpochs=self.maxEpochs_for_trainer)
        decision_values = [net.activate(test_dataset[i]) for i in xrange(len(test_labels))]
        decision_values = map(lambda x: x[0], decision_values)
        AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(decision_values, test_labels)
        mean_AUC += AUC
        mean_decision_value += decision_value_and_max_mcc[0]
        mean_mcc += decision_value_and_max_mcc[1]
        if sample_size_over_thousand_flag:
            break
    if not sample_size_over_thousand_flag:
        mean_AUC /= self.fold
        mean_decision_value /= self.fold
        mean_mcc /= self.fold
    self.write_log(node_num, learning_rate, window_size, mean_AUC, mean_decision_value, mean_mcc)
    self.add_log(node_num, learning_rate, window_size, mean_AUC, mean_decision_value, mean_mcc)
    return mean_AUC
def createDataset2(nInputs, inputSize, nOutputs):
    index = 1
    ds = SupervisedDataSet(inputSize, nOutputs)
    i = 0
    j = 0
    pList = candleGen()
    print len(pList)
    input = []
    z = 0
    for sub in pList:
        if nInputs == j:
            break
        elif i < inputSize:
            input.append(sub[index])
            i = i + 1
        elif i == inputSize:
            ds.appendLinked(input, sub[index])
            input.pop(0)
            input.append(sub[index])
            j = j + 1
            i = i + 1
        else:
            ds.appendLinked(input, sub[index])
            input.pop(0)
            input.append(sub[index])
            j = j + 1
    return ds
def get_supervised_dataset(race_data, race_factors):
    race_bins = get_bins(race_data)
    race_bin_groups = pd.DataFrame.from_dict(race_bins).groupby('race_id')
    # Input, output
    data_set = SupervisedDataSet(6, 15)
    for race_id, race_bin in race_bin_groups:
        # Skip races with too few non-empty population bins
        if not np.count_nonzero(race_bin.population_pct) > 10:
            continue
        race_factor = race_factors[race_factors.race_id == race_id]
        # If the race has missing factor data then skip it
        if race_factor.empty:
            continue
        input_factors = [first(race_factor.high_temp) / 100.0,
                         first(race_factor.low_temp) / 100.0,
                         first(race_factor.high_humidity) / 100.0,
                         first(race_factor.low_humidity) / 100.0,
                         first(race_factor.starting_elevation) / 10000.0,
                         first(race_factor.gross_elevation_gain) / 10000.0]
        output_factors = race_bin.population_pct.tolist()
        data_set.appendLinked(input_factors, output_factors)
    return data_set
def fit(self):
    trainds = SupervisedDataSet(self.INPUT_SIZE, 1)
    for i in range(self.str_train, self.end_train):
        trainds.appendLinked(self.data[i - self.INPUT_SIZE:i], self.data[i])
    trainer = BackpropTrainer(self.net, trainds, learningrate=self.eta,
                              weightdecay=self.lmda, momentum=0.1, shuffle=False)
    trainer.trainEpochs(self.epochs)
    trainer = None
def buildDataSet(fTrainSet):
    ds = SupervisedDataSet(8, 1)
    for row in fTrainSet:
        inVec = row[2:10]
        tarVec = row[10]
        ds.appendLinked(inVec, tarVec)
    return ds
def main(T=10, load_brain=False, save_brain=False):
    singles = [room for room in rooms.allRooms if room.capacity == "Single"]
    preprocessed = preprocess_rooms(singles)
    all_vectors = [room_to_feature_vector(room, preprocessed) for room in singles]
    training_sequences = getLabeledRoomsFeaturesAndLabels(getRoomsMap(singles, all_vectors))
    input_units = len(all_vectors[0])
    if load_brain and "net" in brain_shelf:
        net = brain_shelf["net"]
        net.sorted = False
        net.sortModules()
    else:
        net = FeedForwardNetwork()
        layer_in = LinearLayer(input_units)
        layer_hidden = SigmoidLayer(1000)
        layer_hidden2 = SigmoidLayer(100)
        layer_out = LinearLayer(1)
        net.addInputModule(layer_in)
        net.addModule(layer_hidden)
        net.addModule(layer_hidden2)
        net.addOutputModule(layer_out)
        in_to_hidden = FullConnection(layer_in, layer_hidden)
        hidden_to_hidden = FullConnection(layer_hidden, layer_hidden2)
        hidden_to_out = FullConnection(layer_hidden2, layer_out)
        net.addConnection(in_to_hidden)
        net.addConnection(hidden_to_hidden)
        net.addConnection(hidden_to_out)
        net.sortModules()
        training_data = SupervisedDataSet(len(all_vectors[0]), 1)
        for training_seq in training_sequences:
            training_data.appendLinked(training_seq[1], training_seq[2])
        trainer = BackpropTrainer(net, training_data)
        for i in xrange(T):
            error = trainer.train()
            print "Training iteration %d. Error: %f" % (i + 1, error)
        if save_brain:
            brain_shelf["net"] = net
    labeled_rooms = []
    for i, vector in enumerate(all_vectors):
        labeled_rooms.append((singles[i], net.activate(vector)))
    available_rooms = available.get_available_rooms()
    labeled_rooms.sort(key=lambda x: -x[1])
    for room, label in labeled_rooms:
        if room.num in available_rooms:
            print "%16.12f: %s" % (label, room)
def do_evaluate(eval_data, folds_number, iter_number):
    eval_set = SupervisedDataSet(len(feats), 1)
    for inst in eval_data:
        eval_set.appendLinked(inst.features(), [inst.class_idx()])
    res = evaluate(net_placeholder[0], eval_set)
    with open(os.path.join("results", str(folds_number) + ".net." + str(iter_number) + ".obj"), "w") as f:
        pickle.dump(res, f)
    res = evaluate_base(eval_set)
    with open(os.path.join("results", str(folds_number) + ".base." + str(iter_number) + ".obj"), 'w') as f:
        pickle.dump(res, f)
    print res
def load_from_file(filename):
    input_size = 9
    output_size = 1
    dataset = SupervisedDataSet(input_size, output_size)
    with open(filename, 'r') as datafile:
        for line in datafile:
            data = line.strip().split(' ')
            dataset.appendLinked(tuple(data[:input_size]),
                                 tuple(data[-output_size:]))
    return dataset
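# Usage sketch for load_from_file above, which expects space-separated numeric
# rows: nine inputs followed by one target. 'sample.txt' is a made-up file
# name, with each line looking like:
#   0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
ds = load_from_file('sample.txt')
print len(ds), ds.indim, ds.outdim  # -> <number of rows> 9 1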
def gettraining(self):
    DS = SupervisedDataSet(self.datainput, 8)
    for trn in self.training:
        inf = open(trn, 'r')
        for line in inf:
            val = line.split(' ', 2)
            index = self.fileindex[val[0]]
            if index >= 10:
                input = self.fftfile(val[0])
                output = self.tobit(int(val[1]))
                DS.appendLinked(input, output)
        inf.close()
    return DS
def buildDataset(path, indexes):
    f = open(path)
    ds = SupervisedDataSet(len(indexes[0]), len(indexes[1]))
    indexin, indexout = indexes
    for line in f.readlines():
        outline = [float(x) for x in line.split('\t')[:-1]]
        inpt, outpt = [], []
        for i in indexin:
            inpt.append(outline[i])
        for i in indexout:
            outpt.append(outline[i])
        ds.appendLinked(inpt, outpt)
    return ds
def create_NN_classifier(genes, positive_dataset, negative_dataset):
    maxEpochs_for_trainer = 60
    node_num, learning_rate, window_size = genes
    node_num, learning_rate, window_size = int(node_num), float(learning_rate), int(window_size)
    train_labels, train_dataset = create_train_labels_and_dataset(positive_dataset, negative_dataset)
    indim = 21 * (2 * window_size + 1)
    ds = SupervisedDataSet(indim, 1)
    for i in xrange(len(train_labels)):
        ds.appendLinked(train_dataset[i], [train_labels[i]])
    net = buildNetwork(indim, node_num, 1, outclass=SigmoidLayer, bias=True)
    trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
    trainer.trainUntilConvergence(maxEpochs=maxEpochs_for_trainer)
    return net
def _generate_Pybrain_DS(self):
    vect_stream = []
    for word in self.sent_stream:
        vect_stream.append(self._word_to_vec(word))
    to_conv = zip(vect_stream, vect_stream[1:])
    to_conv.append((vect_stream[-1], vect_stream[0]))  # add wrap-around
    DS = SupervisedDataSet(29, 29)
    for inp, targ in to_conv:
        DS.appendLinked(inp, targ)
    return DS
def create_int_dataset(n_input, n_output, codecs):
    ds = SupervisedDataSet(n_input, n_output)
    if n_input == 3 * 1:
        for i in range(0, len(codecs), 1):
            if i + 1 < len(codecs):
                ds.appendLinked(list(codecs[i]), list(codecs[i + 1]))
    elif n_input == 3 * 3:
        for i in range(0, len(codecs), 1):
            if i + 3 < len(codecs):
                ds.appendLinked(list(codecs[i] + codecs[i + 1] + codecs[i + 2]),
                                list(codecs[i + 3]))
    elif n_input == 3 * 5:
        for i in range(0, len(codecs), 1):
            if i + 6 < len(codecs):
                ds.appendLinked(list(codecs[i] + codecs[i + 1] + codecs[i + 2] +
                                     codecs[i + 3] + codecs[i + 4]),
                                list(codecs[i + 5]))
    elif n_input == 3 * 8:
        for i in range(0, len(codecs), 1):
            if i + 9 < len(codecs):
                ds.appendLinked(list(codecs[i] + codecs[i + 1] + codecs[i + 2] +
                                     codecs[i + 3] + codecs[i + 4] + codecs[i + 5] +
                                     codecs[i + 6] + codecs[i + 7]),
                                list(codecs[i + 8]))
    else:
        print 'not implemented yet'
        return
    return ds
def trainNetwork(net, data):
    dimension = WINDOW_SIZE
    ds = SupervisedDataSet(dimension, dimension)
    num_windows = int(len(data) / WINDOW_SIZE)
    for offset in range(0, num_windows):
        lower = offset * WINDOW_SIZE
        upper = min(len(data), (offset + 1) * WINDOW_SIZE)
        test_input = rfft(np.copy(data[lower:upper]))
        test_input.shape = (1, WINDOW_SIZE)
        test_output = np.copy(test_input)
        ds.appendLinked(test_input, test_output)
    trainer = BackpropTrainer(net, dataset=ds)
    for i in range(10):
        print("epoch {}".format(i))
        trainer.trainEpochs(1)
def getSeparateDataSets(testSize=0.2):
    trnDs = ClassificationDataSet(len(feats), nb_classes=len(classes))
    tstDs = SupervisedDataSet(len(feats), 1)
    for c in classes:
        with codecs.open(os.path.join(data_root, c + ".txt"), 'r', 'utf8') as f:
            lines = f.readlines()
            breakpoint = (1.0 - testSize) * len(lines)
            for i in range(len(lines)):
                r = Record("11", lines[i], c, "")
                if i < breakpoint:
                    trnDs.appendLinked(r.features(), [r.class_idx()])
                else:
                    tstDs.appendLinked(r.features(), [r.class_idx()])
    trnDs._convertToOneOfMany([0, 1])
    return trnDs, tstDs
def trainer(dataTrain, dataTest, hiddenLayer, validationProportion, epochs):
    # create the datasets with 9 inputs and 1 output
    dsTrain = SupervisedDataSet(9, 1)
    dsTest = SupervisedDataSet(9, 1)
    # add the samples to the training dataset
    for x in dataTrain:
        dsTrain.appendLinked(x[:9], x[9:])
    # add the samples to the test dataset
    for x in dataTest:
        dsTest.appendLinked(x[:9], x[9:])
    # build the network
    net = buildNetwork(9, hiddenLayer, 1, bias=True)
    # train the neural network
    trainer = BackpropTrainer(net, dsTrain)
    trainer.train()
    trainer.trainUntilConvergence(verbose=False,
                                  validationProportion=validationProportion,
                                  maxEpochs=epochs)
    # run the network on both datasets
    resTrain = net.activateOnDataset(dsTrain)
    resTest = net.activateOnDataset(dsTest)
    # check the results
    hitTrain = []
    for index, x in enumerate(resTrain):
        hitTrain.append(int(round(x[0], 0)) == dataTrain[index][9])
    hitTest = []
    for index, x in enumerate(resTest):
        hitTest.append(int(round(x[0], 0)) == dataTest[index][9])
    resultTrain = Counter(hitTrain)
    resultTest = Counter(hitTest)
    # float() avoids Python 2 integer division truncating the ratios to 0
    return Resultado(
        {'acertos': float(resultTrain[True]) / len(hitTrain),
         'erros': float(resultTrain[False]) / len(hitTrain)},
        {'acertos': float(resultTest[True]) / len(hitTest),
         'erros': float(resultTest[False]) / len(hitTest)})
def NN_get_nextcase(self, NNcases):
    ds_e = SupervisedDataSet(self.NN_input_size, self.NN_target_size)
    nextcase = NNcases[0]
    evalue = None
    for NNcase in NNcases:
        ds_e.appendLinked(self.gen_NN_input(NNcase[1]), None)
        e = self.NN_net.activateOnDataset(ds_e)[0][0]
        if evalue is None:
            evalue = e
        if self.rank_attrs['sort'].lower() == 'ascend':
            if e < evalue:
                evalue = e
                nextcase = NNcase
        elif self.rank_attrs['sort'].lower() == 'descend':
            if e > evalue:
                evalue = e
                nextcase = NNcase
    return nextcase + (evalue, )
def createDs(func):
    # global declarations must precede the assignments, otherwise Python
    # rejects the late "global" statements
    global outputMax
    global outputMin
    outputMax = -np.inf
    outputMin = np.inf
    ds = SupervisedDataSet(2, 1)
    for j in range(N):
        for i in range(N):
            input = [i, j]
            output = func(j, i)  # math.sqrt(i**2 + j**2)
            ds.appendLinked(input, output)
            if output > outputMax:
                outputMax = output
            if outputMin > output:
                outputMin = output
    ds.outputMax = outputMax
    ds.outputMin = outputMin
    return ds
def train(self, training_set):
    train_points = [self._get_x(t) for t in training_set]
    train_distances = get_elements(self._distances, training_set)
    predictor_points = train_distances  # [a + b for a, b in zip(train_distances, train_points)]
    # print(predictor_points)
    ensembles = get_elements(self._best_ensemble_by_time, training_set)
    ens_combinations = [self._ens_combinatinons[ens] for ens in ensembles]
    print(self._m_count + self.p_count, len(self._models_combinations))
    ds = SupervisedDataSet(self._m_count, len(self._models_combinations))  # + self.p_count, len(self._models_combinations))
    for input_data, target in list(zip(predictor_points, ens_combinations)):
        print(input_data, target)
        ds.appendLinked(input_data, target)
    trainer = BackpropTrainer(self._ann, ds)
    trainer.trainEpochs(100)
def createDataset(nInputs, inputSize, nOutputs):
    index = 0
    ds = SupervisedDataSet(inputSize, nOutputs)
    i = 0
    j = 0
    pList = candleGen()
    input = []
    for sub in pList:
        if nInputs == j:
            break
        if i < inputSize:
            input.append(sub[index])
        else:
            ds.appendLinked(input, sub[index])
            input = []
            input.append(sub[index])
            i = 0
            j = j + 1
        i = i + 1
    return ds
def buildDataSet(fTrainSet):
    ds = SupervisedDataSet(len(homeDict) + len(awayDict) + 8, 1)
    for row in fTrainSet:
        homeTeam = [0] * len(homeDict)
        if row[0] in homeDict.keys():
            homeTeam[homeDict[row[0]]] = 1
        else:
            homeTeam[-1] = 1  # "other"
        awayTeam = [0] * len(awayDict)
        if row[1] in awayDict.keys():
            awayTeam[awayDict[row[1]]] = 1
        else:
            awayTeam[-1] = 1  # "other"
        inVec = homeTeam + awayTeam + row[2:10]
        tarVec = row[10]
        ds.appendLinked(inVec, tarVec)
    return ds
def createDataset3(pList, nInputs, inputSize, nOutputs):
    index = 1
    ds = SupervisedDataSet(inputSize, nOutputs)
    i = 0
    j = 0
    input = []
    z = 0
    for sub in pList:
        val = normalize(sub[index])
        if nInputs == j:
            break
        elif i < inputSize:
            input.append(val)
            i = i + 1
        else:
            ds.appendLinked(input, val)
            input.pop(0)
            input.append(val)
            j = j + 1
    return ds
def get_data(path_to_data):
    print 'Loading data from', path_to_data
    data_file = open(path_to_data, 'r+b')
    mmap_file = mmap.mmap(data_file.fileno(), 0)
    summary = 0
    header = [int(item) for item in mmap_file.readline().split(' ')]
    counter = 0
    pbar = progressbar.ProgressBar(maxval=header[0])
    pbar.start()
    line = mmap_file.readline()
    data = SupervisedDataSet(header[1], header[2])
    while line != '':
        data_line = [float(item) for item in line.split(' ')]
        line = mmap_file.readline()
        result_line = [float(item) for item in line.split(' ')]
        line = mmap_file.readline()
        data.appendLinked(data_line, result_line)
        counter += 1
        pbar.update(counter)
    pbar.finish()
    print 'Data successfully loaded'
    return [header, data]
def createDs(func):
    # global declarations must precede the assignments, otherwise Python
    # rejects the late "global" statements
    global outputMax
    global outputMin
    outputMax = -np.inf
    outputMin = np.inf
    ds = SupervisedDataSet(1, 1)
    # for j in range(N):
    rs = []
    for i in range(N):
        rs.append(random.random())
    rs.sort()
    for r in rs:
        input = r  # [i]
        output = nonLinearFunc(r)
        ds.appendLinked(input, output)
        if output > outputMax:
            outputMax = output
        if outputMin > output:
            outputMin = output
    ds.outputMax = outputMax
    ds.outputMin = outputMin
    return ds
def entrenarO(red):
    # Initialize the dataset
    ds = SupervisedDataSet(4096, 1)
    """The dataset is built by processing each image to extract the figures,
    then assigning the desired target value for the neural network output."""
    print "O - Figure"
    for i, c in enumerate(os.listdir(os.path.dirname('C:\\Users\\LuisD\\Desktop\\Reconocimiento\\prueba/'))):
        try:
            im = cv2.imread('C:\\Users\\LuisD\\Desktop\\Reconocimiento\\prueba/' + c)
            im = cv2.resize(im, (64, 64))  # cv2.resize returns the result; it does not modify in place
            pim = pi.ProcesarImagen(im)
            ds.appendLinked(pim.flatten(), 10)
        except:
            pass
    print len(ds)
    print i, c
    trainer = BackpropTrainer(red, ds)
    print "Training until convergence"
    trainer.trainUntilConvergence()
    NetworkWriter.writeToFile(red, 'rna_o.xml')
def trainMinError(trainer, dsV, minTrainer=None, batch_size=0, epochs=50, plotErr=False, i0=0):
    dsT = trainer.ds
    if minTrainer is None:
        minTrainer = deepcopy(trainer)
    for i in range(epochs):
        if batch_size == 0:
            ds = dsT
        else:
            ds = SupervisedDataSet(len(dsT['input'][0]), len(dsT['target'][0]))
            data = zip(dsT['input'], dsT['target'])
            shuffle(data)
            for k in range(batch_size):
                ds.appendLinked(data[k][0], data[k][1])
        trainer.ds = ds
        trainer.train()
        TE = trainer.testOnData(dsT)
        VE = trainer.testOnData(dsV)
        MVE = minTrainer.testOnData(dsV)
        if VE < MVE:
            minTrainer = BackpropTrainer(deepcopy(trainer.module), dsT)
        if plotErr:
            plotError(i + i0, TE, VE, MVE)
    trainer.ds = dsT
    return minTrainer
def fit(self, X, y):
    '''
    Train the model.

    :param X: list of numpy arrays representing the training samples.
    :param y: numpy array representing the training samples' true values.
    '''
    input_dimension = len(X[0])
    self.nnw = buildNetwork(input_dimension, input_dimension * self.hidden_layer_multiplier, 1)
    if util.VERBOSE:
        print 'Generating neural network data set:'
        print '\tInput layer dimension: %d' % input_dimension
        print '\tHidden layer dimension: %d ' % (input_dimension * self.hidden_layer_multiplier)
    # Create a data set for training samples with input_dimension features, and outputs with dimension 1.
    data_set = SupervisedDataSet(input_dimension, 1)
    for i in xrange(len(y)):
        data_set.appendLinked(X[i], np.array(y[i]))
    if util.VERBOSE:
        print 'Finished generating neural network data set.'
        print 'Starting to train neural network.'
    # Train the neural network with backpropagation on the data set.
    self.trainer = BackpropTrainer(self.nnw, dataset=data_set)
    for i in xrange(3):
        error = self.trainer.train()
        print 'Iteration: %d\tError: %f' % (i, error)
    if util.VERBOSE:
        print 'Finished training neural network.'
print sorted_word_dict[-9:]
plt.plot(occurrences)
plt.xlabel('word indices')
plt.ylabel('occurrences')
plt.ylim([0, 5000])
plt.show()

######## Build training set and save to file ############
print "Saving to file..."
# PyBrain has some nice classes to do all this.
from pybrain.datasets import SupervisedDataSet
import numpy as np

DS = SupervisedDataSet(dict_size, 1)
for m_list, target in [[spamlist, 1], [hamlist, 0]]:
    for mail in m_list:
        # each data point is a list (or vector) the size of the dictionary
        wordvector = np.zeros(dict_size)
        # now go through the email and put the occurrences of each word
        # in its respective spot (i.e. word_dict[word]) in the vector
        for word in mail:
            if word in word_dict:
                wordvector[word_dict[word]] += 1
        DS.appendLinked(np.log(wordvector + 1), [target])  # put word occurrences on a log scale

# TODO: use MySQL instead of csv
DS.saveToFile('dataset.csv')
print "Done."
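# The saved dataset can be reloaded later with the matching PyBrain
# deserializer; a sketch, assuming the 'dataset.csv' written by saveToFile
# above (loadFromFile is the counterpart classmethod).
from pybrain.datasets import SupervisedDataSet

DS = SupervisedDataSet.loadFromFile('dataset.csv')
print len(DS)
for wordvector, target in DS:
    pass  # input: log-scaled word counts, target: 1 for spam, 0 for ham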
# Pull out all indicators into a single pandas dataframe. Prefix all rows
# with "LTC"
ltc = d.ltc.combine("LTC")
# take our ltc dataframe, and get targets (prices in next 10 minutes)
# in the form of compound return prices (other options are "PRICES",
# which are raw price movements)
dataset, tgt = dtools.gen_ds(ltc, 1, ltc_opts, "CRT")
# initialize a pybrain dataset
DS = SupervisedDataSet(len(dataset.values[0]), np.size(tgt.values[0]))
# fill it
for i in xrange(len(dataset)):
    DS.appendLinked(dataset.values[i], [tgt.values[i]])
# split 70% for training, 30% for testing
train_set, test_set = DS.splitWithProportion(0.7)
# build our recurrent network with 10 hidden neurodes, one recurrent
# connection, using tanh activation functions
net = RecurrentNetwork()
hidden_neurodes = 10
net.addInputModule(LinearLayer(len(train_set["input"][0]), name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(len(train_set["target"][0]), name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(FullConnection(net["out"], net["hidden1"], name="cout"))
net.sortModules()
def main():
    try:
        while True:
            choice = input(
                "------------------------------------------\n"
                "1:All new\n2:Load all\n3:Add new data\n4:predict\n5:Print Data\n"
                "6:Recreate components\n7:Save\n8:Normalize\n9:print deNormKey\n"
                "10:print normalized data\n11:Generate Predicted Dataset\n"
                "12:Print generated DataSet\n13:plot generated DataSet\n14:2D plot\n"
                "15:To csv\n16:Exit\n-----------------------------------------\n")
            if (choice == 1):
                global printDS
                printDS = SupervisedDataSet(2, 4)
                createNetwork()
                createDataSet()
                createTrainer()
                i = input("Enter the number of data lines you want to add: ")
                iterAppend(i)
                printDS = deepcopy(DS)
                normalize()
                print("printDS--------------------------------")
                print(printDS)
                print("DS-------------------------------------")
                print(DS)
                trainData()
                saveAll()
            if (choice == 2):
                loadAll()
                trainData()
            if (choice == 3):
                i = input("Enter the number of data lines you want to add: ")
                iterAppend(i)
                trainData()
            if (choice == 4):
                a = input("Enter the first input: ")
                b = input("Enter the second input: ")
                out = []
                out = predict(a, b)
                # remember to add code that can append the output data to the network
            if (choice == 5):
                printData()
            if (choice == 6):
                inChoice = input("1:New network\n2:New Dataset\n")
                if (inChoice == 1):
                    createNetwork()
                    trainData()
                if (inChoice == 2):
                    createDataSet()
                    i = input("Enter the number of data lines you want to add: ")
                    iterAppend(i)
                    trainData()
            if (choice == 7):
                saveAll()
            if (choice == 8):
                normalize()
            if (choice == 9):
                printDeNormKey()
            if (choice == 10):
                printNormalizedData()
            if (choice == 11):
                global generatedDataSet
                generatedDataSet = SupervisedDataSet(2, 4)
                ll1 = input("Enter the lower limit of the first input")
                hl1 = input("Enter the higher limit of the first input")
                ll2 = input("Enter the lower limit of the second input")
                hl2 = input("Enter the higher limit of the second input")
                for i1 in np.arange(ll1, hl1, 0.02):
                    for i2 in np.arange(ll2, hl2, 0.5):
                        out = []
                        out = predict(i1, i2)
                        generatedDataSet.appendLinked(
                            [i1, i2], [out[0], out[1], out[2], out[3]])
            if (choice == 12):
                print(generatedDataSet)
            if (choice == 13):
                plotData()
            if (choice == 14):
                plot2D()
            if (choice == 15):
                to_csv()
            if (choice == 16):
                break
    except Exception as e:
        print(str(e))
def graphNN(ticker, date, epochs, verbose):
    """
    The function builds a data set of stock prices, normalizes that data set,
    builds a linked data set to train the neural network, generates a neural
    network, trains the network, makes predictions, analyzes the predictions
    against testing data to generate statistics for comparison, and uses the
    statistics to generate graphs as a png file.

    :param ticker: the stock ticker to train and predict on
    :param date: the date to split the data on to create training and testing
    :param epochs: the number of times to train the network
    :param verbose: boolean value for verbose output
    :return tomorrowPrice: the price prediction for tomorrow
    :return totalTime: the total time in seconds it took to train the network on the data set
    :return averageTimePerEpoch: the average time per training run
    :return averagePercentError: the average percent error of the predictions and the testing data
    :return minPercentError: the minimum percent error of the predictions and the testing data
    """
    # request stock prices and split by the specified date to create training and testing data sets
    if verbose:
        print 'Requesting data...'
    data = getStockPrices(ticker, frequency="daily", update=True)
    trainData, testData = splitByDate(data, date)
    xTrain, yTrain = preprocessStocks(trainData)
    xTest, yTest = preprocessStocks(testData)
    # allocate space for predictions and error values
    fucturePredictions = []
    trainingPredictions = []
    percentError = []
    if verbose:
        print 'complete.'
    if verbose:
        print 'Normalizing data...'
    # normalize the values to a percentage of their max values to increase network training speed
    xTrain, yTrain, xTest, yTest, priceScaleFactor, timeScaleFactor = normalize(
        xTrain, yTrain, xTest, yTest)
    if verbose:
        print 'complete.'
    if verbose:
        print 'Building dataset...'
    # build a linked data set to allow for training and error calculation
    ds = SupervisedDataSet(1, 1)
    for i in range(0, len(xTrain)):
        ds.appendLinked(xTrain[i], yTrain[i])
    if verbose:
        print 'complete.'
    if verbose:
        print 'Building network...'
    rnn = buildNetwork(1, 3, 3, 3, 3, 3, 3, 3, 3, 1, bias=True, recurrent=True, hiddenclass=TanhLayer)
    if verbose:
        print 'complete'
    if verbose:
        print 'Training network...'
    trainer = BackpropTrainer(rnn, ds, learningrate=0.01)
    totalTime, averageTimePerEpoch, trainerErrorValues, epochTimes = trainNetwork(
        trainer, epochs, verbose)
    if verbose:
        print 'Training network 100.0% complete.'
    if verbose:
        print 'Predicting...'
    # prime the network
    for i in xTrain:
        rnn.activate(i)
    # make predictions with the network on the training data to show the general shape of the approximated function
    for i in xTrain:
        trainingPredictions.append(rnn.activate(i))
    # make predictions with the network on the testing data to validate the accuracy of the network
    for i in xTest:
        fucturePredictions.append(rnn.activate(i))
    # predict tomorrow's price
    tomorrowPrice = rnn.activate(xTest[len(xTest) - 1] + 1) * priceScaleFactor
    if verbose:
        print 'complete.'
    if verbose:
        print 'Generating graphs...'
    # denormalize
    xTrain, yTrain, xTest, yTest, fucturePredictions, trainingPredictions = denormalize(
        xTrain, yTrain, xTest, yTest, fucturePredictions, trainingPredictions,
        priceScaleFactor, timeScaleFactor)
    # calculate percent error
    for i in range(0, len(yTest)):
        percentError.append((abs((yTest[i] - fucturePredictions[i]) / yTest[i]) * 100))
    # calculate statistics on the analysis of the network
    sumPercentError = sum(percentError)
    averagePercentError = sumPercentError / len(percentError)
    numDataPoints = len(xTrain) + len(xTest)
    minPercentError = min(percentError)
    # generate the graphs and save them to the working directory
    graphOutput(xTrain, yTrain, xTest, yTest, fucturePredictions, trainingPredictions, ticker)
    if verbose:
        print 'complete.'
    # returns
    return tomorrowPrice, numDataPoints, totalTime, averageTimePerEpoch, averagePercentError, minPercentError
def create_binary_dataset(n_input, n_output, codecs):
    ds = SupervisedDataSet(n_input, n_output)
    if n_input == 11:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]),
                            ret_Characters(codecs[i + 1]))
    elif n_input == 22:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]),
                            ret_Characters(codecs[i + 2]))
    elif n_input == 33:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]),
                            ret_Characters(codecs[i + 3]))
    elif n_input == 44:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]),
                            ret_Characters(codecs[i + 4]))
    elif n_input == 55:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                            ret_Characters(codecs[i + 4]),
                            ret_Characters(codecs[i + 5]))
    elif n_input == 66:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                            ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]),
                            ret_Characters(codecs[i + 6]))
    elif n_input == 77:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                            ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]) +
                            ret_Characters(codecs[i + 6]),
                            ret_Characters(codecs[i + 7]))
    elif n_input == 88:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                            ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]) +
                            ret_Characters(codecs[i + 6]) + ret_Characters(codecs[i + 7]),
                            ret_Characters(codecs[i + 8]))
    elif n_input == 99:
        for i in range(0, len(codecs) - n_input % 10, 1):
            ds.appendLinked(ret_Characters(codecs[i]) + ret_Characters(codecs[i + 1]) +
                            ret_Characters(codecs[i + 2]) + ret_Characters(codecs[i + 3]) +
                            ret_Characters(codecs[i + 4]) + ret_Characters(codecs[i + 5]) +
                            ret_Characters(codecs[i + 6]) + ret_Characters(codecs[i + 7]) +
                            ret_Characters(codecs[i + 8]),
                            ret_Characters(codecs[i + 9]))
    return ds
    window[7] = normalize(window[7], max_volume, min_volume)
    window[9] = normalize(window[9], max_volume, min_volume)
    output = n.activate(window)
    for j in range(0, 5):
        prediction = denormalize(output[j], max_price, min_price)
        print prediction, ticks_future[j][0]
        writer.writerow([ticks_future[j][2], prediction, ticks_future[j][0]])

last_five = []
for day in range(0, 100):
    ticks = map(lambda x: data.next(), range(0, 5))
    last_five = map(lambda x: data.next(), range(0, 5))
    DS.appendLinked(*ticks_to_inputs_outputs(ticks, last_five))

with open('predictions.csv', 'wb') as output_file:
    writer = csv.writer(output_file, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
    for i in range(0, 18):
        trainer.trainUntilConvergence(validationProportion=0.55, maxEpochs=1000, verbose=False)
        ticks = map(lambda x: data.next(), range(0, 5))
        predict_next_five(last_five, ticks, writer)
        DS.appendLinked(*ticks_to_inputs_outputs(last_five, ticks))
        last_five = ticks
class NeuralNetworkRegression(algorithmbase):

    def ExtraParams(self, hiddenlayerscount, hiddenlayernodescount):
        self.hiddenlayerscount = hiddenlayerscount
        self.hiddenlayernodescount = hiddenlayernodescount
        return self

    def PreProcessTrainData(self):
        self.traindata = preprocess_apply(self.traindata, self.missingvaluemethod, self.preprocessingmethods)

    def PrepareModel(self, savedmodel=None):
        if savedmodel is not None:
            self.trainer = savedmodel
        else:
            attributescount = len(self.traindata[0])
            self.ds = SupervisedDataSet(attributescount, 1)
            for i in range(len(self.traindata)):
                self.ds.appendLinked(self.traindata[i], self.trainlabel[i])
            self.net = FeedForwardNetwork()
            inLayer = LinearLayer(len(self.traindata[0]))
            self.net.addInputModule(inLayer)
            hiddenLayers = []
            for i in range(self.hiddenlayerscount):
                hiddenLayer = SigmoidLayer(self.hiddenlayernodescount)
                hiddenLayers.append(hiddenLayer)
                self.net.addModule(hiddenLayer)
            outLayer = LinearLayer(1)
            self.net.addOutputModule(outLayer)
            layers_connections = []
            layers_connections.append(FullConnection(inLayer, hiddenLayers[0]))
            # connect consecutive hidden layers (i to i + 1; the original's
            # i - 1 to i wrapped around to the last layer on the first pass)
            for i in range(self.hiddenlayerscount - 1):
                layers_connections.append(FullConnection(hiddenLayers[i], hiddenLayers[i + 1]))
            layers_connections.append(FullConnection(hiddenLayers[-1], outLayer))
            for layers_connection in layers_connections:
                self.net.addConnection(layers_connection)
            self.net.sortModules()
            # train the network
            self.trainer = BackpropTrainer(self.net, self.ds)
            self.trainer.train()

    def PreProcessTestDate(self):
        self.testdata = preprocess_apply(self.testdata, self.missingvaluemethod, self.preprocessingmethods)

    def Predict(self):
        prediction = []
        for testrecord in self.testdata:
            prediction.append(self.net.activate(testrecord)[0])
        self.result = [self.testlabel, prediction]

    def GetModel(self):
        return self.trainer
def train_network(trainer, dataset=None, k_fold=1, bold_driver=False, maxEpochs=1000):
    prev_err = 1000
    out_train = open("./errors/train_MSE.txt", "w")
    out_test = open("./errors/test_MSE.txt", "w")
    out_valid = open("./errors/valid_MSE.txt", "w")
    ptrain = open("./errors/train_progression.txt", "w")
    ptest = open("./errors/test_progression.txt", "w")
    # number of gradient descents per training session
    test_cont = 0
    train_progression = []
    test_progression = []
    assert isinstance(trainer, myBackpropTrainer)
    net = trainer.module
    if dataset is None:
        dataset = trainer.ds
    assert isinstance(dataset, SupervisedDataSet)
    ds_dim = dataset.getLength()
    n = dataset.getLength() / k_fold
    base = 0
    for i in range(0, ds_dim - (ds_dim % k_fold), n):
        # create empty datasets to compute the validation error
        ds_test = SupervisedDataSet(net.indim, net.outdim)
        ds_train = SupervisedDataSet(net.indim, net.outdim)
        print 'train ', (base / n) + 1, ' on ', ((ds_dim - (ds_dim % k_fold)) / n)
        # build the train and test datasets for cross validation
        for b in range(ds_dim - ds_dim % k_fold):
            if base <= b < (base + n):
                ds_test.appendLinked(*dataset.getLinked(b))
            else:
                ds_train.appendLinked(*dataset.getLinked(b))
        base += n
        # ds_test = dataset
        # ds_train = dataset
        tmp_train, tmp_test = trainer.trainUntilConvergence(
            datasetTrain=ds_train, datasetTest=ds_test, verbose=False,
            maxEpochs=maxEpochs, continueEpochs=maxEpochs / 2)
        # tmp_train, tmp_test = trainer.trainUntilConvergence(maxEpochs=1000, verbose=True,
        #                                                     continueEpochs=1000, validationProportion=0.30,
        #                                                     trainingData=ds_train, validationData=ds_test,
        #                                                     convergence_threshold=10)
        train_progression += tmp_train
        test_progression += tmp_test
        # bold driver: adjust the learning rate based on how the train error evolves
        if bold_driver:
            if (sum(tmp_train) / len(tmp_train)) < prev_err:
                prev_err = (sum(tmp_train) / len(tmp_train))
                trainer.descent.alpha += trainer.descent.alpha * 0.01  # alpha = learning rate
            else:
                prev_err = (sum(tmp_train) / len(tmp_train))
                trainer.descent.alpha -= trainer.descent.alpha * 0.01
                trainer.descent.alpha -= trainer.descent.alpha * 0.5  # alpha = learning rate
            print trainer.descent.alpha
        print sum(tmp_train) / len(tmp_train)
        # testOnData and Validator compute the same error (MSE) in two different
        # ways; both are run to check that they behave consistently
        if net.indim % 11 == 0:
            val = evaluate_binary_error(net, ds_test, verbose=False)
        else:
            val = evaluate_int_error(net, ds_test, verbose=True)
        # write the errors to file so they can be plotted later
        out_train.write(str(sum(tmp_train) / len(tmp_train)) + '\n')
        out_valid.write(str(val) + '\n')
        out_test.write(str(sum(tmp_test) / len(tmp_test)) + '\n')
    for i in range(len(train_progression)):
        ptrain.write(str(train_progression[i]) + '\n')
    for i in range(len(test_progression)):
        ptest.write(str(test_progression[i]) + '\n')
    out_train.close()
    out_test.close()
    out_valid.close()
    ptest.close()
    ptrain.close()
fo01 = open('out', 'w')
fo02 = open('dist', 'w')
fonet = open('net', 'w')

# load mat from matlab
mat = sio.loadmat('Features.mat')
# print(mat)
X = mat['X']
y = mat['y']
length = X.shape[0]

# set data
alldata = SupervisedDataSet(14, 7)
for n in arange(0, length):
    alldata.appendLinked(X[n], y[n])

# split data into test data and training data
tstdata, trndata = alldata.splitWithProportion(0.25)

# build network
fnn = buildNetwork(trndata.indim, 100, trndata.outdim, outclass=SigmoidLayer, fast=True)
# print fnn

# build trainer
trainer = BackpropTrainer(fnn, dataset=trndata,
        temp_inputs.append(float(item))
        counter = counter + 1
    else:
        prediction_inputs.append(temp_inputs)
        prediction_outputs.append(float(item))
        temp_inputs = []
        counter = 0

# for x in outputs:
#     x = x + 30

# DS = ClassificationDataSet(20, 1, nb_classes=60)
DS = SupervisedDataSet(10, 1)
for x, y in zip(inputs, outputs):
    DS.appendLinked(x, y)
    # DS.addSample(x, y)
# DS._convertToOneOfMany(bounds=[0, 1])

error2 = 0.0
local_min_error = 100000
for x in range(0, 35):
    if count == 16 and local_min_error > 5.4:
        break
    error2 = error
    fnn = buildNetwork(DS.indim, 30, DS.outdim, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)
    target = [16, 14, 9, 16, -7, -2, 16, -1, -6, 8, 7, 10, -6, -2, 12, 2, 3]
>>> from pybrain.datasets import SupervisedDataSet
>>> DS = SupervisedDataSet(3, 2)
>>> DS.appendLinked([1, 2, 3], [4, 5])
>>> len(DS)
1
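# A few more SupervisedDataSet basics following on from the snippet above
# (standard PyBrain behaviour: field access, iteration, splitting):
>>> DS.appendLinked([4, 5, 6], [7, 8])
>>> DS['input'].shape, DS['target'].shape
((2, 3), (2, 2))
>>> for inp, tgt in DS:
...     print inp, tgt
>>> train, test = DS.splitWithProportion(0.5)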
# xTest = xTrain
# yTest = yTrain
xTrain, yTrain, xTest, yTest, priceScaleFactor, timeScaleFactor = normalize(
    xTrain, yTrain, xTest, yTest)
print xTrain
print yTrain
print xTest
print yTest
print priceScaleFactor
print timeScaleFactor

# build data set from x training data and y training data
ds = SupervisedDataSet(1, 1)
for i in range(0, len(xTrain)):
    ds.appendLinked(xTrain[i], yTrain[i])

# number of runs
runs = 5
rnn = buildNetwork(1, 3, 3, 3, 3, 3, 3, 3, 3, 1, bias=True,
net = buildNetwork(16, 150, 26)
trainingset = open("fulltrainingset.txt", "r")
traininglines = trainingset.read().splitlines()
DS = SupervisedDataSet(16, 26)
for line in traininglines:
    line = line.replace("[", "")
    line = line.replace("]", "")
    splitline = line.split("!")
    entries = splitline[0].split(",")
    desired = splitline[1].split(",")
    entries = list(map(int, entries))
    desired = list(map(int, desired))
    DS.appendLinked(entries, desired)
trainer = BackpropTrainer(net, DS, 0.01, momentum=0.02)
for i in range(1, 40):
    print(trainer.train())
testingset = open("fulltestingset.txt", "r")
testinglines = testingset.read().splitlines()
counter = 0
correctanswers = 0
for line in testinglines:
    line = line.replace("[", "")
    line = line.replace("]", "")
    values = line.split("!")
    entries = values[0].split(",")
    values = values[1].split(",")
    entries = list(map(int, entries))
from deap import base, creator, tools
import random, time

creator.create("FitnessMax", base.Fitness, weights=(1.0, ))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_float", random.uniform, -2, 2)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, 4)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

DS = SupervisedDataSet(1, 1)
DS.appendLinked([1], [-1])
DS.appendLinked([-0.83], [0.83])
DS.appendLinked([-0.3], [0.3])
DS.appendLinked([0.3], [-0.3])
DS.appendLinked([0.028], [-0.028])
DS.appendLinked([-1], [1])


def calcError(x, y):
    return abs(x - y) / x


def testNetwork(eVar):
    global net  # So that we can look at the network after we're done training
    net = buildNetwork(1, 1, 1, bias=True, outputbias=True)
from pybrain.supervised.trainers import BackpropTrainer

net = buildNetwork(16, 30, 26)
trainingset = open("sampple-trainingset", "r")
traininglines = trainingset.read().splitlines()
DS = SupervisedDataSet(16, 26)
for line in traininglines:
    line = line.replace("[", "")
    line = line.replace("]", "")
    values = line.split("!")
    entries = values[0].split(",")
    values = values[1].split(",")
    entries = list(map(int, entries))
    values = list(map(int, values))
    DS.appendLinked(entries, values)
trainer = BackpropTrainer(net, DS)
trainer.trainUntilConvergence()

# testingset = open("testingset.txt", "r")
# testinglines = testingset.read().splitlines()
# testing_data = []
# for line in testinglines:
#     line = line.replace("[", "")
#     line = line.replace("]", "")
#     values = line.split("!")
#     entries = values[0].split(",")
#     values = values[1].split(",")
#     entries = list(map(int, entries))
#     values = list(map(int, values))
#     testing_tuple = (entries, values)
#     testing_data.append(testing_tuple)
DS = SupervisedDataSet(nFeatures, nOutput)
count = 0
for sample in data:
    #
    # Discard the label row
    #
    count = count + 1
    if sample[0] == 'gas [m3]':
        continue
    label, x = sample[0], sample[1:]
    #
    # Insert the sample into the Dataset
    #
    DS.appendLinked(x, label)

#
# Divide the dataset in training set and test set
#
# tstdata, DS = DS.splitWithProportion(0.25)
trainData, tstdata = splitWithProportion(DS, 0.75)

print "Number of Dataset patterns: ", len(DS)
print "Number of training patterns: ", len(trainData)
print "Number of test patterns: ", len(tstdata)
print "Input and output dimensions: ", trainData.indim, trainData.outdim
print "number of units in hidden layer: ", nNeurons

#
# Build network with
#
# odom_dict = csv_to_dict('out/odom.csv')
# print odom_dict
#
# r1_rssi_dict = csv_to_dict('out/r1_rssi.csv')
# print r1_rssi_dict
#
# r1_odom_rssi_dict = csv_to_dict('out/r1_odom_rssi.csv')
# print r1_odom_rssi_dict

time, odom, rssi = csv_to_arrays('out/r1_time_odom_rssi.csv')

trndata = SupervisedDataSet(4, 1)
trndata.appendLinked([1, 2, 3, 4], [5])
print len(trndata)
print trndata['input']

# trndata.addSample((-15,-85,-25,-75), (0))
# trndata.addSample((-70,-70,-35,-35), (5))
# trndata.addSample((-85,-15,-75,-25), (50))
for i in range(len(odom)):
    trndata.addSample(rssi[i], odom[i])

print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
VALID = 100
TEST = 100
INPUT_SIZE = 30

train, valid, test = data[:TRAIN], data[TRAIN - INPUT_SIZE:TRAIN + VALID], data[TRAIN - INPUT_SIZE + VALID:TRAIN + VALID + TEST]

trainds = SupervisedDataSet(INPUT_SIZE, 1)
testds = SupervisedDataSet(INPUT_SIZE, 1)
validds = SupervisedDataSet(INPUT_SIZE, 1)

for i in range(INPUT_SIZE, train.shape[0]):
    trainds.appendLinked(train[i - INPUT_SIZE:i], train[i])
for i in range(INPUT_SIZE, test.shape[0]):
    testds.appendLinked(test[i - INPUT_SIZE:i], test[i])
for i in range(INPUT_SIZE, valid.shape[0]):
    validds.appendLinked(valid[i - INPUT_SIZE:i], valid[i])

THREADS = 4
hidden_range = [4, 32]
eta_range = [0.0001, 10.0]
activation_func = [SigmoidLayer, TanhLayer]
lamda_range = [1e-7, 1e-5]
epochs_factor = 1
besthparams = []
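# The three loops above repeat one sliding-window pattern; a generic helper
# that could be factored out (a sketch, not part of the original source):
def window_dataset(series, input_size):
    # each sample is `input_size` consecutive values; the target is the next value
    ds = SupervisedDataSet(input_size, 1)
    for i in range(input_size, series.shape[0]):
        ds.appendLinked(series[i - input_size:i], series[i])
    return ds

# trainds = window_dataset(train, INPUT_SIZE), and likewise for test/valid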
class NET():
    def __init__(self, arg):
        self.inputsize = arg[0]
        self.outputsize = arg[-1]
        self.hiden = arg[1:-1]
        self.err = 1
        self.old_err = 1
        b = []
        b.append(self.inputsize)
        b += self.hiden
        b.append(self.outputsize)
        # print b  # "%s, %s, %s, hiddenclass=TanhLayer" % (self.inputsize, self.hiden, self.outputsize)
        self.net = FeedForwardNetwork()
        self.inputlayer = LinearLayer(self.inputsize, "Input")
        self.net.addInputModule(self.inputlayer)
        self.outputlayer = LinearLayer(self.outputsize, "Output")
        self.net.addOutputModule(self.outputlayer)
        self.hidenlayers = []
        for i in xrange(len(self.hiden)):
            self.hidenlayers.append(SigmoidLayer(self.hiden[i], "hiden%s" % i))
            self.net.addModule(self.hidenlayers[-1])
        self.net.addConnection(FullConnection(self.inputlayer, self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            self.net.addConnection(FullConnection(self.inputlayer, self.hidenlayers[i]))
            self.net.addConnection(FullConnection(self.hidenlayers[i], self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            for j in xrange(i + 1, len(self.hidenlayers)):
                self.net.addConnection(FullConnection(self.hidenlayers[i], self.hidenlayers[j]))
        # self.print_conections(self.net)
        self.net.sortModules()
        self.ds = SupervisedDataSet(self.inputsize, self.outputsize)

    def Update(self, hiden, h):
        self.net = FeedForwardNetwork()
        self.inputlayer = LinearLayer(self.inputsize, "Input")
        self.net.addInputModule(self.inputlayer)
        self.outputlayer = LinearLayer(self.outputsize, "Output")
        self.net.addOutputModule(self.outputlayer)
        self.hidenlayers = []
        for i in xrange(len(hiden)):
            self.hidenlayers.append(SigmoidLayer(hiden[i], "hiden%s" % i))
            self.net.addModule(self.hidenlayers[-1])
        self.net.addConnection(FullConnection(self.inputlayer, self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            self.net.addConnection(FullConnection(self.inputlayer, self.hidenlayers[i]))
            self.net.addConnection(FullConnection(self.hidenlayers[i], self.outputlayer))
        for i in xrange(len(self.hidenlayers)):
            for j in xrange(i + 1, len(self.hidenlayers)):
                if i < h:
                    self.net.addConnection(FullConnection(self.hidenlayers[i], self.hidenlayers[j]))
                elif i == h:
                    self.net.addConnection(FullConnection(self.hidenlayers[i], self.hidenlayers[j],
                                                          inSliceTo=hiden[i] - 1))
                else:
                    self.net.addConnection(FullConnection(self.hidenlayers[i], self.hidenlayers[j]))
        # self.print_conections(self.net)
        self.net.sortModules()
        self.hiden = hiden

    def print_conections(self, n):
        print("BEGIN")
        for mod in n.modules:
            print(mod)
            for conn in n.connections[mod]:
                print(conn)
                for cc in range(len(conn.params)):
                    print(conn.whichBuffers(cc), conn.params[cc])
        print("END")

    def AddData(self, datainput, dataoutput, learningrate):
        if len(dataoutput) != len(datainput):
            print("Not equals data", len(dataoutput), len(datainput))
            return 1
        self.ds = SupervisedDataSet(self.inputsize, self.outputsize)
        for i in xrange(len(dataoutput)):
            self.ds.appendLinked(datainput[i], dataoutput[i])
        self.trainer = BackpropTrainer(self.net, dataset=self.ds, learningrate=learningrate)
        return 0

    def TrainNet(self, epoch, error):
        if epoch <= 5:
            epoch = 5
        i = 0
        count = 0
        while i < epoch:
            if error == self.err:
                break
            self.err = self.trainer.train()
            if self.err == self.old_err:
                count += 1
            else:
                count = 0
            if count == 3:
                self.err = self.old_err
                return (self.err, 1)
            self.old_err = self.err
            i += 1
        # self.SaveNet('%s %s_%s_%s.work' % (self.err, self.inputsize, self.hiden, self.outputsize))
        return [self.err, 0]

    def TrainNetOnce(self):
        self.err = self.trainer.train()
        return self.err

    def SaveNet(self, filename=None):
        if filename is None:
            NetworkWriter.writeToFile(self.net, '%s %s_%s_%s.xml' % (self.err, self.inputsize, self.hiden, self.outputsize))
        else:
            NetworkWriter.writeToFile(self.net, filename)

    def LoadNet(self, fname):
        self.net = NetworkReader.readFrom(fname)
        tree = ET.parse(fname)
        x = tree.getroot()
        l = []
        for modules in x.findall('Network/Modules/SigmoidLayer/dim'):
            l.append(int(modules.get("val")))
        self.hiden = l[:]
        self.inputsize = self.net.indim
        self.outputsize = self.net.outdim

    def TestNet(self, inp):
        if len(inp) != self.inputsize:
            return 0
        return self.net.activate(inp[:])

    def UpdateWeights(self, f1, f2=None):
        n = NetworkReader.readFrom(f1)
        if f2 is not None:
            n2 = NetworkReader.readFrom(f2)

        def DictParams(n):
            l1 = []
            for mod in n.modules:
                l = []
                for conn in n.connections[mod]:
                    if conn.paramdim > 0:
                        l.append([conn.outmod.name, conn.params])
                d = dict(l)
                l1.append([mod.name, d])
            d1 = dict(l1)
            return d1

        d1 = DictParams(n)
        if f2 is not None:
            d2 = DictParams(n2)
        d3 = DictParams(self.net)
        params = np.array([])
        if f2 is not None:
            for i in d2:
                for j in d2[i]:
                    try:
                        b = d3[i][j][:]
                        b[:d2[i][j].size] = d2[i][j][:]
                        d3[i].update({j: b})
                    except:
                        pass
        for i in d1:
            for j in d1[i]:
                try:
                    b = d3[i][j][:]
                    b[:d1[i][j].size] = d1[i][j][:]
                    d3[i].update({j: b})
                except:
                    pass
        for i in d3["Input"]:
            params = np.hstack((params, d3["Input"][i]))
        for i in xrange(len(self.hiden)):
            for j in d3["hiden%s" % i]:
                params = np.hstack((params, d3["hiden%s" % i][j]))
        self.net._setParameters(params)
from pybrain.structure import RecurrentNetwork, LinearLayer, FullConnection, SigmoidLayer
import time

top = 1000
features = []
for i in range(0, top):
    features.append(i)

labels = []
for i in range(0, top):
    labels.append((i / 2.0))

ds = SupervisedDataSet(1, 1)
for i in range(0, top):
    ds.appendLinked(features[i], labels[i])

# TrainDS, TestDS = ds.splitWithProportion(0.8)
# for input, target in ds:
#     print input, target

# net = buildNetwork(1, 3, 1, bias=True, hiddenclass=TanhLayer)
# rnn = RecurrentNetwork()
rnn = buildNetwork(1, 25, 1, bias=True, recurrent=False, hiddenclass=TanhLayer)
# rnn.addInputModule(LinearLayer(1, 'in'))
# rnn.addModule(SigmoidLayer(25, 'hidden'))
# rnn.addOutputModule(LinearLayer(1, 'out'))
# rnn.addConnection(FullConnection(rnn['in'], rnn['hidden'], 'feed'))
# rnn.addConnection(FullConnection(rnn['hidden'], rnn['out'], 'give'))
# Initialize training data form
numInput = 2  # Number of input features
ds = SupervisedDataSet(numInput, 1)

# Load training data from text file (comma separated)
trainingDataFile = 'regrData.txt'
tf = open(trainingDataFile, 'r')
for line in tf.readlines():
    # Split the values on the current line, and convert to float
    tfData = [float(x) for x in line.strip().split(',') if x != '']
    inData = tuple(tfData[:numInput])   # Grab first numInput values
    outData = tuple(tfData[numInput:])  # Grab the rest
    # Add the data to the datasets
    ds.appendLinked(inData, outData)

# -------
# Build a feed forward neural network (that can have large output)
# -------
from pybrain.structure import SigmoidLayer, LinearLayer
from pybrain.tools.shortcuts import buildNetwork

numHidden = 100
net = buildNetwork(ds.indim,   # Number of input units
                   numHidden,  # Number of hidden units
                   ds.outdim,  # Number of output units
                   bias=True,
                   hiddenclass=SigmoidLayer,
                   outclass=LinearLayer  # Allows for a large output
                   )
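# To complete the picture: training and querying the network built above.
# A sketch; the learning rate and epoch count are arbitrary choices, not
# from the original source.
from pybrain.supervised.trainers import BackpropTrainer

trainer = BackpropTrainer(net, ds, learningrate=0.01)
for epoch in range(100):
    err = trainer.train()  # one pass over ds, returns the epoch error
print net.activate((1.0, 2.0))  # prediction for a two-feature input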
def build_dataset(self):
    if os.path.isfile(self.dataset_file):
        with open(self.dataset_file, "rb") as f:
            dataset = cPickle.load(f)
    else:
        dataset = SupervisedDataSet(len(features), 1)
    if os.path.isfile(self.done_articles_file):
        with open(self.done_articles_file, "rb") as f:
            done_articles = cPickle.load(f)
    else:
        done_articles = {}
    value = -1
    decision = "y"
    for file_name in os.listdir(self.articles_dir):
        print "\n\n"
        print "---" * 10
        decision = raw_input("Do another article? [y/n] ")
        if decision[0].lower() != "y":
            break
        with open("articles/" + file_name) as article:
            text = ""
            first = True
            for line in article.readlines()[1:]:
                text += line
            sentences = tokenize(text, "sentence", return_spans=False)
            article_position = done_articles.get(file_name, 0)
            if article_position >= len(sentences):
                continue
            print "Looking at:", file_name, "from position", article_position
            for sentence in sentences[article_position:]:
                extractor = FeatureExtractor(sentence)
                vectors = extractor.get_feature_vectors(features, "sentence")[0]
                print sentence
                value = -1
                while value == -1:
                    rating = raw_input("nothing=OK, space=bad, q=quit: ")
                    if rating == "":
                        value = [0]
                    elif rating[:1].lower() == "q":
                        value = None
                    elif rating[:1] == " ":
                        value = [1]
                # quit on q
                if value is None:
                    break
                dataset.appendLinked(vectors, value)
                done_articles[file_name] = done_articles.get(file_name, 0) + 1
    with open(self.dataset_file, "wb") as f:
        cPickle.dump(dataset, f)
    with open(self.done_articles_file, "wb") as f:
        cPickle.dump(done_articles, f)
n = FeedForwardNetwork()
n.addInputModule(LinearLayer(1, name='in'))
n.addInputModule(BiasUnit(name='bias'))
n.addModule(TanhLayer(3, name='gotan'))
n.addOutputModule(LinearLayer(1, name='out'))
n.addConnection(FullConnection(n['bias'], n['gotan']))
n.addConnection(FullConnection(n['in'], n['gotan']))
n.addConnection(FullConnection(n['gotan'], n['out']))
n.sortModules()

# initialize the backprop trainer and train
t = BackpropTrainer(n, learningrate=0.1, momentum=0.0, verbose=True)

# DATASET
DS = SupervisedDataSet(1, 1)
X = random.rand(100, 1) * 100
Y = X**3 + random.rand(100, 1) * 5
maxy = float(max(Y))
maxx = 100.0
for r in range(X.shape[0]):
    DS.appendLinked((X[r] / maxx), (Y[r] / maxy))

t.trainOnDataset(DS, 200)

plt.plot(X, Y, '.b')
X = [[i] for i in arange(0, 100, 0.1)]
Y = list(map(lambda x: n.activate(array(x) / maxx) * maxy, X))
plt.plot(X, Y, '-g')