class NeuralAgent(QLearningAgent):
    def setup_approximation(self, n):
        self.neural_net = buildNetwork(n, 6, 1, bias=True,
                                       hiddenclass=LinearLayer,
                                       outclass=LinearLayer)
        self.trainer = BackpropTrainer(self.neural_net)

    def value_of_state(self, state_vector):
        return self.neural_net.activate(state_vector)[0]

    def learn_q(self, state_vector, action, old, new):
        # Retrain on a single (state, target) pair.
        ds = SupervisedDataSet(len(state_vector), 1)
        ds.addSample(state_vector, new)
        self.trainer.setData(ds)
        self.trainer.trainEpochs(10)
        print(new - self.value_of_state(state_vector))

    def save_weights(self, filename):
        with open(filename, "wb") as savefile:
            pickle.dump(self.neural_net, savefile)

    def load_weights(self, filename):
        with open(filename, "rb") as readfile:
            self.neural_net = pickle.load(readfile)
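# A minimal, self-contained sketch of the single-sample update pattern that
# learn_q() above relies on, assuming a hypothetical 4-feature state: the
# dataset is rebuilt around one (state, target) pair and the trainer is
# re-pointed at it with setData().
from pybrain.datasets import SupervisedDataSet
from pybrain.structure import LinearLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork

net = buildNetwork(4, 6, 1, bias=True, hiddenclass=LinearLayer, outclass=LinearLayer)
trainer = BackpropTrainer(net)

state = [0.1, 0.5, -0.3, 0.7]
target_q = 1.25                            # hypothetical updated Q-value

ds = SupervisedDataSet(4, 1)
ds.addSample(state, [target_q])
trainer.setData(ds)
trainer.trainEpochs(10)
print(target_q - net.activate(state)[0])   # residual after the update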
def main():
    for stock in STOCK_TICKS:
        # Download the data.
        get_data(stock)

        # Import the data.
        days = extract_data(stock)
        today = days.pop(0)

        # Build the dataset: target 1 for an up day, 0 otherwise.
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 1 if day.change > 0 else 0
            data_set.addSample(day.return_metrics(), [target])

        # Build the network.
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)

        # Train the network.
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

        # Query the trained network on today's metrics.
        prediction = network.activate(today.return_metrics())
        print(prediction)
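# A self-contained sketch (hypothetical 4-feature inputs) of the usual PyBrain
# classification recipe the snippet above follows: build a
# ClassificationDataSet, expand the class label into one output unit per class
# with _convertToOneOfMany(), and read class probabilities off a softmax output.
from pybrain.datasets import ClassificationDataSet
from pybrain.structure import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork

data_set = ClassificationDataSet(4, 1, nb_classes=2)
data_set.addSample([0.2, 0.1, 0.4, 0.3], [1])   # an "up" day
data_set.addSample([0.9, 0.8, 0.7, 0.6], [0])   # a "down" day
data_set._convertToOneOfMany()                  # outdim becomes nb_classes

network = buildNetwork(data_set.indim, 8, data_set.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(network)
trainer.setData(data_set)
trainer.trainEpochs(50)
print(network.activate([0.2, 0.1, 0.4, 0.3]))   # class probabilities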
class PHC_NN(PHC_FA):
    '''PHC with neural function approximation.'''
    delta = 0.1
    maxNumberofAverage = 30
    weightdecay = 0.001
    trainingEpochPerUpdateWight = 2

    def __init__(self, num_features, num_actions, indexOfAgent=None):
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions,
                                 (num_features + num_actions),
                                 1,
                                 hiddenclass=SigmoidLayer,
                                 outclass=LinearLayer)
        self.linPolicy = buildNetwork(num_features,
                                      (num_features + num_actions),
                                      num_actions,
                                      hiddenclass=SigmoidLayer,
                                      outclass=SigmoidLayer)
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy, weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given state, compute probabilities for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z = np.sum(values)
        return (values / z).flatten()

    def _qValues(self, state):
        """Return a vector of Q-values for all actions, given the state(-features)."""
        values = np.array([self.linQ.activate(r_[state, one_to_n(i, self.num_actions)])
                           for i in range(self.num_actions)])
        return values.flatten()

    def _updateWeights(self, state, action, reward, next_state):
        """state and next_state are vectors, action is an integer."""
        # Update the Q-value function approximator.
        target = reward + self.rewardDiscount * max(self._qValues(next_state))
        inp = r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features + self.num_actions, 1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)
        # Update the policy toward the greedy action.
        bestAction = r_argmax(self._qValues(state))
        target = one_to_n(bestAction, self.num_actions)
        inp = r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features, self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 learningrate=self.delta,
                                                 weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(epochs=self.trainingEpochPerUpdateWight)
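# A quick sketch of the Q-network input encoding used by _qValues() and
# _updateWeights() above: the state features are concatenated with a one-hot
# action code via pybrain's one_to_n (hypothetical 2-feature state, 3 actions).
from numpy import r_
from pybrain.utilities import one_to_n

num_actions = 3
state = [0.4, -0.2]
action = 1
q_input = r_[state, one_to_n(action, num_actions)]
print(q_input)   # -> [ 0.4 -0.2  0.   1.   0. ]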
def Train(self):
    self.load_dataset()
    # Oversample the exception patterns so they are not drowned out.
    for i in range(50):
        for exception in EXCEPTIONS:
            self.ds.addSample(exception, 0)
    for line in self.data:
        self.ds.addSample(line, 1)
    print("Training data are loaded into the network.")
    trainer = BackpropTrainer(self.net)
    trainer.setData(self.ds)
    print("Training... please stand by.")
    trainer.trainEpochs(self.count_trainIterations)
    print("Neural network is trained!")
    if self.toSave:
        self.Save()
print "errorrr!!!" print len(fv1) print len(fv2) continue fv_diff = numpy.atleast_2d(numpy.asarray(fv1) - numpy.asarray(fv2)) #print fv_diff.shape #print len(fv_diff[0].tolist()) print count #cPickle.dump([0,1],open('lol','wb')) #cPickle.load(open('lol','rb')) ##### # pybrain training ###### dataSet = SupervisedDataSet(67584, 2) dataSet.addSample(fv_diff[0].tolist(),y) trainer.setData(dataSet) print "training" trainer.train() print "finished train" res=net.activate(fv_diff[0].tolist()) if((res[0]>res[1]) & (y[0]>y[1])): tp=tp+1 elif((res[0]<=res[1]) & (y[0]>y[1])): fp=fp+1 elif((res[0]>res[1]) & (y[0]<y[1])): fn=fn+1 ######## # fann ######## if(count%interval == 0): #ann.train(fv_diff[0].tolist(),y)
# The same (events, truth) pair is appended eight times, presumably to
# weight this sample more heavily in training.
for _ in range(8):
    t_ds_list.append((events_list, truth_val_list))

t_ds = SupervisedDataSet(events_len, 1)
random.shuffle(t_ds_list)
for data in t_ds_list:
    t_ds.addSample(data[0], data[1])
trainer = BackpropTrainer(rnn_net, **parameters)
trainer.setData(t_ds)
trainer.train()
del t_ds  # release memory

# predict
rnn_net.reset()
frslt = open('../test/rnn_result8.csv', 'w')
fts = open('../test/rnn_test.csv', 'r')
for tsline in fts:
    splited = tsline.split(",")
    enroll_id_str = str(int(splited[0]))
    rvsd = splited[1:]
    rvsd.reverse()
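# The snippet above trains a recurrent net (rnn_net) from a SupervisedDataSet,
# which discards sample order after shuffling. A sketch of PyBrain's
# sequence-aware alternative, SequentialDataSet, where newSequence() marks
# episode boundaries (hypothetical 3-feature events):
from pybrain.datasets import SequentialDataSet

seq_ds = SequentialDataSet(3, 1)
seq_ds.newSequence()
seq_ds.addSample([0.1, 0.2, 0.3], [0.0])
seq_ds.addSample([0.2, 0.3, 0.4], [1.0])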
def _train_CV(self, perms, n_folds=5, num_neuron=50, learning_rate_input=0.01,
              decay=0.01, maxEpochs_input=200, verbose_input=True):
    '''Call the class in model validators and do cross-validation.'''
    perf_tst = 0
    perf_trn = 0
    # Note: the dataset has not been set up here; it is assigned inside
    # the CV loop via setData().
    for i in range(n_folds):
        # Shuffle out the indices of the training data and the test data.
        train_perms_idxs = list(range(n_folds))
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[train_perms_idx])
        # These are the indices for the training data.
        train_idxs = np.concatenate(temp_list)
        # This is the index array for the test set:
        test_idxs = perms[i]
        print("Training on part:", i, '.......')
        trn_ds_ann, tst_ds_ann = self._set_dataset(train_idxs, test_idxs)
        # Initialize the network and the training object.
        net_this = buildNetwork(self.indim, num_neuron, self.outdim,
                                bias=True, hiddenclass=SigmoidLayer)
        trainer_this = BackpropTrainer(net_this, learningrate=learning_rate_input,
                                       weightdecay=decay, verbose=verbose_input)
        # Set up the training data for the trainer.
        trainer_this.setData(trn_ds_ann)
        # Train the requested number of epochs.
        trainer_this.trainEpochs(maxEpochs_input)
        for epoch in range(self.max_Epoches):
            trainer_this.trainEpochs(1)
            trn_error = self._net_performance(net_this, trn_ds_ann)
            tst_error = self._net_performance(net_this, tst_ds_ann)
            if verbose_input:
                print("Training", epoch + 1, "times")
                print("the trn error is:", trn_error)
                print("the test error is:", tst_error)
        perf_this_tst = self._net_performance(net_this, tst_ds_ann)
        perf_this_trn = self._net_performance(net_this, trn_ds_ann)
        print('The performance of this fold on Test is:', perf_this_tst)
        print('The performance of this fold on Training is:', perf_this_trn)
        print('Number of neurons:', num_neuron)
        print('###')
        perf_tst = perf_tst + perf_this_tst
        perf_trn = perf_trn + perf_this_trn
    perf_tst /= n_folds
    perf_trn /= n_folds
    print(perf_tst, perf_trn)
    return perf_tst, perf_trn
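# A sketch of the fold bookkeeping _train_CV expects: `perms` as a list of
# n_folds index arrays, e.g. a shuffled range split into roughly equal parts.
# n_samples is hypothetical; the class's own _set_dataset does the slicing.
import numpy as np

n_samples, n_folds = 100, 5
perms = np.array_split(np.random.permutation(n_samples), n_folds)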
def predict_ball(hidden_nodes, is_elman=True, training_data=5000, epoch=-1,
                 parameters={}, predict_count=128):
    # Build the RNN.
    n = construct_network(hidden_nodes, is_elman)

    # Make the training data.
    ep = 1 if epoch < 0 else epoch
    initial_v = ball_data.gen_velocity(BOX_SIZE)
    data_set = ball_data.bounce_ball((training_data + 1) * ep, BOX_SIZE, None,
                                     initial_v=initial_v)
    total_avg = np.average(data_set, axis=0)
    total_std = np.std(data_set, axis=0)
    training_ds = []
    normalized_d = __normalize(data_set)
    for e_index in range(ep):
        t_ds = SupervisedDataSet(4, 4)
        e_begin = e_index * training_data
        for j in range(e_begin, e_begin + training_data):
            # From the current state, predict the next one.
            p_in = normalized_d[j].tolist()
            p_out = normalized_d[j + 1].tolist()
            t_ds.addSample(p_in, p_out)
        training_ds.append(t_ds)
    del data_set  # release memory

    # Train the network.
    err1 = 0
    if epoch < 0:
        trainer = BackpropTrainer(n, training_ds[0], **parameters)
        err1 = trainer.train()
    else:
        trainer = BackpropTrainer(n, **parameters)
        epoch_errs = []
        for ds in training_ds:
            trainer.setData(ds)
            epoch_errs.append(trainer.train())
        err1 = max(epoch_errs)
    del training_ds  # release memory

    # Predict.
    initial_p = ball_data.gen_position(BOX_SIZE)
    predict = None
    next_pv = np.hstack((initial_p, initial_v))
    n.reset()
    for i in range(predict_count):
        predict = next_pv if predict is None else np.vstack((predict, next_pv))
        p_normalized = (next_pv - total_avg) / total_std
        next_pv = n.activate(p_normalized.tolist())
        restored = np.array(next_pv) * total_std + total_avg
        next_pv = restored
    real = ball_data.bounce_ball(predict_count, BOX_SIZE, initial_p, initial_v)
    err_matrix = (predict - real) ** 2
    err_distance = np.sqrt(np.sum(err_matrix[:, 0:2], axis=1)).reshape((predict_count, 1))
    err_velocity = np.sum(np.sqrt(err_matrix[:, 2:4]), axis=1).reshape((predict_count, 1))
    err2 = np.hstack((err_distance, err_velocity))
    return predict, real, err1, err2
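# The prediction loop above normalizes each input with the training statistics
# and then inverts the transform on the output; a minimal round-trip sketch
# with hypothetical statistics and one position+velocity vector:
import numpy as np

total_avg = np.array([5.0, 5.0, 0.0, 0.0])
total_std = np.array([2.0, 2.0, 1.0, 1.0])
pv = np.array([6.0, 4.0, 1.0, -1.0])
normalized = (pv - total_avg) / total_std
restored = normalized * total_std + total_avg
assert np.allclose(restored, pv)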
outmatrix = outputUnits(entry)
lpos = 0
ds = SupervisedDataSet(NUMINPUTS, NUMOUTPUTS)
for letterContexts in wordstream(input_entries=(entry,)):
    for inarray in convertToBinary(letterContexts):
        outarray = outmatrix[lpos]
        ds.addSample(inarray, outarray)
        observed = net.activate(inarray)
        phoneme = entry.phonemes[lpos]
        observedPhoneme = closestByDotProduct(observed[:MINSTRESS], articFeatures)
        phonemeErrors.append(bool(phoneme != observedPhoneme))
        stress = entry.stress[lpos]
        observedStress = closestByDotProduct(observed[MINSTRESS:], stressFeatures)
        stressErrors.append(bool(stress != observedStress))
        lpos += 1
trainer.setData(ds)
err = trainer.train()
print("accuracy: phonemes %.3f stresses %.3f" %
      (1 - np.mean(phonemeErrors), 1 - np.mean(stressErrors)))
# phonemeErrors/stressErrors hold one element in {0,1} per letter trained so
# far: one vector for phonemes and one for stress.
class Predictor():
    def __init__(self, inSize, outSize, LearningRate):
        self.learning_rate = LearningRate
        self.ds = SupervisedDataSet(inSize, outSize)
        self.net = buildNetwork(inSize, 10, outSize, hiddenclass=TanhLayer, bias=True)
        self.trainer = BackpropTrainer(self.net, self.ds,
                                       learningrate=self.learning_rate,
                                       verbose=False, weightdecay=WEIGHT_DECAY)
        self.prediction = [0] * outSize
        self.mse = 100
        self.age = 0
        # Specific to Mai's code: make input and output masks.
        self.inputMask = [1 for i in range(inSize)]
        self.outputMask = [0] * outSize
        r = random.randint(0, outSize - 1)
        self.outputMask[r] = 1
        self.error = 0
        self.errorHistory = []
        self.dErrorHistory = []
        self.slidingError = 0
        self.dError = 0
        self.fitness = 0
        self.problem = r
        self.previousData = []

    def getPrediction(self, input):
        return self.net.activate(input)

    def trainPredictor(self):
        self.age += 1
        new_ds = deepcopy(self.ds)
        if FLAGS.sliding_training:
            if len(self.previousData) != 0:
                for sample, target in self.previousData:
                    new_ds.addSample(sample, target)
        self.trainer.setData(new_ds)
        for i in range(FLAGS.epochs):
            e = self.trainer.train()
        if FLAGS.sliding_training:
            self.previousData = deepcopy(self.ds)
        # Update possible fitness indicators.
        # Error now:
        self.error = e
        # Entire error history (last five values):
        if len(self.errorHistory) < 5:
            self.errorHistory.append(e)
        else:
            for i in range(len(self.errorHistory) - 1):
                self.errorHistory[i] = self.errorHistory[i + 1]
            self.errorHistory[-1] = e
        # Sliding-window error over approximately the last 10 episodes' characteristic time.
        self.slidingError = self.slidingError * 0.9 + self.error
        # Instantaneous difference in error between the last two episodes.
        if len(self.errorHistory) > 1:
            self.dError = self.errorHistory[-1] - self.errorHistory[-2]
        return e

    def getFitness(self, type):
        fit = 0
        if type == 0:
            # Fitness function 1 (Chrisantha's attempt): simply minimize
            # the prediction error.
            fit = -self.error
        elif type == 1:
            # Fitness function 2 (Mai's attempt); probably needs adaptive
            # thresholds to work well.
            if self.error > ERROR_THRESHOLD and self.dError > DERROR_THRESHOLD:
                fit = 0
            else:
                fit = 1
        self.fitness = fit
        return fit

    def storeDataPoint(self, inputA, targetA):
        self.ds.addSample(inputA, targetA)

    def predict(self, inputA):
        return self.net.activate(inputA)
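# A hypothetical usage sketch of Predictor. WEIGHT_DECAY and FLAGS are
# module-level globals in the original code; minimal stand-ins are assumed
# here so the sketch runs.
import random
from copy import deepcopy

WEIGHT_DECAY = 0.01

class _Flags:                 # stand-in for the original FLAGS object
    epochs = 5
    sliding_training = False

FLAGS = _Flags()

p = Predictor(inSize=3, outSize=2, LearningRate=0.01)
p.storeDataPoint([0.1, 0.2, 0.3], [1.0, 0.0])
p.storeDataPoint([0.3, 0.2, 0.1], [0.0, 1.0])
err = p.trainPredictor()
print(p.getPrediction([0.1, 0.2, 0.3]), err, p.getFitness(0))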
def train_the_nn(max_iterations, iterations_between_reports,
                 train_percent_of_dataset, layerDims):
    print('creating total dict')
    fv_dict = this_create_total_dict(0, train_percent_of_dataset)
    print('finished creating dict')

    ######
    # pybrain
    ######
    print("building network")
    net = buildNetwork(*layerDims, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)
    print("finished building network")
    trainer = BackpropTrainer(net)

    ######
    # fann (alternative backend, disabled)
    ######
    #ann = libfann.neural_net()
    #ann.create_standard_array((67584, 300, 100, 2))
    #ann.set_learning_rate(learning_rate)
    #ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)

    fn = 0
    fp = 0
    tp = 0
    for i in range(max_iterations):
        (fv_diff, y) = get_diff_of_fvs(fv_dict, iterations_between_reports, i)

        #####
        # pybrain training
        #####
        dataSet = SupervisedDataSet(67584, 2)
        dataSet.addSample(fv_diff[0].tolist(), y)
        trainer.setData(dataSet)
        trainer.train()
        res = net.activate(fv_diff[0].tolist())
        if (res[0] > res[1]) and (y[0] > y[1]):
            tp = tp + 1
        elif (res[0] <= res[1]) and (y[0] > y[1]):
            fp = fp + 1
        elif (res[0] > res[1]) and (y[0] < y[1]):
            fn = fn + 1

        ########
        # fann
        ########
        if i % iterations_between_reports == 0:
            #ann.train(fv_diff[0].tolist(), y)
            print(i)
            print(net.activate(fv_diff[0].tolist()))
            print(y)
            print("prec")
            print(tp / (1e-4 + fp + tp))
            print("recall")
            print(tp / (1e-4 + fn + tp))
            tp = 0
            fp = 0
            fn = 0
    return net
def predict_ball(hidden_nodes, is_elman=True, training_data=16, epoch=-1,
                 parameters={}, predict_count=16):
    # Build the RNN.
    n = construct_network(hidden_nodes, is_elman)

    # Make the training data from a fixed start state.
    ep = 1 if epoch < 0 else epoch
    initial_p = [9., 7.]
    initial_v = [1., 1.]
    data_set = ball_data.bounce_ball((training_data + 1) * ep, BOX_SIZE,
                                     initial_p=initial_p, initial_v=initial_v)
    total_avg = np.average(data_set, axis=0)
    total_std = np.std(data_set, axis=0)
    total_std[2] = 1.
    total_std[3] = 1.
    training_ds = []
    print("data_set = {}".format(data_set))
    normalized_d = __normalize(data_set, total_avg, total_std)
    for e_index in range(ep):
        t_ds = SequentialDataSet(4, 4)
        t_ds.newSequence()
        e_begin = e_index * training_data
        for j in range(e_begin, e_begin + training_data):
            # The input is held fixed at the first frame; the recurrent
            # state alone has to produce the successive frames.
            p_in = normalized_d[0].tolist()
            p_out = normalized_d[j + 1].tolist()
            t_ds.addSample(p_in, p_out)
        training_ds.append(t_ds)

    # Train the network.
    err1 = 0
    if epoch < 0:
        trainer = BackpropTrainer(n, training_ds[0], learningrate=3e-4,
                                  weightdecay=1e-2, verbose=True)
        # Note: trainEpochs() returns None, so err1 ends up None here.
        err1 = trainer.trainEpochs(20000)
    else:
        trainer = BackpropTrainer(n, **parameters)
        epoch_errs = []
        for ds in training_ds:
            trainer.setData(ds)
            epoch_errs.append(trainer.train())
        err1 = max(epoch_errs)

    # Predict.
    predict = None
    next_pv = np.hstack((initial_p, initial_v))
    n.reset()
    for i in range(predict_count):
        predict = next_pv if predict is None else np.vstack((predict, next_pv))
        # As in training, the net is always fed the (normalized) first frame.
        p_normalized = (data_set[0] - total_avg) / total_std
        next_pv = n.activate(p_normalized.tolist())
        restored = np.array(next_pv) * total_std + total_avg
        next_pv = restored
        print("restored, answer = {}, {}".format(restored, data_set[i + 1]))
    real = ball_data.bounce_ball(predict_count, BOX_SIZE, initial_p, initial_v)
    err_matrix = (predict - real) ** 2
    err_distance = np.sqrt(np.sum(err_matrix[:, 0:2], axis=1)).reshape((predict_count, 1))
    err_velocity = np.sum(np.sqrt(err_matrix[:, 2:4]), axis=1).reshape((predict_count, 1))
    err2 = np.hstack((err_distance, err_velocity))
    return predict, real, err1, err2
class PHC_WoLF_NN(PHC_FA):
    '''PHC-WoLF with neural function approximation.'''
    deltaW = 0.05
    deltaL = 0.2
    maxNumberofAverage = 30
    weightdecay = 0.001
    trainingEpochPerUpdateWight = 1

    def __init__(self, num_features, num_actions, indexOfAgent=None):
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions,
                                 (num_features + num_actions),
                                 1,
                                 hiddenclass=SigmoidLayer,
                                 outclass=LinearLayer)
        self.linPolicy = buildNetwork(num_features,
                                      (num_features + num_actions),
                                      num_actions,
                                      hiddenclass=SigmoidLayer,
                                      outclass=SigmoidLayer)
        self.averagePolicy = []
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy, weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given state, compute normalized probability for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z = np.sum(values)
        return (values / z).flatten()

    def _qValues(self, state):
        """Return a vector of Q-values for all actions, given the state(-features)."""
        values = np.array([self.linQ.activate(r_[state, one_to_n(i, self.num_actions)])
                           for i in range(self.num_actions)])
        return values.flatten()

    def _piAvr(self, state):
        """Average the action probabilities over the stored policy snapshots."""
        pi = np.zeros(self.num_actions)
        for elem in self.averagePolicy:
            values = np.array(elem.activate(r_[state]))
            pi = np.add(pi.flatten(), values.flatten())
        z = np.sum(pi)
        pi = pi / z
        return pi.flatten()

    def _updateWeights(self, state, action, reward, next_state):
        """state and next_state are vectors, action is an integer."""
        # Update the Q-value function approximator.
        target = reward + self.rewardDiscount * max(self._qValues(next_state))
        inp = r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features + self.num_actions, 1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)
        # Update the estimate of the average policy.
        self.averagePolicy.append(copy.deepcopy(self.linPolicy))
        if len(self.averagePolicy) > self.maxNumberofAverage:
            self.averagePolicy.pop(np.random.randint(len(self.averagePolicy)))
        # Choose the step size: learn cautiously (deltaW) when the current
        # policy's expected return beats the average policy's ("winning"),
        # and quickly (deltaL) when "losing".
        values = self._qValues(state)
        pi = self._pi(state)
        cumRewardOfCurrentPolicy = 0.0
        for elem_action in range(self.num_actions):
            # Expected return: sum over actions of pi(a) * Q(s, a).
            cumRewardOfCurrentPolicy += pi[elem_action] * values[elem_action]
        api = self._piAvr(state)
        cumRewardOfAveragePolicy = 0.0
        for elem_action in range(self.num_actions):
            cumRewardOfAveragePolicy += api[elem_action] * values[elem_action]
        if cumRewardOfCurrentPolicy > cumRewardOfAveragePolicy:
            delta = self.deltaW
        else:
            delta = self.deltaL
        # Update the policy toward the greedy action.
        bestAction = r_argmax(self._qValues(state))
        target = one_to_n(bestAction, self.num_actions)
        inp = r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features, self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 learningrate=delta,
                                                 weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(epochs=self.trainingEpochPerUpdateWight)
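# A minimal numeric sketch of the WoLF step-size rule used in _updateWeights
# above, with hypothetical Q-values and policies: "win" selects the small
# step deltaW, "lose" the large step deltaL.
import numpy as np

deltaW, deltaL = 0.05, 0.2
q = np.array([1.0, 0.5, 0.0])          # hypothetical Q-values
pi = np.array([0.6, 0.3, 0.1])         # current policy
avg_pi = np.array([1 / 3.] * 3)        # average policy

current_value = np.dot(pi, q)          # expected return under current policy
average_value = np.dot(avg_pi, q)      # expected return under average policy
delta = deltaW if current_value > average_value else deltaL
print(delta)   # 0.05: the current policy is "winning", so learn cautiously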
from pybrain.tools.validation import CrossValidator, ModuleValidator

translation = {"x": 0, "o": 1, "b": 2}


def row_preprocess(row):
    return [translation[x] for x in row]


if __name__ == "__main__":
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    targets = [1 if x[-1] == "positive" else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]

    alldata = ClassificationDataSet(9, class_labels=["negative", "positive"])
    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])

    network = buildNetwork(9, 3, 1, hiddenclass=SigmoidLayer, outclass=LinearLayer)

    trainer = BackpropTrainer(network, verbose=True, weightdecay=0.001,
                              learningrate=0.1)
    trainer.setData(alldata)
    trainer.trainUntilConvergence(maxEpochs=6000)

    validator = CrossValidator(trainer, alldata, n_folds=10,
                               valfunc=ModuleValidator.MSE)
    ret = validator.validate()
    print(ret)
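# For reference, ModuleValidator can also score a trained module directly on
# a dataset, without re-running the cross-validation (a sketch reusing the
# names from the script above):
mse = ModuleValidator.MSE(network, alldata)
print("MSE on the full dataset:", mse)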
    else:
        input_rates.append(0)
    prev = exchange_rates[i][1]
for i in range(window_s + INPUT_LEN, window_s + INPUT_LEN + OUTPUT_LEN):
    output_rates.append(exchange_rates[i][1])
y_arr = np.array(output_rates)
# The slope of a linear fit through the output window is the training target.
angle = np.polyfit(x_arr, y_arr, 1)[0]
ds.addSample(input_rates, [angle])

trainer = BackpropTrainer(rnn_net, **parameters)
trainer.setData(ds)
trainer.train()
del ds  # release memory

# predict
rnn_net.reset()
dump_fd = open("./rnn_net.dump", "wb")
pickle.dump(rnn_net, dump_fd)
### training end ###

portfolio = 1000000
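# Later, the dumped network can be restored and reused; a sketch assuming the
# "./rnn_net.dump" file written above exists:
import pickle

with open("./rnn_net.dump", "rb") as dump_fd:
    rnn_net = pickle.load(dump_fd)
rnn_net.reset()   # clear the recurrent state before a fresh prediction run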
def ANN_edge_analysis(a_network, a_gene, a_dataset, boot_val):
    """Creates and trains a network that reflects the structure of the
    hypothesized regulatory network."""
    regulatory_network = FeedForwardNetwork()
    # Retrieve the needed parameters from the input network.
    data_node_list = get_sub_list_from_network(a_network, a_gene, "gene,TF", 1)
    # Need to add +1 node to the input layer that represents the "other"
    # control variables describing the network modules to be used.
    inLayer = LinearLayer(len(data_node_list) - 1)
    outLayer = LinearLayer(1)
    # Add layers to the network.
    regulatory_network.addInputModule(inLayer)
    regulatory_network.addOutputModule(outLayer)
    # Add connections between layers (no hidden layer in this variant).
    in_to_out = FullConnection(inLayer, outLayer)
    regulatory_network.addConnection(in_to_out)
    get_nn_details(regulatory_network)
    regulatory_network.sortModules()

    # Format the dataset.
    input_dimension = len(data_node_list) - 1
    print("in_dimension =", input_dimension)
    DS = SupervisedDataSet(input_dimension, 1)
    # Add the data. There could be an ordering problem here if the TFs are
    # not always listed in the same order; it seems OK, though.
    for experiment in a_dataset:
        tf_list = []
        gene_list = []
        tf_labels = []
        for TF in data_node_list:
            if TF != a_gene:
                tf_list.append(experiment[TF])
                tf_labels.append(TF)
            else:
                gene_list.append(experiment[TF])
        print(tf_list)
        print(gene_list)
        if check_missing_experiments(tf_list) and check_missing_experiments(gene_list):
            float_tf_list = [float(i) for i in tf_list]
            float_gene_list = [float(i) for i in gene_list]
            DS.appendLinked(float_tf_list, float_gene_list)
    print("......")
    print(DS)

    # Training.
    trainer = BackpropTrainer(regulatory_network, momentum=0.1, verbose=True,
                              weightdecay=0.01)
    trainer.setData(DS)
    result_list = []
    boot_count = 0
    while boot_count < boot_val:
        trainer.trainUntilConvergence(validationProportion=0.25)
        print(regulatory_network)
        this = get_nn_details(regulatory_network)
        result_list.append(this)
        # Note: reset() clears activation buffers; it does not re-randomize
        # the weights between bootstrap rounds.
        regulatory_network.reset()
        boot_count += 1
    print(tf_labels)
    print(regulatory_network.params)
    print(in_to_out.params)
    print(inLayer)
    pesos_conexiones(regulatory_network)
    NetworkWriter.writeToFile(regulatory_network, 'trained_net.xml')
    return result_list
if rewardTransition < 0:
    costVector[targetDirDiscrete] = rewardTransition
    rw = rewardTransition
else:
    costVector[targetDirDiscrete] = (ALPHA_FACTOR * costVector[targetDirDiscrete]
                                     + (1.0 - ALPHA_FACTOR) * rewardTransition)
    rw = (1.0 - ALPHA_FACTOR) * rewardTransition

# Learn the updated Q-value.
if isLearning:
    ds.clear()
    ds.addSample((stepStartingPos[0] / XSIZE,
                  stepStartingPos[1] / YSIZE,
                  math.sin(oldDir * 0.25 * math.pi),
                  math.cos(oldDir * 0.25 * math.pi)), rw)
    trainer.setData(ds)
    trainer.trainEpochs(1)

# ====================================
# Final paint step
# ====================================
if pygame.display.get_active():
    clock.tick(2)
    screen.blit(screenBuffer, (0, 0))
    pygame.display.flip()

print("Trainer Alpha Value:", trainer.descent.alpha)

# Process the events. Key presses from 0 to 8 are possible, as well as space
# for switching between Q-value and best-direction painting. ESCape ends the
# program.
for event in pygame.event.get():
    if event.type == pygame.locals.QUIT or (
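# The sample above encodes the discrete heading as (sin, cos) of its angle,
# which avoids the wrap-around discontinuity of a raw direction index. A quick
# look at the encoding for the 8 compass directions:
import math

for oldDir in range(8):
    angle = oldDir * 0.25 * math.pi
    print(oldDir, round(math.sin(angle), 3), round(math.cos(angle), 3))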
# Set the normalization limits from 0 to 1.
net = buildNetwork(trndata.indim, 500, tstdata.outdim, hiddenclass=TanhLayer,
                   outclass=SoftmaxLayer, bias=True)
trainer = BackpropTrainer(net, trndata, learningrate=0.01, lrdecay=1,
                          momentum=0.00, verbose=False, batchlearning=False,
                          weightdecay=0.0)
trainer.setData(trndata)
trainer.trainUntilConvergence(verbose=True, trainingData=data, maxEpochs=1)
net.offset = 0

m = myo.Myo()
e = []


def proc_emg(emg, moving, times=[]):
    global e, emg_correctmean, emg_filtered, emg_rectified, low_pass, sfreq, emg_envelope
    e = emg
    # Remove the mean offset (detrend instead of subtracting np.mean).
    emg_correctmean = scipy.signal.detrend(e)
    high = 20 / (1000 / 2)
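# A sketch of where a Nyquist-normalized cutoff like `high` above typically
# goes: a Butterworth high-pass for EMG. The 4th-order filter and the demo
# signal are assumptions, not part of the original fragment, and the division
# must stay in floating point: under Python 2, 20 / (1000 / 2) == 0.
import numpy as np
import scipy.signal

sfreq = 1000.0                                 # assumed sampling rate, Hz
high = 20 / (sfreq / 2)                        # 0.04: cutoff as fraction of Nyquist
b, a = scipy.signal.butter(4, high, btype='highpass')
demo = np.random.randn(1000) + 0.5             # stand-in for emg_correctmean
filtered = scipy.signal.filtfilt(b, a, demo)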