class NeuralAgent(QLearningAgent):
    def setup_approximation(self, n):
        self.neural_net = buildNetwork(n,
                                       6,
                                       1,
                                       bias=True,
                                       hiddenclass=LinearLayer,
                                       outclass=LinearLayer)
        self.trainer = BackpropTrainer(self.neural_net)

    def value_of_state(self, state_vector):
        return self.neural_net.activate(state_vector)[0]

    def learn_q(self, state_vector, action, old, new):
        ds = SupervisedDataSet(len(state_vector), 1)
        ds.addSample(state_vector, new)

        self.trainer.setData(ds)
        self.trainer.trainEpochs(10)

        print new - self.value_of_state(state_vector)

    def save_weights(self, filename):
        with open(filename, "w") as savefile:
            pickle.dump(self.neural_net, savefile)

    def load_weights(self, filename):
        with open(filename, "r") as readfile:
            self.neural_net = pickle.load(readfile)
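learn_q above retrains the whole network on a one-sample dataset every step. A minimal self-contained sketch of that pattern; the 3-feature state, the target Q-value, and the network shape are illustrative assumptions, not values taken from the agent above.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer

# Small stand-in network, analogous to setup_approximation above.
net = buildNetwork(3, 6, 1, bias=True)
trainer = BackpropTrainer(net)

state_vector = [0.2, -0.5, 1.0]   # made-up state features
target_q = 0.7                    # made-up updated Q-value

ds = SupervisedDataSet(len(state_vector), 1)   # one-sample dataset, as in learn_q
ds.addSample(state_vector, [target_q])
trainer.setData(ds)
trainer.trainEpochs(10)

print(net.activate(state_vector)[0])   # current approximation for that state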
def main():
    for stock in STOCK_TICKS:
        # Download Data
        get_data(stock)

        # Import Data
        days = extract_data(stock)
        today = days.pop(0)

        # Make DataSet
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 0
            if day.change > 0:
                target = 1
            data_set.addSample(day.return_metrics(), [target])

        # Make Network
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)

        # Train Network
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

        # Activate Network
        prediction = network.activate(today.return_metrics())
        print prediction
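A self-contained sketch of the same ClassificationDataSet -> buildNetwork -> BackpropTrainer pipeline, using random stand-in data; the dimensions, constants, and samples are assumptions rather than the real stock metrics.

import random
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer

INPUT_NUM, MIDDLE_NUM, OUTPUT_NUM, EPOCHS_MAX = 4, 6, 1, 20   # assumed constants

data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
for _ in range(100):
    metrics = [random.random() for _ in range(INPUT_NUM)]   # stand-in for day.return_metrics()
    target = 1 if metrics[0] > 0.5 else 0                   # stand-in for day.change > 0
    data_set.addSample(metrics, [target])

network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)
trainer = BackpropTrainer(network)
trainer.setData(data_set)
trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)

print(network.activate([random.random() for _ in range(INPUT_NUM)]))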
class PHC_NN(PHC_FA):
    '''PHC with neural function approximation. '''
    delta=0.1
    maxNumberofAverage=30
    weightdecay=0.001
    trainingEpochPerUpdateWight=2
    
    def __init__(self, num_features, num_actions, indexOfAgent=None):    
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions, (num_features + num_actions), 1, hiddenclass = SigmoidLayer, outclass = LinearLayer)
        self.linPolicy = buildNetwork(num_features, (num_features + num_actions), num_actions, hiddenclass = SigmoidLayer,outclass = SigmoidLayer)
        self.trainer4LinQ=BackpropTrainer(self.linQ,weightdecay=self.weightdecay)
        self.trainer4LinPolicy=BackpropTrainer(self.linPolicy,weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given state, compute probabilities for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z=np.sum(values)
        return (values/z).flatten()
    
    def _qValues(self, state):
        """ Return vector of q-values for all actions, 
        given the state(-features). """
        values = np.array([self.linQ.activate(r_[state, one_to_n(i, self.num_actions)]) for i in range(self.num_actions)])
        return values.flatten()

            
    def _updateWeights(self, state, action, reward, next_state):
        """ state and next_state are vectors, action is an integer. """
        #update Q-value function approximator
        target=reward + self.rewardDiscount * max(self._qValues(next_state))
        inp=r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ=BackpropTrainer(self.linQ,weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features+self.num_actions,1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)
        #Update policy
        bestAction=r_argmax(self._qValues(state))
        target= one_to_n(bestAction, self.num_actions)
        inp=r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features,self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy=BackpropTrainer(self.linPolicy,
                                               learningrate=self.delta,
                                               weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(epochs=self.trainingEpochPerUpdateWight)
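A small sketch of the state/action encoding that _qValues and _updateWeights above rely on: the discrete action is appended to the state features as a one-hot vector via PyBrain's one_to_n. The feature/action counts and the state values are illustrative assumptions.

from numpy import r_
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import SigmoidLayer, LinearLayer
from pybrain.utilities import one_to_n

num_features, num_actions = 3, 4
linQ = buildNetwork(num_features + num_actions, num_features + num_actions, 1,
                    hiddenclass=SigmoidLayer, outclass=LinearLayer)

state = [0.1, 0.5, -0.2]
q_values = [linQ.activate(r_[state, one_to_n(a, num_actions)])[0]
            for a in range(num_actions)]
print(q_values)   # one Q estimate per discrete action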
Example #4
    def Train(self):
        self.load_dataset()

        for i in range(50):
            for exception in EXCEPTIONS:
                self.ds.addSample(exception, (0))

        for line in self.data:
            self.ds.addSample(line, (1))

        print("Training data are loaded to the network.")

        trainer = BackpropTrainer(self.net)
        trainer.setData(self.ds)

        print("Training... please stand by")

        trainer.trainEpochs(self.count_trainIterations)

        print("Neural network is trained!")

        if (self.toSave): self.Save()
     print "errorrr!!!"
     print len(fv1)
     print len(fv2)
     continue
 fv_diff = numpy.atleast_2d(numpy.asarray(fv1) - numpy.asarray(fv2))
 #print fv_diff.shape
 #print len(fv_diff[0].tolist())
 print count       
 #cPickle.dump([0,1],open('lol','wb'))
 #cPickle.load(open('lol','rb'))
 #####
 # pybrain training
 ######
 dataSet = SupervisedDataSet(67584, 2)
 dataSet.addSample(fv_diff[0].tolist(),y)
 trainer.setData(dataSet)
 print "training"
 trainer.train()
 print "finished train"
 res=net.activate(fv_diff[0].tolist())
 if((res[0]>res[1]) & (y[0]>y[1])):
     tp=tp+1
 elif((res[0]<=res[1]) & (y[0]>y[1])):
     fp=fp+1
 elif((res[0]>res[1]) & (y[0]<y[1])):
     fn=fn+1
 ########
 # fann
 ########
 if(count%interval == 0):
     #ann.train(fv_diff[0].tolist(),y)
Example #6
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))    
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))    
    
t_ds = SupervisedDataSet(events_len, 1)
random.shuffle(t_ds_list)
for data in t_ds_list:
    t_ds.addSample(data[0], data[1])

trainer = BackpropTrainer(rnn_net, **parameters)
trainer.setData(t_ds)
trainer.train()

del t_ds  # release memory

# predict
rnn_net.reset()
frslt = open('../test/rnn_result8.csv', 'w')

fts = open('../test/rnn_test.csv', 'r')
for tsline in fts:
    splited = tsline.split(",")
    enroll_id_str = str(int(splited[0]))
    
    rvsd = splited[1:]
    rvsd.reverse()
Example #7
    def _train_CV(self,perms, n_folds = 5, num_neuron = 50,learning_rate_input=0.01,
                  decay=0.01,maxEpochs_input=200,verbose_input=True):
        '''Call the class in model validators, do cross-validation, and pass values.'''
        
        perf_tst = 0
        perf_trn = 0
        # Set up network     

#         NetworkWriter.writeToFile(net, 'temp_net_this.xml')
        # Set up trainer
        # Note: the dataset has not been set here;
        # it needs to be set inside the CV loop below.
        
        for i in range(n_folds):
            # shuffle out the index of training data and the test data
            train_perms_idxs = list(range(n_folds))
            train_perms_idxs.pop(i)
            temp_list = []
            for train_perms_idx in train_perms_idxs:
                temp_list.append(perms[ train_perms_idx ])

            # These are the indices for the training data
            train_idxs = np.concatenate(temp_list)       
            # these are the indices for the test set:
            test_idxs = perms[i]       
 
            print "Training on part: ", i, '.......'  
            trn_ds_ann, tst_ds_ann = self._set_dataset(train_idxs, test_idxs)
            
            # Initialize network and training object
#             net_this = NetworkReader.readFrom('temp_net_this.xml')
            net_this = buildNetwork(self.indim,
                                    num_neuron,
                                    self.outdim,bias=True,
                                    hiddenclass = SigmoidLayer)
            trainer_this = BackpropTrainer(net_this,learningrate = learning_rate_input,
                                           weightdecay=decay,
                                           verbose=verbose_input)
            # set up training data for the trainer
            trainer_this.setData(trn_ds_ann)
            
            # train the requested number of times:
            trainer_this.trainEpochs(maxEpochs_input)      
            for iter in range(self.max_Epoches):
#             trainer_this.trainUntilConvergence(verbose=False,maxEpochs=maxEpochs_input)
                trainer_this.trainEpochs(1)
                trn_error = self._net_performance(net_this, trn_ds_ann)
                tst_error = self._net_performance(net_this, tst_ds_ann)
                if verbose_input == True:
                    print "Training", iter+1,"times"
                    print "the trn error is: ", trn_error
                    print "the test error is: ",tst_error     
            perf_this_tst = self._net_performance(net_this, tst_ds_ann)
            perf_this_trn = self._net_performance(net_this, trn_ds_ann)
            
            print 'The Performance of this time on Test is: ', perf_this_tst
            print 'The Performance of this time on Training is: ', perf_this_trn
            print 'Number of Neuron: ', num_neuron 
            print '###'
            perf_tst = perf_tst + perf_this_tst   
            perf_trn = perf_trn + perf_this_trn  
                 
        perf_tst /= n_folds
        perf_trn /= n_folds
        
        print perf_tst, perf_trn
        return perf_tst, perf_trn
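perms is assumed to be a list of n_folds disjoint index arrays over the training set; a hypothetical way to build it (this helper is not shown in the class above):

import numpy as np

n_samples, n_folds = 100, 5                   # assumed sizes
shuffled = np.random.permutation(n_samples)
perms = np.array_split(shuffled, n_folds)     # list of n_folds disjoint index arrays

# Fold 0 as test set, the rest as training set (mirrors the loop above):
test_idxs = perms[0]
train_idxs = np.concatenate([perms[k] for k in range(n_folds) if k != 0])
print(len(train_idxs), len(test_idxs))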
Example #8
def predict_ball(hidden_nodes, is_elman=True, training_data=5000, epoch=-1, parameters={}, predict_count=128):

    # build rnn
    n = construct_network(hidden_nodes, is_elman)

    # make training data
    ep = 1 if epoch < 0 else epoch
    initial_v = ball_data.gen_velocity(BOX_SIZE)
    data_set = ball_data.bounce_ball((training_data + 1) * ep, BOX_SIZE, None, initial_v=initial_v)
    total_avg = np.average(data_set, axis=0)
    total_std = np.std(data_set, axis=0)
    # initial_p = data_set[np.random.choice(range(training_data))][:2]

    training_ds = []
    normalized_d = __normalize(data_set)
    for e_index in range(ep):
        t_ds = SupervisedDataSet(4, 4)
        e_begin = e_index * training_data
        for j in range(e_begin,  e_begin + training_data):
            # from current, predict next
            p_in = normalized_d[j].tolist()
            p_out = normalized_d[j + 1].tolist()
            t_ds.addSample(p_in, p_out)

        training_ds.append(t_ds)

    del data_set  # release memory

    # training network
    err1 = 0
    if epoch < 0:
        trainer = BackpropTrainer(n, training_ds[0], **parameters)
        err1 = trainer.train()
    else:
        trainer = BackpropTrainer(n, **parameters)
        epoch_errs = []
        for ds in training_ds:
            trainer.setData(ds)
            epoch_errs.append(trainer.train())

        err1 = max(epoch_errs)

    del training_ds  # release memory

    # predict
    initial_p = ball_data.gen_position(BOX_SIZE)
    predict = None
    next_pv = np.hstack((initial_p, initial_v))

    n.reset()
    for i in range(predict_count):
        predict = next_pv if predict is None else np.vstack((predict, next_pv))

        p_normalized = (next_pv - total_avg) / total_std
        next_pv = n.activate(p_normalized.tolist())
        restored = np.array(next_pv) * total_std + total_avg
        next_pv = restored

    real = ball_data.bounce_ball(predict_count, BOX_SIZE, initial_p, initial_v)
    err_matrix = (predict - real) ** 2
    err_distance = np.sqrt(np.sum(err_matrix[:, 0:2], axis=1)).reshape((predict_count, 1))
    err_velocity = np.sum(np.sqrt(err_matrix[:, 2:4]), axis=1).reshape((predict_count, 1))
    err2 = np.hstack((err_distance, err_velocity))

    return predict, real, err1, err2
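__normalize is not shown in this excerpt; a plausible per-column z-score version, together with the inverse transform used in the predict loop above (an assumption, not the original helper):

import numpy as np

def normalize(data_set, avg=None, std=None):
    # Per-column z-score, matching how the loop above restores values
    # with total_std and total_avg.
    avg = np.average(data_set, axis=0) if avg is None else avg
    std = np.std(data_set, axis=0) if std is None else std
    return (data_set - avg) / std

data = np.random.rand(10, 4)                 # stand-in for bounce_ball output
avg, std = np.average(data, axis=0), np.std(data, axis=0)
normalized = normalize(data, avg, std)
restored = normalized * std + avg            # inverse transform, as in the loop above
print(np.allclose(restored, data))           # True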
Example #9
		outmatrix = outputUnits(entry);
		lpos = 0;
		ds = SupervisedDataSet(NUMINPUTS, NUMOUTPUTS);
		for letterContexts in wordstream(input_entries = (entry,)):
			#print("letterContexts", letterContexts);
			for inarray in convertToBinary(letterContexts):
				outarray = outmatrix[lpos];
		#print("inarray",inarray);
		#print("outarray",outarray); 
	#print("inlen %d outlen %d" % (len(inarray), len(outarray)));
				ds.addSample(inarray, outarray);
				observed = net.activate(inarray);
				phoneme = entry.phonemes[lpos];
				observedPhoneme = closestByDotProduct(observed[:MINSTRESS], articFeatures);
				phonemeErrors.append(bool(phoneme != observedPhoneme));
				stress = entry.stress[lpos];
				observedStress = closestByDotProduct(observed[MINSTRESS:], stressFeatures);
				stressErrors.append(bool(stress != observedStress));
				lpos += 1
		trainer.setData(ds);
                #pdb.set_trace();
		err = trainer.train();
		#print(err, " ", entry);
	print("accuracy: phonemes %.3f stresses %.3f" % (1 - np.mean(phonemeErrors), 1 - np.mean(stressErrors)) );

    
# accuracy is a vector with one element in {0,1} for each letter
# that we have trained so far.
# Make that two vectors, one for phonemes and one for stresses.
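closestByDotProduct is not shown in this excerpt; a plausible version is sketched below as an assumption: it returns the label whose feature vector has the largest dot product with the observed activation.

import numpy as np

def closest_by_dot_product(observed, feature_table):
    # feature_table: dict mapping a label (e.g. a phoneme) to its feature vector
    best_label, best_score = None, None
    for label, features in feature_table.items():
        score = np.dot(observed, features)
        if best_score is None or score > best_score:
            best_label, best_score = label, score
    return best_label

artic_features = {"AA": [1.0, 0.0, 0.2], "T": [0.0, 1.0, 0.1]}   # made-up feature vectors
print(closest_by_dot_product([0.1, 0.9, 0.0], artic_features))   # -> "T"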

Example #10
class Predictor(): 

	def __init__(self, inSize, outSize, LearningRate):

		self.learning_rate = LearningRate
		self.ds = SupervisedDataSet(inSize, outSize)
		self.net = buildNetwork(inSize, 10, outSize, hiddenclass=TanhLayer, bias=True)
		self.trainer = BackpropTrainer(self.net, self.ds, learningrate=self.learning_rate, verbose = False, weightdecay=WEIGHT_DECAY)
		self.prediction = [0] * outSize
		self.mse = 100
		self.age=0

		#Specific to Mai's code. Make input and output masks.  
		self.inputMask = [1 for i in range(inSize)]
		
#		self.outputMask = [random.randint(0, 1) for i in range(outSize)]
		self.outputMask = [0]*outSize
		r = random.randint(0,outSize-1)
		self.outputMask[r] = 1

		self.error = 0
		self.errorHistory = []
		self.dErrorHistory = []
		self.slidingError = 0
		self.dError = 0
		self.fitness = 0
		self.problem=r
		self.previousData=[]

	def getPrediction(self, input):

		out = self.net.activate(input)
		return out

	def trainPredictor(self):

		self.age+=1

		
		new_ds=deepcopy(self.ds)

		if FLAGS.sliding_training:
			if len(self.previousData)!=0:
				for sample,target in self.previousData:
					new_ds.addSample(sample,target)
		

		self.trainer.setData(new_ds)
		for i in range(FLAGS.epochs):
			e = self.trainer.train()
		
		if FLAGS.sliding_training:
			self.previousData=deepcopy(self.ds)

		#Update possible fitness indicators. 
		#Error now
		self.error = e
		#Entire error history
		if len(self.errorHistory) < 5:  
			self.errorHistory.append(e)
		else:
			for i in range(len(self.errorHistory)-1):
				self.errorHistory[i] = self.errorHistory[i+1]
			self.errorHistory[-1] = e

		# Sliding-window error over approximately the last 10 episodes' characteristic time.
		self.slidingError = self.slidingError*0.9 + self.error
		# Instantaneous difference in error between the last two episodes.
		if len(self.errorHistory) > 1:
			self.dError = self.errorHistory[-1] - self.errorHistory[-2] 

		return e

	def getFitness(self, type):

		fit = 0 
		#Fitness function 1 Chrisantha's attempt 
		if type == 0:#SIMPLE MINIMIZE PREDICTION ERROR FITNESS FUNCTION FOR PREDICTORS. 
#           fit = -self.dError/(1.0*self.error)
			fit = -self.error
		elif type == 1:
			#Fitness function 2 Mai's attempt (probably need to use adaptive thresholds for this to be ok)
			if self.error > ERROR_THRESHOLD and self.dError > DERROR_THRESHOLD:
				fit = 0
			else:
				fit = 1

		self.fitness = fit
		return fit 

	def storeDataPoint(self, inputA, targetA):
		self.ds.addSample(inputA, targetA)

	def predict(self,inputA):
		return self.net.activate(inputA)
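A standalone sketch of the sliding-window retraining idea that trainPredictor above implements, written so it does not depend on FLAGS or WEIGHT_DECAY; the sizes, epoch count, and random data are assumptions.

import random
from copy import deepcopy
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.structure import TanhLayer
from pybrain.supervised.trainers import BackpropTrainer

in_size, out_size, epochs = 3, 2, 5
net = buildNetwork(in_size, 10, out_size, hiddenclass=TanhLayer, bias=True)
ds = SupervisedDataSet(in_size, out_size)
trainer = BackpropTrainer(net, ds, learningrate=0.01)
previous_data = []

for episode in range(3):
    ds.clear()
    current = []
    for _ in range(20):                          # this episode's fresh samples
        inp = [random.random() for _ in range(in_size)]
        tgt = [random.random() for _ in range(out_size)]
        ds.addSample(inp, tgt)
        current.append((inp, tgt))

    new_ds = deepcopy(ds)                        # current data ...
    for sample, target in previous_data:         # ... plus the previous window
        new_ds.addSample(sample, target)

    trainer.setData(new_ds)
    for _ in range(epochs):
        err = trainer.train()
    previous_data = current                      # becomes the window next episode
    print(episode, err)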
Example #11
def train_the_nn(max_iterations,iterations_between_reports,train_percent_of_dataset,layerDims):

    print 'creating total dict'
    fv_dict = this_create_total_dict(0,train_percent_of_dataset)
    print 'finished creating dict'


    ######
    # pybrain
    ###########

    print "building network"
    net = buildNetwork(*layerDims,hiddenclass=SigmoidLayer,outclass=SoftmaxLayer)
    print "finished building network"
    #net.addInputModule(LinearLayer(67584, 'visible'))
    trainer = BackpropTrainer(net)


    #trainer = pybrain.supervised.trainers.BackpropTrainer(net, ds, learningrate = 0.001, momentum = 0.99)
    ##########
    # fann
    #########
    #ann = libfann.neural_net()
    #ann.create_standard_array((67584,300,100,2))

    #ann.set_learning_rate(learning_rate)
    #ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)
    
    
    fn=0
    fp=0
    tp=0
    for i in xrange(max_iterations):
        
        (fv_diff,y) = get_diff_of_fvs(fv_dict,iterations_between_reports,i)

        
        #print fv_diff.shape
        #print len(fv_diff[0].tolist())

        #cPickle.dump([0,1],open('lol','wb'))
        #cPickle.load(open('lol','rb'))
        #####
        # pybrain training
        ######
        dataSet = SupervisedDataSet(67584, 2)
        dataSet.addSample(fv_diff[0].tolist(),y)
        trainer.setData(dataSet)
        #print "training"
       
        #print "training"
        trainer.train()
        #print "finished train"
        res=net.activate(fv_diff[0].tolist())
        #print "finished test"
        
        if((res[0]>res[1]) & (y[0]>y[1])):
            tp=tp+1
        elif((res[0]<=res[1]) & (y[0]>y[1])):
            fp=fp+1
        elif((res[0]>res[1]) & (y[0]<y[1])):
            fn=fn+1
        ########
        # fann
        ########
        if(i%iterations_between_reports == 0):
            #ann.train(fv_diff[0].tolist(),y)
            #print "testing"
            print i
            print net.activate(fv_diff[0].tolist())
            print y
            #print ann.run(fv_diff[0].tolist())
            #print y
            print "prec"
            print (tp/(1e-4+fp+tp))
            print "recall"
            print (tp/(1e-4+fn+tp))
            tp=0
            fp=0
            fn=0


    return net
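The precision/recall report printed every iterations_between_reports steps above, with made-up counts; the 1e-4 term is the same guard against division by zero used in the code.

tp, fp, fn = 40, 10, 5                # assumed counts
precision = tp / (1e-4 + fp + tp)
recall = tp / (1e-4 + fn + tp)
print("prec", precision)
print("recall", recall)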
Example #12
def predict_ball(hidden_nodes, is_elman=True, training_data=16, epoch=-1, parameters={}, predict_count=16):
    # build rnn
    n = construct_network(hidden_nodes, is_elman)

    # make training data
    ep = 1 if epoch < 0 else epoch
    initial_p = [9., 7.]
    initial_v = [1., 1.]
    # initial_v = ball_data.gen_velocity(BOX_SIZE)
    data_set = ball_data.bounce_ball((training_data + 1) * ep, BOX_SIZE, initial_p=initial_p, initial_v=initial_v)
    total_avg = np.average(data_set, axis=0)
    total_std = np.std(data_set, axis=0)
    total_std[2] = 1.
    total_std[3] = 1.
    # initial_p = data_set[np.random.choice(range(training_data))][:2]

    training_ds = []
    print("data_set = {}".format(data_set))
    normalized_d = __normalize(data_set, total_avg, total_std)
    # print("normalized_d = {}".format(normalized_d))
    for e_index in range(ep):
        t_ds = SequentialDataSet(4, 4)
        t_ds.newSequence()
        e_begin = e_index * training_data
        for j in range(e_begin, e_begin + training_data):
            # from current, predict next
            p_in = normalized_d[0].tolist()
            p_out = normalized_d[j + 1].tolist()
            t_ds.addSample(p_in, p_out)

        training_ds.append(t_ds)

    # training network
    err1 = 0
    if epoch < 0:
        trainer = BackpropTrainer(n, training_ds[0], learningrate=3e-4, weightdecay=1e-2, verbose=True)
        err1 = trainer.trainEpochs(20000)
    else:
        trainer = BackpropTrainer(n, **parameters)
        epoch_errs = []
        for ds in training_ds:
            trainer.setData(ds)
            epoch_errs.append(trainer.train())

        err1 = max(epoch_errs)

    # predict
    predict = None
    next_pv = np.hstack((initial_p, initial_v))

    n.reset()
    for i in range(predict_count):
        predict = next_pv if predict is None else np.vstack((predict, next_pv))
        # print("predict = {}".format(predict))

        p_normalized = (data_set[0] - total_avg) / total_std
        next_pv = n.activate(p_normalized.tolist())
        restored = np.array(next_pv) * total_std + total_avg
        next_pv = restored
        print("restored, answer = {}, {}".format(restored, data_set[i + 1]))

    real = ball_data.bounce_ball(predict_count, BOX_SIZE, initial_p, initial_v)
    err_matrix = (predict - real) ** 2
    err_distance = np.sqrt(np.sum(err_matrix[:, 0:2], axis=1)).reshape((predict_count, 1))
    err_velocity = np.sum(np.sqrt(err_matrix[:, 2:4]), axis=1).reshape((predict_count, 1))
    err2 = np.hstack((err_distance, err_velocity))

    return predict, real, err1, err2
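This variant feeds a SequentialDataSet (with an explicit newSequence call) to the trainer. A minimal self-contained sketch of that dataset type; the recurrent network built here is an assumed stand-in for construct_network, which is not shown in this excerpt.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SequentialDataSet
from pybrain.supervised.trainers import BackpropTrainer

n = buildNetwork(4, 8, 4, recurrent=True)     # assumed stand-in for construct_network

t_ds = SequentialDataSet(4, 4)
t_ds.newSequence()                            # one sequence = one trajectory
for step in range(15):
    current = [step * 0.01] * 4               # made-up "current frame"
    nxt = [(step + 1) * 0.01] * 4             # made-up "next frame"
    t_ds.addSample(current, nxt)

trainer = BackpropTrainer(n)
trainer.setData(t_ds)
print(trainer.train())                        # one pass over the sequence

n.reset()                                     # clear recurrent state before predicting
print(n.activate([0.0] * 4))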
class PHC_NN(PHC_FA):
    '''PHC with neural function approximation. '''
    delta = 0.1
    maxNumberofAverage = 30
    weightdecay = 0.001
    trainingEpochPerUpdateWight = 2

    def __init__(self, num_features, num_actions, indexOfAgent=None):
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions,
                                 (num_features + num_actions),
                                 1,
                                 hiddenclass=SigmoidLayer,
                                 outclass=LinearLayer)
        self.linPolicy = buildNetwork(num_features,
                                      (num_features + num_actions),
                                      num_actions,
                                      hiddenclass=SigmoidLayer,
                                      outclass=SigmoidLayer)
        self.trainer4LinQ = BackpropTrainer(self.linQ,
                                            weightdecay=self.weightdecay)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given state, compute probabilities for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z = np.sum(values)
        return (values / z).flatten()

    def _qValues(self, state):
        """ Return vector of q-values for all actions, 
        given the state(-features). """
        values = np.array([
            self.linQ.activate(r_[state, one_to_n(i, self.num_actions)])
            for i in range(self.num_actions)
        ])
        return values.flatten()

    def _updateWeights(self, state, action, reward, next_state):
        """ state and next_state are vectors, action is an integer. """
        #update Q-value function approximator
        target = reward + self.rewardDiscount * max(self._qValues(next_state))
        inp = r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ = BackpropTrainer(self.linQ,
                                            weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features + self.num_actions, 1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)
        #Update policy
        bestAction = r_argmax(self._qValues(state))
        target = one_to_n(bestAction, self.num_actions)
        inp = r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features, self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 learningrate=self.delta,
                                                 weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(
            epochs=self.trainingEpochPerUpdateWight)
class PHC_WoLF_NN(PHC_FA):
    '''PHC_WoLF with neural function approximation.'''
    deltaW = 0.05
    deltaL = 0.2
    maxNumberofAverage = 30
    weightdecay = 0.001
    trainingEpochPerUpdateWight = 1

    def __init__(self, num_features, num_actions, indexOfAgent=None):
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions,
                                 (num_features + num_actions),
                                 1,
                                 hiddenclass=SigmoidLayer,
                                 outclass=LinearLayer)
        self.linPolicy = buildNetwork(num_features,
                                      (num_features + num_actions),
                                      num_actions,
                                      hiddenclass=SigmoidLayer,
                                      outclass=SigmoidLayer)
        self.averagePolicy = []
        self.trainer4LinQ = BackpropTrainer(self.linQ,
                                            weightdecay=self.weightdecay)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given state, compute softmax probability for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z = np.sum(values)
        return (values / z).flatten()

    def _qValues(self, state):
        """ Return vector of q-values for all actions, 
        given the state(-features). """
        values = np.array([
            self.linQ.activate(r_[state, one_to_n(i, self.num_actions)])
            for i in range(self.num_actions)
        ])
        return values.flatten()

    def _piAvr(self, state):
        pi = np.zeros(self.num_actions)
        for elem in self.averagePolicy:
            values = np.array(elem.activate(r_[state]))
            pi = np.add(pi.flatten(), values.flatten())
        z = np.sum(pi)
        pi = pi / z
        return pi.flatten()

    def _updateWeights(self, state, action, reward, next_state):
        """ state and next_state are vectors, action is an integer. """
        #update Q-value function approximator
        target = reward + self.rewardDiscount * max(self._qValues(next_state))
        inp = r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ = BackpropTrainer(self.linQ,
                                            weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features + self.num_actions, 1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)

        #update estimate of average policy
        self.averagePolicy.append(copy.deepcopy(self.linPolicy))
        if len(self.averagePolicy) > self.maxNumberofAverage:
            self.averagePolicy.pop(np.random.randint(len(self.averagePolicy)))

        #update policy function approximator
        delta = None
        cumRewardOfCurrentPolicy = 0.0
        values = self._qValues(state)
        pi = self._pi(state)
        for elem_action in range(self.num_actions):
            cumRewardOfCurrentPolicy += pi[elem_action] * values[elem_action]
        cumRewardOfAveragePolicy = 0.0
        api = self._piAvr(state)
        for elem_action in range(self.num_actions):
            cumRewardOfAveragePolicy += api[elem_action] * values[elem_action]
        if cumRewardOfCurrentPolicy > cumRewardOfAveragePolicy:
            delta = self.deltaW
        else:
            delta = self.deltaL

        #Update policy
        bestAction = r_argmax(self._qValues(state))
        target = one_to_n(bestAction, self.num_actions)
        inp = r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features, self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 learningrate=(delta),
                                                 weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(
            epochs=self.trainingEpochPerUpdateWight)
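A small worked sketch of the "win or learn fast" step-size choice that _updateWeights above is based on, with made-up Q-values and policies; here the two expected values are computed as full dot products over the action probabilities.

import numpy as np

deltaW, deltaL = 0.05, 0.2
q_values = np.array([1.0, 0.5, -0.2])     # assumed Q estimates for 3 actions
pi = np.array([0.6, 0.3, 0.1])            # current policy
avg_pi = np.array([0.4, 0.4, 0.2])        # average (historical) policy

current_value = np.dot(pi, q_values)      # expected value under the current policy
average_value = np.dot(avg_pi, q_values)  # expected value under the average policy

# Winning (doing better than the historical average) -> small step, else large step.
delta = deltaW if current_value > average_value else deltaL
print(delta)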
Example #17
from pybrain.tools.validation import CrossValidator, ModuleValidator

translation = {"x": 0, "o": 1, "b": 2}


def row_preprocess(row):
    return [translation[x] for x in row]


if __name__ == "__main__":
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    targets = [1 if x[-1] == "positive" else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]
    alldata = ClassificationDataSet(9, class_labels=["negative", "positive"])

    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])

    network = buildNetwork(9, 3, 1, hiddenclass=SigmoidLayer, outclass=LinearLayer)

    # validation_data, training_data = alldata.splitWithProportion(0.25)

    trainer = BackpropTrainer(network, verbose=True, weightdecay=0.001, learningrate=0.1)
    trainer.setData(alldata)

    trainer.trainUntilConvergence(maxEpochs=6000)

    validator = CrossValidator(trainer, alldata, n_folds=10, valfunc=ModuleValidator.MSE)
    ret = validator.validate()
    print(ret)
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))
    t_ds_list.append((events_list, truth_val_list))

t_ds = SupervisedDataSet(events_len, 1)
random.shuffle(t_ds_list)
for data in t_ds_list:
    t_ds.addSample(data[0], data[1])

trainer = BackpropTrainer(rnn_net, **parameters)
trainer.setData(t_ds)
trainer.train()

del t_ds  # release memory

# predict
rnn_net.reset()
frslt = open('../test/rnn_result8.csv', 'w')

fts = open('../test/rnn_test.csv', 'r')
for tsline in fts:
    splited = tsline.split(",")
    enroll_id_str = str(int(splited[0]))

    rvsd = splited[1:]
    rvsd.reverse()
Example #19
            else:
                input_rates.append(0)
            prev = exchange_rates[i][1]
        for i in xrange(window_s + INPUT_LEN, window_s + INPUT_LEN + OUTPUT_LEN):
            output_rates.append(exchange_rates[i][1])


        y_arr = np.array(output_rates)
        angle = np.polyfit(x_arr, y_arr, 1)[0]
        #    print "learn_angle " + str(angle)
#        print "add angle" + str(angle)
#        print "add input_rates len " + str(len(input_rates))
        ds.addSample(input_rates, [angle])

    trainer = BackpropTrainer(rnn_net, **parameters)
    trainer.setData(ds)
    trainer.train()

    del ds  # release memory

    # predict
    rnn_net.reset()

    dump_fd = open("./rnn_net.dump", "w")
    pickle.dump(rnn_net, dump_fd)
### training end

# frslt = open('../test/rnn_result8.csv', 'w')
# frslt.write(enroll_id_str + "," + str(result[0]) + "\n")        

portfolio = 1000000
Example #20
def ANN_edge_analysis(a_network, a_gene, a_dataset, boot_val):

	"Creates and trains a network that is created to reflect the structure of the hypothesized network"

	regulatory_network = FeedForwardNetwork()

	# retrieving needed parameters from the input network

	data_node_list = get_sub_list_from_network(a_network, a_gene, "gene,TF", 1)

	# Need to add +1 node to the input layer that represents the "other" control variables

	# describing network modules to be used
	inLayer = LinearLayer(len(data_node_list)-1)
	#hiddenLayer = LinearLayer(len(data_node_list)-1))
	outLayer = LinearLayer(1)


	# Adding layers to network
	regulatory_network.addInputModule(inLayer)
	#regulatory_network.addModule(hiddenLayer)
	regulatory_network.addOutputModule(outLayer)

	# Adding connections between layers

	#in_to_hidden = LinearConnection(inLayer,hiddenLayer)
	#hidden_to_out = FullConnection(hiddenLayer, outLayer)

	in_to_out = FullConnection(inLayer, outLayer)

	#regulatory_network.addConnection(in_to_hidden)
	#regulatory_network.addConnection(hidden_to_out)

	regulatory_network.addConnection(in_to_out)

	get_nn_details(regulatory_network)

	# Other stuff added

	regulatory_network.sortModules()

	# Formatting the dataset 

	input_dimention = len(data_node_list)-1
	print "in_dimention = ", input_dimention

	DS = SupervisedDataSet( input_dimention, 1 )

	# Adding data, there may be a problem with order here where tfs are not always the same... seems ok though

	for experiment in a_dataset:
		tf_list = []
		gene_list = []
		tf_labels = []
		for TF in data_node_list:
			if TF != a_gene:
				#print TF, "<---"
				tf_list.append(experiment[TF])
				tf_labels.append(TF)
			else:
				#print TF, "<---gene"
				gene_list.append(experiment[TF])

		print tf_list
		print gene_list


		if (check_missing_experiments(tf_list) == True) and (check_missing_experiments(gene_list) == True):
			float_tf_list = [float(i) for i in tf_list]
			float_gene_list = [float(i) for i in gene_list]
			DS.appendLinked( float_tf_list, float_gene_list )

	print "......"

	print DS

	# Training
	trainer = BackpropTrainer(regulatory_network, momentum=0.1, verbose=True, weightdecay=0.01)

	trainer.setData(DS)

	result_list = []

	boot_count = 0
	while boot_count < boot_val:
		#trainer.trainEpochs(1000)
		trainer.trainUntilConvergence(validationProportion=0.25)
		print regulatory_network
		this = get_nn_details(regulatory_network)
		result_list.append(this)
		regulatory_network.reset()
		boot_count += 1

	print tf_labels
	print regulatory_network.params
	print in_to_out.params
	print inLayer
	pesos_conexiones(regulatory_network)

	NetworkWriter.writeToFile(regulatory_network, 'trained_net.xml')
	return result_list
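The manual FeedForwardNetwork construction pattern used above (addInputModule / addOutputModule / FullConnection / sortModules), reduced to its smallest runnable form; the 3-input toy regression data is an assumption.

from pybrain.structure import FeedForwardNetwork, LinearLayer, FullConnection
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer

net = FeedForwardNetwork()
inLayer = LinearLayer(3)
outLayer = LinearLayer(1)
net.addInputModule(inLayer)
net.addOutputModule(outLayer)
in_to_out = FullConnection(inLayer, outLayer)
net.addConnection(in_to_out)
net.sortModules()                           # required before the network can be used

DS = SupervisedDataSet(3, 1)
DS.appendLinked([1.0, 0.0, 0.5], [0.7])     # made-up expression values
DS.appendLinked([0.2, 0.9, 0.1], [0.3])

trainer = BackpropTrainer(net, momentum=0.1, weightdecay=0.01)
trainer.setData(DS)
trainer.trainEpochs(100)

print(in_to_out.params)                     # learned connection weights (the "edges")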
Example #21
        if rewardTransition < 0:
            costVector[targetDirDiscrete] = rewardTransition
            rw = rewardTransition
        else:
            costVector[targetDirDiscrete] = ALPHA_FACTOR * costVector[
                targetDirDiscrete] + (1.0 - ALPHA_FACTOR) * rewardTransition
            rw = (1.0 - ALPHA_FACTOR) * rewardTransition

        # Learn the updated Q-value.
        if isLearning:
            ds.clear()
            ds.addSample(
                (stepStartingPos[0] / XSIZE, stepStartingPos[1] / YSIZE,
                 math.sin(oldDir * 0.25 * math.pi),
                 math.cos(oldDir * 0.25 * math.pi)), rw)
            trainer.setData(ds)
            trainer.trainEpochs(1)

    # ====================================
    # Final paint step
    # ====================================
    if pygame.display.get_active():
        clock.tick(2)
        screen.blit(screenBuffer, (0, 0))
        pygame.display.flip()
    print "Trainer Alpha Value: ", trainer.descent.alpha

    # Let's look at the events. Key presses from 0 to 8 are possible, as well as space for switching between Q values and best direction painting. ESCape ends the program.
    for event in pygame.event.get():

        if event.type == pygame.locals.QUIT or (
Example #22
#set the normalization limits from 0 to 1
net = buildNetwork(trndata.indim,
                   500,
                   tstdata.outdim,
                   hiddenclass=TanhLayer,
                   outclass=SoftmaxLayer,
                   bias=True)
trainer = BackpropTrainer(net,
                          trndata,
                          learningrate=0.01,
                          lrdecay=1,
                          momentum=0.00,
                          verbose=False,
                          batchlearning=False,
                          weightdecay=0.0)
trainer.setData(trndata)
trainer.trainUntilConvergence(verbose=True, trainingData=data, maxEpochs=1)

net.offset = 0

m = myo.Myo()
e = []


def proc_emg(emg, moving, times=[]):

    global e, emg_correctmean, emg_filtered, emg_rectified, low_pass, sfreq, emg_envelope
    e = emg
    #emg_correctmean = e - np.mean(e)
    emg_correctmean = scipy.signal.detrend(e)
    high = 20 / (1000 / 2)