def __init__(self, A, B, x_var, y_var, bearing_var, N, initial_state):
    self.bearing_var = bearing_var
    self.A = A
    self.B = B
    self.x_var = x_var
    self.y_var = y_var
    self.num_particles = N
    self.innovation = None
    scen = scenario(1, 1)
    # Bounds of the surveillance region, tiled across all particles
    loc_min = np.array([scen.x_min, scen.y_min]).reshape(2, 1)
    loc_max = np.array([scen.x_max - scen.x_min, scen.y_max - scen.y_min]).reshape(2, 1)
    a = np.kron(loc_min, np.ones([1, self.num_particles]))
    b = np.kron(loc_max, np.ones([1, self.num_particles]))
    vel_min = np.array([scen.vel_min, scen.vel_min]).reshape(2, 1)
    vel_max = np.array([scen.vel_max - scen.vel_min, scen.vel_max - scen.vel_min]).reshape(2, 1)
    aa = np.kron(vel_min, np.ones([1, self.num_particles]))
    bb = np.kron(vel_max, np.ones([1, self.num_particles]))
    #initial_loc_particles = b*np.random.rand(2,self.num_particles)+a
    temp_cov = np.eye(2)
    temp_cov[0, 0] = 0
    temp_cov[1, 1] = 0
    # Location particles start exactly at the given initial location (the noise term is zeroed out)
    initial_loc_particles = np.kron(np.array(initial_state[0:2]).reshape(2, 1), np.ones([1, self.num_particles])) \
        + 0 * np.random.multivariate_normal(np.zeros([2]), temp_cov, self.num_particles).transpose()
    # Velocity particles are drawn uniformly over the scenario velocity range
    initial_vel_particles = bb * np.random.rand(2, self.num_particles) + aa
    initial_state_particles = np.concatenate((initial_loc_particles, initial_vel_particles))
    """
    temp_cov = np.eye(4)
    temp_cov[0,0] = 20
    temp_cov[1,1] = 20
    temp_cov[2,2] = 1
    temp_cov[3,3] = 1
    initial_state = initial_state.reshape(4,1)
    a = np.kron(initial_state,np.ones([1, self.num_particles]))
    initial_state_particles = a + np.random.multivariate_normal(np.zeros([4]),temp_cov,self.num_particles).transpose()
    """
    self.particles_k_km1 = initial_state_particles
    self.particles_k_k = initial_state_particles
    self.bearing_k_km1 = np.zeros([self.num_particles, 1])
    # All particles start with equal weight 1/N
    self.weight_k_km1 = (1.0 / self.num_particles) * np.ones([self.num_particles, 1])
    self.weight_k_k = (1.0 / self.num_particles) * np.ones([self.num_particles, 1])
    Q = np.eye(2)
    Q[0, 0] = .01
    Q[1, 1] = .01
    self.predicted_noise_covariance = (self.B.dot(Q)).dot(self.B.transpose())
    # Covariance template (not referenced further in this constructor)
    Z = np.eye(4)
    Z[2, 2] = .01
    Z[3, 3] = .01
    Z[0, 0] = 5
    Z[1, 1] = 5
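# The constructor above sets up an equally-weighted particle set (weight_k_k = 1/N). A standard
# companion step in an SIR particle filter is systematic resampling of the particles by weight;
# a minimal self-contained sketch (an assumption for illustration, not necessarily the update
# method this tracker uses elsewhere):
import numpy as np

def systematic_resample(particles, weights):
    # particles: (state_dim, N) array; weights: (N,) array summing to 1
    N = particles.shape[1]
    positions = (np.arange(N) + np.random.uniform()) / N
    cumulative = np.cumsum(weights)
    cumulative[-1] = 1.0  # guard against round-off
    indices = np.searchsorted(cumulative, positions)
    return particles[:, indices], np.full(N, 1.0 / N)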
def main(scen_file):
    import scenario
    struct = network_structure(scenario.scenario(scen_file))
    string = ''
    for item in struct:
        item = map(lambda x: str(x) if x is not None else '', list(item))
        string += '(' + ','.join(item) + ')' + ','
    print string[:-1]
def output_from_settings(settings):
    scen = scenario(settings.scenario)
    ref_links = load_reference_links(scen)
    ml, ors, srcs = zip(*network_structure(scen))
    ml_idxs = link_indices(ml, ref_links)
    or_idxs = link_indices(ors, ref_links)
    src_idxs = link_indices(srcs, ref_links)
    ml_lengths = link_lengths(ml, ref_links)
    ml_lanes = link_lanes(ml, ref_links)
    return scen, Output(ml, ors, ml_idxs, ml_lengths, ml_lanes, or_idxs, src_idxs)
def on_exec_scenario(self, *args):
    """Retrieve the analysis parameters from the GUI."""
    Scen = scenario()
    Scen.beta = self.app.beta.get_value()
    Scen.sigmaH = self.app.sigmah.get_value()
    #Scen.v_h_facteur = self.app.vh.get_value()
    #Scen.v_h_facteur = (Scen.v_h_facteur)
    Scen.dt = self.app.dt.get_value_as_int()
    Scen.Thrf = self.app.thrf.get_value_as_int()
    Scen.TR = self.app.tr.get_value_as_int()
    Scen.K = self.app.k.get_value_as_int()
    Scen.M = self.app.m.get_value_as_int()
    Scen.scale = self.app.scale.get_value_as_int()
    return Scen
    t.hiddenDuplexCollision = hiddenDuplexCollision[k]
    t.exposedSpatialReuse = exposedSpatialReuse[k]
    nodes.append(t)

print nodes
print "-------- press ENTER to confirm the input --------------"
#confirmKey = raw_input("If setting is ready, press ENTER to continue, any other key to abort ... ")
#assert confirmKey == '', "setting wrong, programs abort :("
#nodes[2].goodChans = np.array([0, 0, 0, 1])
#nodes[1].goodChans = np.array([0, 1, 1, 0])
#nodes[2].goodChans = np.array([0, 0, 1, 1])

simulationScenario = scenario(numSteps, 'fixed', 3)

# Vector and matrix initializations
actions = np.zeros((numNodes, numChans))
collisions = np.zeros(numNodes)
collisionTally = np.zeros((numNodes, numNodes))  #TODO
collisionHist = np.zeros((numSteps, numNodes))
cumulativeCollisions = np.zeros((numSteps, numNodes))
cumulativeAbsents = np.zeros((numSteps, numNodes))
mdpLearnTime = np.zeros(numSteps)
dqnLearnTime = np.zeros(numSteps)

''' =================================================================== '''
'''                         MAIN LOOP BEGIN                             '''
''' =================================================================== '''
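# A minimal sketch (an assumption, not part of the original script) of how the per-step
# history matrices initialized above are typically filled inside the main loop:
def record_step(step, collisions, collisionHist, cumulativeCollisions):
    # log this step's per-node collisions and keep a running total
    collisionHist[step, :] = collisions
    prev = cumulativeCollisions[step - 1, :] if step > 0 else 0
    cumulativeCollisions[step, :] = prev + collisions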
def run(args):
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy
    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(1 + 100)
    vel_var = args[5]
    np.random.seed(process_index)  # reseed per process
    print("Starting Thread:" + str(process_index))
    # Initialize all the parameters
    params = {0: {}, 1: {}, 2: {}}
    if method == 0:
        params[0]["weight"] = np.random.normal(0, .3, [2, num_states])
    elif method == 1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method == 2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0, 1, [MLP_neurons, 1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])
    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    # Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True
    result_folder = base_path + folder_name + "/"
    reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    # flatten the initial weight and store the values
    if method == 0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 2:
        pass
    sigma = sigma_max
    while episode_counter < N_max:
        # anneal the exploration noise every 1500 episodes
        #sigma = gen_learning_rate(episode_counter, sigma_max, .1, 20000)
        if episode_counter % 1500 == 0 and episode_counter > 0:
            sigma -= .15
            sigma = max(.1, sigma)
        discounted_return = np.array([])
        discount_vector = np.array([])
        scen = scenario(1, 1)
        bearing_var = 1E-2  # variance of bearing measurement
        # Target information
        x = 10000 * random.random() - 5000  # initial x-location
        y = 10000 * random.random() - 5000  # initial y-location
        xdot = 10 * random.random() - 5  # initial xdot-value
        ydot = 10 * random.random() - 5  # initial ydot-value
        init_target_state = [x, y, xdot, ydot]  # initialize target state
        init_for_smc = [x + np.random.normal(0, 5), y + np.random.normal(0, 5),
                        np.random.normal(0, 5), np.random.normal(0, 5)]  # init state for the tracker (tracker doesn't know the initial state)
        #init_for_smc = [x, y, xdot, ydot]
        init_sensor_state = [10000 * random.random() - 5000, 10000 * random.random() - 5000, 3, -2]  # initial sensor-state
        temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
        init_location_estimate = temp_loc + 0 * np.random.normal(np.zeros([2, 1]), 10)
        init_location_estimate = [init_location_estimate[0][0], init_location_estimate[1][0]]
        init_velocity_estimate = [6 * random.random() - 3, 6 * random.random() - 3]
        init_velocity_estimate = [init_target_state[2], init_target_state[3]]
        init_estimate = init_location_estimate + init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY])  # initial covariance of state estimation
        t = target(init_target_state[0:2], init_target_state[2], init_target_state[3], vel_var, vel_var, "CONS_V")  # constant-velocity model for target motion
        A, B = t.constant_velocity(1E-10)  # get motion model
        x_var = t.x_var
        y_var = t.y_var
        tracker_object = EKF_tracker(init_for_smc, init_covariance, A, B, x_var, y_var, bearing_var)  # create tracker object
        #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))
        # Initialize sensor object
        if method == 0:
            s = sensor("POLICY_COMM_LINEAR")  # create sensor object (stochastic policy)
        elif method == 1:
            s = sensor("POLICY_COMM_RBF")
        elif method == 2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)  # create measurement object
        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        reward = []
        episode_condition = True
        n = 0
        violation = 0
        # store required information
        episode_state = []
        episode_MLP_state = []
        episode_actions = []
        while episode_condition:
            t.update_location()
            m.append(measure.generate_bearing(t.current_location, s.current_location))
            tracker_object.update_states(s.current_location, m[-1])
            normalized_innovation = (tracker_object.innovation_list[-1]) / tracker_object.innovation_var[-1]
            current_state = list(tracker_object.x_k_k.reshape(len(tracker_object.x_k_k))) + list(s.current_location)
            # state normalization: map every coordinate to [-1, 1]
            x_slope = 2.0 / (scen.x_max - scen.x_min)
            y_slope = 2.0 / (scen.y_max - scen.y_min)
            x_slope_sensor = 2.0 / 40000
            y_slope_sensor = 2.0 / 40000
            vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
            current_state[0] = -1 + x_slope * (current_state[0] - scen.x_min)
            current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
            current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
            current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
            current_state[4] = -1 + x_slope * (current_state[4] - scen.x_min)
            current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)
            # Refactor states based on the usage
            if method == 0 or method == 2:
                input_state = current_state
            elif method == 1:
                # generate states for the RBF input
                input_state = featurizer.transform(np.array(current_state).reshape(1, len(current_state)))
                input_state = list(input_state[0])
            extra_information = s.update_location_new(params, input_state, sigma)
            estimate = tracker_object.x_k_k
            episode_state.append(input_state)
            if method == 2:
                episode_MLP_state.append(extra_information)  # output of the first layer for gradient calculation
            truth = t.current_location
            x_est.append(estimate[0])
            y_est.append(estimate[1])
            x_vel_est.append(estimate[2])
            y_vel_est.append(estimate[3])
            x_truth.append(truth[0])
            y_truth.append(truth[1])
            x_vel_truth.append(t.current_velocity[0])
            y_vel_truth.append(t.current_velocity[1])
            vel_error.append(np.linalg.norm(estimate[2:4] - np.array([t.current_velocity[0], t.current_velocity[1]]).reshape(2, 1)))
            pos_error.append(np.linalg.norm(estimate[0:2] - np.array(truth).reshape(2, 1)))
            innovation.append(normalized_innovation[0])
            unormalized_uncertainty = np.sum(tracker_object.p_k_k.diagonal())
            uncertainty.append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
            # reward of 1 if the windowed average uncertainty decreased (or is already small)
            if len(uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(uncertainty[-window_size:])
                prev_avg = np.mean(uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or uncertainty[-1] < .1:
                    reward.append(1)
                else:
                    reward.append(0)
            # update the discounted return
            discount_vector = gamma * np.array(discount_vector)
            discounted_return += (1.0 * reward[-1]) * discount_vector
            new_return = 1.0 * reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)
            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n > episode_length:
                break
            n += 1
        # Based on the return from the episode, update parameters of the policy model
        prev_params = dict(params)
        condition = True
        # discard garbage trajectories (diverged tracker) without counting them
        if np.mean(pos_error) > 10000:
            continue
            episode_condition = False  # (unreachable)
            episode_counter -= 1  # (unreachable)
        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            rate = gen_learning_rate(episode_counter, learning_rate, 1E-8, 10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0, len(episode_actions)):
                # calculate the gradient
                state = np.array(episode_state[e]).reshape(len(episode_state[e]), 1)
                if method == 0:
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[0]['weight'].dot(state)).dot(state.transpose())) / sigma ** 2  # this is the gradient
                elif method == 1:
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(state.transpose())) / sigma ** 2  # this is the gradient
                elif method == 2:
                    # gradient for MLP
                    pass
                if np.max(np.abs(gradiant)) > 1E2:
                    continue  # clip large gradients
                if method == 0:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[0]['weight'] += rate * adjustment_term
                elif method == 1:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[1]['weight'] += rate * adjustment_term
                elif method == 2:
                    # gradient for MLP
                    pass
            episode_counter += 1
        else:
            #print("garbage trajectory: no-update")
            pass
        return_saver.append(sum(reward))
        error_saver.append(np.mean(pos_error))
        # periodically log running statistics and flush the output files
        if episode_counter % 100 == 0 and episode_counter > 0:
            print(episode_counter, np.mean(return_saver), sigma)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95 * len(return_saver))])) + "\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95 * len(return_saver))])) + "\n")
            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95 * len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close(); var_file.close(); error_file.close()
            error_file_median.close(); var_error_file.close(); weight_file.close()
            reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            return_saver = []
            error_saver = []
        num_episodes.append(n)
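# gen_learning_rate(episode, start, minimum, horizon) is called throughout these scripts but is
# not defined in this section. A plausible minimal sketch, assuming a linear decay from `start`
# toward `minimum` over `horizon` episodes (the actual schedule may differ):
def gen_learning_rate(episode, start, minimum, horizon):
    # linear decay, clipped from below at `minimum`
    rate = start - (start - minimum) * (float(episode) / horizon)
    return max(minimum, rate)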
xt1 = []; yt1 = []
xt2 = []; yt2 = []
xt3 = []; yt3 = []
xt4 = []; yt4 = []
while episode_counter < N_max:
    sensor_locations = {}
    for weight_index in range(0, 1):
        discounted_return = np.array([])
        discount_vector = np.array([])
        scen = scenario(1, 1)
        bearing_var = 1E-2  # variance of bearing measurement
        # Target information
        x = 10000 * np.random.random([num_targets]) - 5000  # initial x-locations
        y = 10000 * np.random.random([num_targets]) - 5000  # initial y-locations
        xdot = 10 * np.random.random([num_targets]) - 5  # initial xdot-values
        ydot = 10 * np.random.random([num_targets]) - 5  # initial ydot-values
        #TEMP: fixed initial states for four targets
        x = np.array([-2000, 2000, 4000, 2000])
        y = np.array([-4000, -4000, -1000, -2000])
        xdot = [2, -2, -4, -2]
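# The xt1..xt4 / yt1..yt4 buffers above suggest per-target trajectory logging for the four
# TEMP targets; they are filled elsewhere in the loop. A minimal plotting sketch under that
# assumption (matplotlib assumed available):
import matplotlib.pyplot as plt

def plot_trajectories(xy_pairs):
    # xy_pairs: list of (x_list, y_list) tuples, one per target
    for idx, (xs, ys) in enumerate(xy_pairs):
        plt.plot(xs, ys, label="target %d" % (idx + 1))
    plt.xlabel("x"); plt.ylabel("y"); plt.legend()
    plt.show()

# e.g. plot_trajectories([(xt1, yt1), (xt2, yt2), (xt3, yt3), (xt4, yt4)])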
def run(args):
    #if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index + 100)
    method = 0
    RBF_components = 20
    MLP_neurons = 50
    vel_var = .001
    num_targets = min(6, max(2, np.random.poisson(3)))
    num_targets = np.random.randint(2, 10)
    #num_targets = 4
    print("Starting Thread:" + str(process_index))
    # Initialize all the parameters
    params = {0: {}, 1: {}, 2: {}}
    if method == 0:
        params[0]["weight2"] = np.random.normal(0, .3, [2, num_states_layer2])
        params[0]["weight"] = np.array([[7.18777985, -13.68815256, 1.69010242, -5.62483187, -4.30451483, 10.09592853],
                                        [13.33104057, 13.60537864, 3.46939294, 0.8446329, -14.79733566, -4.78599648]])
    elif method == 1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method == 2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0, 1, [MLP_neurons, 1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])
    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    weight_saver2_1 = []
    weight_saver2_2 = []
    # Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True
    result_folder = base_path + folder_name + "/"
    reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    # flatten the initial weight and store the values
    if method == 0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 2:
        pass
    init_max_target = 3
    num_targets = init_max_target
    while episode_counter < N_max:
        # curriculum: raise the target-count cap every 1000 episodes (capped at 20)
        if episode_counter % 1000 == 0 and episode_counter > 0:
            init_max_target += 1
            init_max_target = min(20, init_max_target)
        if episode_counter % 100 == 0 and episode_counter > 0:
            num_targets = np.random.randint(3, init_max_target + 1)
        sigma = gen_learning_rate(episode_counter, sigma_max, .1, 5000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        scen = scenario(1, 1)
        bearing_var = 1E-2  # variance of bearing measurement
        # Target information
        x = 10000 * np.random.random([num_targets]) - 5000  # initial x-locations
        y = 10000 * np.random.random([num_targets]) - 5000  # initial y-locations
        xdot = 10 * np.random.random([num_targets]) - 5  # initial xdot-values
        ydot = 10 * np.random.random([num_targets]) - 5  # initial ydot-values
        #TEMP
        #x = [2000,-2000]
        #y = [2000,2000]
        #xdot = [1,1]
        #ydot = [-1,-1]
        init_target_state = []
        init_for_smc = []
        for target_counter in range(0, num_targets):
            init_target_state.append([x[target_counter], y[target_counter], xdot[target_counter], ydot[target_counter]])  # initialize target state
            init_for_smc.append([x[target_counter] + np.random.normal(0, 5), y[target_counter] + np.random.normal(0, 5),
                                 np.random.normal(0, 5), np.random.normal(0, 5)])  # init state for the tracker (tracker doesn't know the initial state)
        init_covariance = np.diag([MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY])  # initial covariance of state estimation
        t = []
        for i in range(0, num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2], init_target_state[i][3], vel_var, vel_var, "CONS_V"))  # constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)  # get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var
        tracker_object = []
        for i in range(0, num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A, B, x_var, y_var, bearing_var))  # create tracker object
        # Initialize sensor object
        if method == 0:
            s = sensor("POLICY_COMM_LINEAR")  # create sensor object (stochastic policy)
        elif method == 1:
            s = sensor("POLICY_COMM_RBF")
        elif method == 2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)  # create measurement object
        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0, num_targets):
            x_truth.append([]); y_truth.append([])
            x_vel_truth.append([]); y_vel_truth.append([])
            uncertainty.append([]); vel_error.append([])
            x_est.append([]); y_est.append([])
            x_vel_est.append([]); y_vel_est.append([])
            pos_error.append([]); innovation.append([])
        reward = []
        episode_condition = True
        n = 0
        violation = 0
        # store required information
        episode_state = []
        episode_state_out_layer = []
        episode_MLP_state = []
        episode_actions = []
        avg_uncertainty = []
        max_uncertainty = []
        while episode_condition:
            temp_m = []
            input_state_temp = []
            for i in range(0, num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location, s.current_location))
            m.append(temp_m)
            temp_reward = []
            target_actions = []
            for i in range(0, num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1]) / tracker_object[i].innovation_var[-1]
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)
                # state normalization: map every coordinate to [-1, 1]
                x_slope = 2.0 / (scen.x_max - scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)
                x_slope_sensor = 2.0 / 40000
                y_slope_sensor = 2.0 / 40000
                vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                current_state[0] = -1 + x_slope * (current_state[0] - scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] - scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)
                # Refactor states based on the usage
                if method == 0 or method == 2:
                    input_state = current_state
                    input_state_temp.append(input_state)  # store input-states
                elif method == 1:
                    # generate states for the RBF input
                    input_state = featurizer.transform(np.array(current_state).reshape(1, len(current_state)))
                    input_state = list(input_state[0])
                target_actions.append(s.generate_action(params, input_state, .01))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state)  #### needs to be modified
                if method == 2:
                    episode_MLP_state.append(extra_information)  # needs to be modified
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4] - np.array([t[i].current_velocity[0], t[i].current_velocity[1]]).reshape(2, 1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2] - np.array(truth).reshape(2, 1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
            # reward is based on the average normalized uncertainty over all targets
            this_uncertainty = []
            [this_uncertainty.append(uncertainty[x][-1]) for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    reward.append(1)
                else:
                    reward.append(0)
            #if sum(reward)>1100 and num_targets>2: sys.exit(1)
            # update the sensor location based on all individual target actions;
            # index_matrix: an n_s x T matrix, the derivative of the output-layer state
            # with respect to the internal-layer action space
            normalized_state, index_matrix1, index_matrix2, slope = s.update_location_decentralized(target_actions, sigma, params)
            backpropagated_to_internal_1 = index_matrix1.dot(np.array(input_state_temp))  # 8 by 6
            backpropagated_to_internal_2 = index_matrix2.dot(np.array(input_state_temp))  # 8 by 6
            episode_state_out_layer.append(normalized_state)
            episode_state.append([backpropagated_to_internal_1, backpropagated_to_internal_2])  # each entry is a T x 6 matrix (T = number of targets)
            # update the discounted return
            discount_vector = gamma * np.array(discount_vector)
            discounted_return += (1.0 * reward[-1]) * discount_vector
            new_return = 1.0 * reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)
            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n > episode_length:
                break
            n += 1
        # Based on the return from the episode, update parameters of the policy model
        prev_params = dict(params)
        condition = True
        for i in range(0, num_targets):
            if np.mean(pos_error[i]) > 10000:
                condition = False
                break
                episode_condition = False  # (unreachable)
                episode_counter -= 1  # (unreachable)
        if not condition:
            #print("OOPSSSS...")
            continue
        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            rate = gen_learning_rate(episode_counter, learning_rate, 1E-12, 20000)
            internal_rate = gen_learning_rate(episode_counter, 3 * 1E-5, 1E-15, 20000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0, len(episode_actions)):
                # calculate the gradient
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]), 1)
                backpropagated_terms = episode_state[e]
                if method == 0:
                    # derivative of F with respect to the output state-vector (1 x n_s)
                    deriv_with_out_state = (episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).transpose().dot(params[0]['weight2'])
                    internal_gradiant1 = deriv_with_out_state.dot(backpropagated_terms[0])  # 1 by 6
                    internal_gradiant2 = deriv_with_out_state.dot(backpropagated_terms[1])  # 1 by 6
                    internal_gradiant = np.concatenate([internal_gradiant1, internal_gradiant2])
                    gradiant_out_layer = ((episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).dot(out_state.transpose())) / sigma ** 2  # this is the gradient
                elif method == 1:
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(state.transpose())) / sigma ** 2  # this is the gradient
                elif method == 2:
                    # gradient for MLP
                    pass
                if np.max(np.abs(gradiant_out_layer)) > 1E2 or np.max(np.abs(internal_gradiant)) > 1E2:
                    #print("OOPPSSSS...")
                    continue  # clip large gradients
                if method == 0:
                    adjustment_term_out_layer = gradiant_out_layer * normalized_discounted_return[e]  # an unbiased sample of return
                    adjustment_term_internal_layer = internal_gradiant * normalized_discounted_return[e]
                    params[0]['weight2'] += rate * adjustment_term_out_layer
                    params[0]['weight'] += internal_rate * adjustment_term_internal_layer
                elif method == 1:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[1]['weight'] += rate * adjustment_term
                elif method == 2:
                    # gradient for MLP
                    pass
            episode_counter += 1
            flatted_weights1 = list(params[0]['weight'][0, :]) + list(params[0]['weight'][1, :])
            flatted_weights2 = list(params[0]['weight2'][0, :]) + list(params[0]['weight2'][1, :])
            temp1 = []
            [temp1.append(str(x)) for x in flatted_weights1]
            temp2 = []
            [temp2.append(str(x)) for x in flatted_weights2]
            weight_file.write("\t".join(temp1) + "$$$" + "\t".join(temp2) + "\n")
            weight_saver1.append(params[0]['weight'][0][0])
            weight_saver2.append(params[0]['weight'][1][0])
            weight_saver2_1.append(params[0]['weight2'][0][0])
            weight_saver2_2.append(params[0]['weight2'][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass
        return_saver.append(sum(reward))
        error_saver.append(np.mean(pos_error))
        # periodically log running statistics and flush the output files
        if episode_counter % 100 == 0 and episode_counter > 0:
            print(episode_counter, np.mean(return_saver), sigma)
            reward_file.write(str(np.mean(sorted(return_saver, reverse=True)[0:int(.95 * len(return_saver))])) + "\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver, reverse=True)[0:int(.95 * len(return_saver))])) + "\n")
            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95 * len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close(); var_file.close(); error_file.close()
            error_file_median.close(); var_error_file.close(); weight_file.close()
            reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            return_saver = []
            error_saver = []
        num_episodes.append(n)
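# Both run() variants above use the same REINFORCE term for a Gaussian policy a ~ N(Wx, sigma^2 I):
# grad_W log pi(a|x) = (a - Wx) x^T / sigma^2, which is exactly the `gradiant` expression in the
# update loops. A quick finite-difference sanity check of that formula:
import numpy as np

def log_pi(W, x, a, sigma):
    d = a - W.dot(x)
    return float(-d.T.dot(d) / (2 * sigma ** 2))  # log-density up to an additive constant

np.random.seed(0)
W = np.random.randn(2, 6); x = np.random.randn(6, 1); a = np.random.randn(2, 1); sigma = .5
analytic = (a - W.dot(x)).dot(x.T) / sigma ** 2
eps = 1e-6
numeric = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        Wp = W.copy(); Wp[i, j] += eps
        numeric[i, j] = (log_pi(Wp, x, a, sigma) - log_pi(W, x, a, sigma)) / eps
assert np.allclose(analytic, numeric, rtol=1e-3, atol=1e-4)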
if __name__ == '__main__':
    robot_name = 'TIAGo'
    rospy.init_node('ltl_planner_%s' % robot_name)
    ########
    world = rospy.get_param('world_name')
    if len(sys.argv) == 2:
        world = str(sys.argv[1])
        print('Argument: %s.' % (str(sys.argv[1])))
    else:
        print('No argument: world set automatically at %s' % str(world))
    # to run: python ltl_planner.py '<> (r2 && <>r3)'
    # to run: python ltl_planner.py '([]<> r2) && ([]<> r3) && ([]<> r1)'
    # to run: python ltl_planner.py '<> r2 && ([]<> r3) && ([]<> r1)'
    ###############
    while 1:
        if world in ('small_office', 'tutorial_office', 'tabletop_cube'):
            scenario.scenario(world)
        try:
            robot_task = rospy.get_param('plan')
            if str(robot_task) == 'none':
                sys.exit()
            print('Robot task: %s.' % (str(robot_task)))
            [robot_motion, init_pose, robot_action] = robot_model
            planner(robot_motion, init_pose, robot_action, robot_task)
        except rospy.ROSInterruptException:
            pass
        'ssh': {
            'username': '******',
            'key': '/home/bastien/.ssh/id_rsa_grid5k'
        }
    }
})

if '__main__' == __name__:
    pw = getpass.getpass()
    config['iotlab']['ssh']['password'] = pw
    config['g5k']['ssh']['password'] = pw
    iotlab = iotlab_testbed(config.iotlab)
    g5k = g5k_testbed(config.g5k)
    sc = scenario(g5k, iotlab)
    sc.deploy()
    sc.play()
    # book nodes (2 m3 nodes and 1 a8 node on iotlab)
    # g5k.book_nodes()
    # g5k.wait_nodes()
    # iotlab.deploy('')
    # deploy g5k / install ipfs
    # deploy iotlab: flash the m3 nodes
    # set up an ssh tunnel between g5k and iotlab
    # put an object
    # read it
def run(args):
    #if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy
    #args = [0, 20, 50, 0, "TEST1", .001, 10]
    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index + 100)
    vel_var = args[5]
    num_targets = args[6]
    print("Starting Thread:" + str(process_index))
    # Initialize all the parameters (input && output layers)
    params = {0: {}, 1: {}, 2: {}}
    if method == 0:
        params[0]["weight"] = np.random.normal(0, .3, [2, output_size])  # output layer (maps flattened states to the actions)
        #params[0]["weight"] = []
        #for f in range(0, filter_size):
        #    params[0]["weight"].append(np.random.normal(0, 1, [spatial_weight_size, temporal_weight_size]))  # convolution weight matrix
    elif method == 1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method == 2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0, 1, [MLP_neurons, 1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])
    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    # Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True
    result_folder = base_path + folder_name + "/"
    reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
    # flatten the initial weight and store the values
    if method == 0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 2:
        pass
    init_max_target = 3
    num_targets = 3
    while episode_counter < N_max:
        # curriculum: raise the target-count cap every 1000 episodes (capped at 10)
        if episode_counter % 1000 == 0 and episode_counter > 0:
            init_max_target += 1
            init_max_target = min(10, init_max_target)
        if episode_counter % 100 == 0 and episode_counter > 0:
            num_targets = np.random.randint(3, init_max_target + 1)
        num_targets = 3  # fixed at 3 for this experiment
        sigma = gen_learning_rate(episode_counter, sigma_max, .1, 5000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        scen = scenario(1, 1)
        bearing_var = 1E-2  # variance of bearing measurement
        # Target information
        x = 10000 * np.random.random([num_targets]) - 5000  # initial x-locations
        y = 10000 * np.random.random([num_targets]) - 5000  # initial y-locations
        xdot = 10 * np.random.random([num_targets]) - 5  # initial xdot-values
        ydot = 10 * np.random.random([num_targets]) - 5  # initial ydot-values
        init_target_state = []
        init_for_smc = []
        for target_counter in range(0, num_targets):
            init_target_state.append([x[target_counter], y[target_counter], xdot[target_counter], ydot[target_counter]])  # initialize target state
            init_for_smc.append([x[target_counter] + np.random.normal(0, 5), y[target_counter] + np.random.normal(0, 5),
                                 np.random.normal(0, 5), np.random.normal(0, 5)])  # init state for the tracker (tracker doesn't know the initial state)
        init_covariance = np.diag([MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY])  # initial covariance of state estimation
        t = []
        for i in range(0, num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2], init_target_state[i][3], vel_var, vel_var, "CONS_V"))  # constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)  # get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var
        tracker_object = []
        for i in range(0, num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A, B, x_var, y_var, bearing_var))  # create tracker object
        # Initialize sensor object
        if method == 0:
            s = sensor("POLICY_COMM_LINEAR")  # create sensor object (stochastic policy)
        elif method == 1:
            s = sensor("POLICY_COMM_RBF")
        elif method == 2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)  # create measurement object
        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        avg_uncertainty = []
        max_uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0, num_targets):
            x_truth.append([]); y_truth.append([])
            x_vel_truth.append([]); y_vel_truth.append([])
            uncertainty.append([]); vel_error.append([])
            x_est.append([]); y_est.append([])
            x_vel_est.append([]); y_vel_est.append([])
            pos_error.append([]); innovation.append([])
        reward = []
        episode_condition = True
        n = 0
        violation = 0
        # store required information
        episode_state = []
        episode_state_out_layer = []
        episode_grad_with_state_w1 = []
        episode_grad_with_state_w2 = []
        episode_MLP_state = []
        episode_actions = []
        while episode_condition:
            temp_m = []
            for i in range(0, num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location, s.current_location))
            m.append(temp_m)
            temp_reward = []
            target_actions = []
            # create the input-feature matrix (fixed size: num_states x num_targets)
            input_state = np.zeros([num_states, num_targets])
            for i in range(0, num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1]) / tracker_object[i].innovation_var[-1]
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)
                # state normalization: map every coordinate to [-1, 1]
                x_slope = 2.0 / (scen.x_max - scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)
                x_slope_sensor = 2.0 / 40000
                y_slope_sensor = 2.0 / 40000
                vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                current_state[0] = -1 + x_slope * (current_state[0] - scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] - scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)
                if method == 0 or method == 2:
                    input_state[:, i] = current_state
                #target_actions.append(s.generate_action(params, input_state, .1))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state)  #### needs to be modified
                if method == 2:
                    episode_MLP_state.append(extra_information)  # needs to be modified
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4] - np.array([t[i].current_velocity[0], t[i].current_velocity[1]]).reshape(2, 1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2] - np.array(truth).reshape(2, 1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
            # featurize the input-state matrix by pooling across targets
            input_state = list(np.max(input_state, axis=1)) + list(np.min(input_state, axis=1)) + \
                list(np.mean(input_state, axis=1)) + list(np.median(input_state, axis=1))
            this_uncertainty = []
            [this_uncertainty.append(uncertainty[x][-1]) for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            # reward of 1 if the windowed average uncertainty decreased (or is already small)
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    reward.append(1)
                else:
                    reward.append(0)
            #if sum(reward)>1100 and num_targets>2: sys.exit(1)
            # update the sensor location from the pooled feature vector
            s.update_location_new(params, np.array(input_state).reshape([len(input_state), 1]), sigma)
            episode_state_out_layer.append(input_state)
            # update the discounted return
            discount_vector = gamma * np.array(discount_vector)
            discounted_return += (1.0 * reward[-1]) * discount_vector
            new_return = 1.0 * reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)
            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n > episode_length:
                break
            n += 1
        # Based on the return from the episode, update parameters of the policy model
        prev_params = dict(params)
        condition = True
        for i in range(0, num_targets):
            if np.mean(pos_error[i]) > 10000:
                condition = False
                break
                episode_condition = False  # (unreachable)
                episode_counter -= 1  # (unreachable)
        if not condition:
            #print("OOPSSSS...")
            continue
        condition = True
        prev_params = dict(params)
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            rate = gen_learning_rate(episode_counter, learning_rate, 1E-8, 10000)
            internal_rate = gen_learning_rate(episode_counter, 5 * 1E-5, 1E-9, 10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0, len(episode_actions)):
                # calculate the gradient
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]), 1)
                if method == 0:
                    predicted_action = params[0]['weight'].dot(out_state)
                    gradiant_out_layer = ((episode_actions[e].reshape(2, 1) - predicted_action).dot(out_state.transpose())) / sigma ** 2  # this is the gradient
                elif method == 1:
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(state.transpose())) / sigma ** 2  # this is the gradient
                elif method == 2:
                    # gradient for MLP
                    pass
                if np.max(np.abs(gradiant_out_layer)) > 1E2:  # or np.max(np.abs(gradiant_internal[0])) > 1E2:
                    #print("OOPPSSSS...")
                    continue  # clip large gradients
                if method == 0:
                    adjustment_term_out_layer = gradiant_out_layer * normalized_discounted_return[e]  # an unbiased sample of return
                    params[0]['weight'] += rate * adjustment_term_out_layer
                elif method == 1:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[1]['weight'] += rate * adjustment_term
                elif method == 2:
                    # gradient for MLP
                    pass
            episode_counter += 1
            flatted_weights = list(params[0]['weight'][0, :]) + list(params[0]['weight'][1, :])
            temp = []
            [temp.append(str(x)) for x in flatted_weights]
            weight_file.write("\t".join(temp) + "\n")
            #weight_saver1.append(params[0]['weight'][0][0][0])
            #weight_saver2.append(params[0]['weight'][0][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass
        return_saver.append(sum(reward))
        error_saver.append(np.mean(pos_error))
        # periodically log running statistics and flush the output files
        if episode_counter % 100 == 0 and episode_counter > 0:
            print(episode_counter, np.mean(return_saver), sigma)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95 * len(return_saver))])) + "\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95 * len(return_saver))])) + "\n")
            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95 * len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close(); var_file.close(); error_file.close()
            error_file_median.close(); var_error_file.close(); weight_file.close()
            reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            return_saver = []
            error_saver = []
        num_episodes.append(n)
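# The episode loop above collapses a variable number of targets into a fixed-length feature
# vector by pooling the (num_states x num_targets) matrix across targets (max/min/mean/median).
# A standalone illustration of that featurizer:
import numpy as np

def pool_features(state_matrix):
    # state_matrix: (num_states, num_targets) -> fixed-length list of 4*num_states features
    return list(np.max(state_matrix, axis=1)) + list(np.min(state_matrix, axis=1)) + \
           list(np.mean(state_matrix, axis=1)) + list(np.median(state_matrix, axis=1))

# e.g. len(pool_features(np.random.rand(6, 4))) == 24, independent of the number of targets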
    for i in range(n_plat):
        liste[i] = m
    return liste

# SIMULATION OF THE FIRST PERIODS WITH RANDOM PRICES
# define listePRIX
liste_tab_Y = [[] for i in range(n_plat)]
liste_tab_X = [[] for i in range(n_plat)]
listeRevenus1 = [[] for i in range(n_plat)]
listeRevenus2 = [[] for i in range(n_plat)]
listeRevenus3 = [[] for i in range(n_plat)]
"""matriceOriginelle = matricefacto."""
for i in range(n_jours_training):
    print("Day ", i)
    state = [stock_beg for i in range(n_plat)]
    scen = scenario.scenario(n_clients, n_periodes, n_plat)
    for pe in range(n_periodes - 1):
        liste_prix = remplirListePrix(pe)
        listeDemandes, tab_notes = scen.simuler(pe, liste_prix)
        for plat in range(n_plat):
            state[plat] -= listeDemandes[plat]
            a = 0
            if state[plat] < 0:
                # only the stock that was actually available is sold
                a = listeDemandes[plat] + state[plat]
            else:
                a = listeDemandes[plat]
            state[plat] = max(0, state[plat])
            listeRevenus1[plat].append(a * liste_prix[plat])
            # add the ratings (NOTES) part here...
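# remplirListePrix(pe) is called above but not shown in this section. A minimal hypothetical
# sketch, assuming it returns one price per dish (n_plat entries) drawn from a fixed candidate
# grid -- the actual pricing logic may well differ:
import random

def remplirListePrix(pe, prix_candidats=(8, 10, 12)):
    # one random candidate price per dish for period pe
    return [random.choice(prix_candidats) for _ in range(n_plat)]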
def load_reference_links(scen):
    if type(scen) is str:
        scen = scenario(scen)
    ReferenceLinks = collections.namedtuple("ReferenceLinks", "ids lengths lanes")
    links = scen.findAll('link')
    return ReferenceLinks([int(link['id']) for link in links],
                          [float(link['length']) for link in links],
                          [float(link['lanes']) for link in links])
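# link_indices, link_lengths and link_lanes are used in output_from_settings above but are not
# defined in this section. A minimal sketch, assuming they map link ids onto positions and
# attributes in the ReferenceLinks tuple returned by load_reference_links:
def link_indices(links, ref_links):
    # position of each link id inside ref_links.ids
    return [ref_links.ids.index(int(l)) for l in links]

def link_lengths(links, ref_links):
    return [ref_links.lengths[i] for i in link_indices(links, ref_links)]

def link_lanes(links, ref_links):
    return [ref_links.lanes[i] for i in link_indices(links, ref_links)]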
episode_counter = 0
weight_saver1 = []
weight_saver2 = []
avg_reward = []
var_reward = []
list_of_states = []
# Main loop to count the number of valid episodes
while episode_counter < N_max:
    # variable variance?
    #sigma = gen_learning_rate(episode_counter, sigma_max, .1, 20000)
    sigma = sigma_max
    discounted_return = np.array([])
    discount_vector = np.array([])
    scen = scenario(1, 1)  # create scenario object (single-target, single-sensor)
    bearing_var = 1E-2  # variance of bearing measurement
    # Initialize target location + velocity randomly
    x = 2000 * random.random() - 1000  # initial x-location
    y = 2000 * random.random() - 1000  # initial y-location
    xdot = 10 * random.random() - 5  # initial xdot-value
    ydot = 10 * random.random() - 5  # initial ydot-value
    init_target_state = [x, y, xdot, ydot]  # initialize target state
    # Add noise to the initial target location since the tracker doesn't know it
    init_for_smc = [x + np.random.normal(0, 5), y + np.random.normal(0, 5),
                    np.random.normal(0, 5), np.random.normal(0, 5)]  # init state for the tracker
    # initialize the sensor location randomly too
    init_sensor_state = [2000 * random.random() - 1000, 2000 * random.random() - 1000, 3, -2]  # initial sensor-state
    temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
    init_location_estimate = temp_loc + 0 * np.random.normal(np.zeros([2, 1]), 10)
    init_location_estimate = [init_location_estimate[0][0], init_location_estimate[1][0]]
import scenario

scen = scenario.scenario(10000, 24, 3)
somme1 = 0
somme2 = 0
somme3 = 0
for i in range(10000):
    tab = scen.simuler(12, [10, 10, 10])
    somme1 += tab[0]
    somme2 += tab[1]
    somme3 += tab[2]
print(somme1 / 10000)
print(somme2 / 10000)
print(somme3 / 10000)
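# The loop above is a Monte-Carlo estimate of the mean demand per dish at period 12 with all
# prices set to 10. An equivalent, more compact accumulation (numpy assumed available, and
# assuming scen.simuler returns one demand value per dish as the indexing above implies):
import numpy as np

n_runs = 10000
totals = np.zeros(3)
for _ in range(n_runs):
    totals += np.asarray(scen.simuler(12, [10, 10, 10]), dtype=float)
print(totals / n_runs)  # estimated mean demand for each of the 3 dishes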
def run(args):
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy
    vel_var = args[0]
    heading_rate = args[1]
    experiment_folder_name = args[2]
    file = open(base_path + "/" + experiment_folder_name + "/best_data_" + str(heading_rate), "w")
    # initialize actor parameters
    MAX_UNCERTAINTY = 1E9
    num_states = 6
    weight = np.random.normal(0, 1, [2, num_states])
    sigma_max = 1
    num_episodes = []
    gamma = .99
    episode_length = 1500
    learning_rate = 1E-3
    min_learning_rate = 1E-6
    N_max = 200
    window_size = 50
    window_lag = 10
    return_saver = []
    weight_saver1 = []
    weight_saver2 = []
    total_error = {}
    total_error_variance = {}
    total_reward = {}
    # Training parameters
    print("heading-rate=" + str(heading_rate))
    # grid search over constant sensor velocities
    for xdot_sensor in np.arange(-15, 16, 1):
        for ydot_sensor in np.arange(-15, 16, 1):
            episode_counter = 0
            avg_reward = []
            var_reward = []
            error_saver = []
            while episode_counter < N_max:
                sigma = gen_learning_rate(episode_counter, sigma_max, .1, 5000)
                sigma = sigma_max
                discounted_return = np.array([])
                discount_vector = np.array([])
                scen = scenario(1, 1)
                bearing_var = 1E-2  # variance of bearing measurement
                # Target information
                x = 10000 * random.random() - 5000  # initial x-location
                y = 10000 * random.random() - 5000  # initial y-location
                xdot = 10 * random.random() - 5  # initial xdot-value
                ydot = 10 * random.random() - 5  # initial ydot-value
                #x = 250; y = 50; xdot = 7; ydot = -5
                init_target_state = [x, y, xdot, ydot]  # initialize target state
                init_for_smc = [x + np.random.normal(0, 5), y + np.random.normal(0, 5),
                                np.random.normal(0, 5), np.random.normal(0, 5)]  # init state for the tracker (tracker doesn't know the initial state)
                #init_for_smc = [x, y, xdot, ydot]
                init_sensor_state = [10000 * random.random() - 5000, 10000 * random.random() - 5000, 3, -2]  # initial sensor-state
                temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
                init_location_estimate = temp_loc + 0 * np.random.normal(np.zeros([2, 1]), 10)
                init_location_estimate = [init_location_estimate[0][0], init_location_estimate[1][0]]
                init_velocity_estimate = [6 * random.random() - 3, 6 * random.random() - 3]
                init_velocity_estimate = [init_target_state[2], init_target_state[3]]
                init_estimate = init_location_estimate + init_velocity_estimate
                init_covariance = np.diag([MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY])  # initial covariance of state estimation
                t = target(init_target_state[0:2], init_target_state[2], init_target_state[3], vel_var, vel_var, "CONS_V")  # constant-velocity model for target motion
                A, B = t.constant_velocity(1E-10)  # get motion model
                x_var = t.x_var
                y_var = t.y_var
                tracker_object = EKF_tracker(init_for_smc, init_covariance, A, B, x_var, y_var, bearing_var)  # create tracker object
                #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))
                s = sensor("CONS_V", [0, 0], [xdot_sensor, ydot_sensor], heading_rate)  # create sensor object (constant velocity)
                #s = sensor("CONS_V")
                measure = measurement(bearing_var)  # create measurement object
                m = []
                x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
                x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
                uncertainty = []
                vel_error = []
                pos_error = []
                iteration = []
                innovation = []
                reward = []
                episode_condition = True
                n = 0
                violation = 0
                # store required information
                episode_state = []
                episode_actions = []
                while episode_condition:
                    #if n>50: episode_condition=False
                    # update locations of target and sensor, generate a new measurement, run the tracker
                    t.update_location()
                    m.append(measure.generate_bearing(t.current_location, s.current_location))
                    tracker_object.update_states(s.current_location, m[-1])
                    #if len(tracker_object.meas_vec)>20:
                    #    tmp = np.zeros([2,2])
                    #    for n in range(0,10):
                    #        vector = tracker_object.meas_vec[-1-n]
                    #        cov = (vector.transpose().dot(vector))/bearing_var
                    #        sliced_cov = np.array([[cov[0,0],cov[0,1]],[cov[1,0],cov[1,1]]])
                    #        tmp += sliced_cov
                    #    Fisher_matrix = tmp/10.0
                    #    crlb = np.linalg.inv(Fisher_matrix)
                    #    print(crlb.diagonal())
                    # create the state-vector
                    normalized_innovation = (tracker_object.innovation_list[-1]) / tracker_object.innovation_var[-1]
                    current_state = list(tracker_object.x_k_k.reshape(len(tracker_object.x_k_k))) + list(s.current_location)
                    # state normalization: map every coordinate to [-1, 1]
                    x_slope = 2.0 / (scen.x_max - scen.x_min)
                    y_slope = 2.0 / (scen.y_max - scen.y_min)
                    vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                    current_state[0] = -1 + x_slope * (current_state[0] - scen.x_min)
                    current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                    current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                    current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                    current_state[4] = -1 + x_slope * (current_state[4] - scen.x_min)
                    current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)
                    s.update_location(weight, sigma, np.array(current_state))
                    estimate = tracker_object.x_k_k
                    episode_state.append(current_state)
                    truth = t.current_location
                    x_est.append(estimate[0]); y_est.append(estimate[1])
                    x_vel_est.append(estimate[2]); y_vel_est.append(estimate[3])
                    x_truth.append(truth[0]); y_truth.append(truth[1])
                    x_vel_truth.append(t.current_velocity[0]); y_vel_truth.append(t.current_velocity[1])
                    vel_error.append(np.linalg.norm(estimate[2:4] - np.array([t.current_velocity[0], t.current_velocity[1]]).reshape(2, 1)))
                    pos_error.append(np.linalg.norm(estimate[0:2] - np.array(truth).reshape(2, 1)))
                    innovation.append(normalized_innovation[0])
                    unormalized_uncertainty = np.sum(tracker_object.p_k_k.diagonal())
                    uncertainty.append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                    # reward of 1 if the windowed average uncertainty decreased (or is already small)
                    if len(uncertainty) < window_size + window_lag:
                        reward.append(0)
                    else:
                        current_avg = np.mean(uncertainty[-window_size:])
                        prev_avg = np.mean(uncertainty[-(window_size + window_lag):-window_lag])
                        if current_avg < prev_avg or uncertainty[-1] < .1:
                            reward.append(1)
                        else:
                            reward.append(0)
                    # update the discounted return
                    discount_vector = gamma * np.array(discount_vector)
                    discounted_return += (1.0 * reward[-1]) * discount_vector
                    new_return = 1.0 * reward[-1]
                    list_discounted_return = list(discounted_return)
                    list_discounted_return.append(new_return)
                    discounted_return = np.array(list_discounted_return)
                    list_discount_vector = list(discount_vector)
                    list_discount_vector.append(1)
                    discount_vector = np.array(list_discount_vector)
                    iteration.append(n)
                    if n > episode_length:
                        break
                    n += 1
                num_episodes.append(n)
                error_saver.append(np.mean(pos_error))
                return_saver.append(sum(reward))
                episode_counter += 1
            # aggregate trimmed statistics for this (xdot_sensor, ydot_sensor) pair
            total_error[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.mean(sorted(error_saver)[0:int(.95 * N_max)])
            total_reward[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.mean(sorted(return_saver, reverse=True)[0:int(.95 * N_max)])
            total_error_variance[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.var(sorted(error_saver)[0:int(.95 * N_max)])
    # report the velocity pair with the smallest mean error
    sorted_error = sorted(total_error.items(), key=operator.itemgetter(1))
    key = sorted_error[0][0]
    file.write("Min Error=" + str(sorted_error[0][1]) + "\n")
    file.write("Best params=" + str(key) + "\n")
    file.close()