# __init__ of a particle-filter tracker (the enclosing class definition is not part of this snippet)
def __init__(self, A, B, x_var, y_var, bearing_var, N, initial_state):
        self.bearing_var = bearing_var
        self.A = A
        self.B = B
        self.x_var = x_var
        self.y_var = y_var
        self.num_particles = N
        self.innovation = None

        scen = scenario(1,1)
        loc_min = np.array([scen.x_min, scen.y_min]).reshape(2,1)
        loc_max = np.array([scen.x_max - scen.x_min, scen.y_max - scen.y_min]).reshape(2, 1)
        a = np.kron(loc_min, np.ones([1, self.num_particles]))
        b = np.kron(loc_max, np.ones([1, self.num_particles]))

        vel_min = np.array([scen.vel_min, scen.vel_min]).reshape(2, 1)
        vel_max = np.array([scen.vel_max - scen.vel_min, scen.vel_max - scen.vel_min]).reshape(2, 1)
        aa = np.kron(vel_min, np.ones([1, self.num_particles]))
        bb = np.kron(vel_max, np.ones([1, self.num_particles]))

        #initial_loc_particles = b*np.random.rand(2,self.num_particles)+a
        # Location particles start exactly at the given initial state: the covariance
        # below is all zeros and the sampled noise is multiplied by 0, so the noise
        # term is effectively disabled.
        temp_cov = np.zeros((2, 2))
        initial_loc_particles = np.kron(np.array(initial_state[0:2]).reshape(2,1),np.ones([1, self.num_particles])) + 0*np.random.multivariate_normal(np.zeros([2]),temp_cov,self.num_particles).transpose()
        initial_vel_particles = bb*np.random.rand(2,self.num_particles)+aa
        initial_state_particles = np.concatenate((initial_loc_particles,initial_vel_particles))



        """
        temp_cov = np.eye(4)
        temp_cov[0,0] = 20
        temp_cov[1,1] = 20
        temp_cov[2,2] = 1
        temp_cov[3,3] = 1
        initial_state = initial_state.reshape(4,1)
        a = np.kron(initial_state,np.ones([1, self.num_particles]))
        initial_state_particles = a + np.random.multivariate_normal(np.zeros([4]),temp_cov,self.num_particles).transpose()
        """



        self.particles_k_km1 = initial_state_particles
        self.particles_k_k = initial_state_particles
        self.bearing_k_km1 = np.zeros([self.num_particles,1])
        self.weight_k_km1 = (1.0/self.num_particles)*np.ones([self.num_particles,1])
        self.weight_k_k = (1.0 / self.num_particles) * np.ones([self.num_particles, 1])

        Q = .01 * np.eye(2)  # process-noise covariance

        self.predicted_noise_covariance = (self.B.dot(Q)).dot(self.B.transpose())
        # NOTE: Z is constructed but never stored or used; it looks like leftover tuning code.
        Z = np.eye(4)
        Z[0, 0] = 5
        Z[1, 1] = 5
        Z[2, 2] = .01
        Z[3, 3] = .01
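
# The constructor above reads region and velocity bounds off a `scenario` object.
# Below is a hypothetical minimal sketch of that interface; the attribute names come
# from the usage above, but the numeric bounds are assumptions, not values from the
# original project.
import numpy as np

class ScenarioSketch(object):
    def __init__(self, grid_x=1, grid_y=1):
        # surveillance region bounds (assumed)
        self.x_min, self.x_max = -20000.0, 20000.0
        self.y_min, self.y_max = -20000.0, 20000.0
        # velocity limits used for particle initialization and state normalization
        self.vel_min, self.vel_max = -5.0, 5.0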
def main(scen_file):
  import scenario
  struct = network_structure(scenario.scenario(scen_file))
  string = ''
  for item in struct:
    item = [str(x) if x is not None else '' for x in item]
    string += '(' + ','.join(item) + '),'
  print(string[:-1])
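
# A minimal usage sketch for `main` (the command-line argument below is a placeholder,
# not a file name from the original source):
if __name__ == '__main__':
  import sys
  main(sys.argv[1])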
def output_from_settings(settings):
    scen = scenario(settings.scenario)
    ref_links = load_reference_links(scen)
    ml, ors, srcs = zip(*network_structure(scen))
    ml_idxs = link_indices(ml, ref_links)
    or_idxs = link_indices(ors, ref_links)
    src_idxs = link_indices(srcs, ref_links)    
    ml_lengths = link_lengths(ml, ref_links)
    ml_lanes = link_lanes(ml, ref_links)
    return scen, Output(ml, ors, ml_idxs, ml_lengths, ml_lanes, or_idxs, src_idxs)
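
# `output_from_settings` leans on a few helpers that are not shown here. The
# definitions below are hypothetical sketches consistent with how they are called
# above, assuming `ref_links` maps a link id to a record carrying length and lane
# data; the real implementations may differ.
def link_indices(links, ref_links):
    # position of each link id in the reference ordering
    order = list(ref_links)
    return [order.index(link) for link in links]

def link_lengths(links, ref_links):
    return [ref_links[link]['length'] for link in links]

def link_lanes(links, ref_links):
    return [ref_links[link]['lanes'] for link in links]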
 def on_exec_scenario(self, *args):
     """Récupération des paramètres d'analyse"""
     Scen = scenario()
     Scen.beta=self.app.beta.get_value()
     Scen.sigmaH=self.app.sigmah.get_value()
     #Scen.v_h_facteur= self.app.vh.get_value()
     #Scen.v_h_facteur=(Scen.v_h_facteur)
     Scen.dt=self.app.dt.get_value_as_int()
     Scen.Thrf=self.app.thrf.get_value_as_int()
     Scen.TR=self.app.tr.get_value_as_int()
     Scen.K=self.app.k.get_value_as_int()
     Scen.M=self.app.m.get_value_as_int()
     Scen.scale=self.app.scale.get_value_as_int()
     return Scen
    t.hiddenDuplexCollision = hiddenDuplexCollision[k]
    t.exposedSpatialReuse = exposedSpatialReuse[k]

    nodes.append(t)

print(nodes)

"-------- print ENTER to confirm the input--------------"
#confirmKey = raw_input("If setting is ready, press ENTER to continue, any other key to abort ... ")
#assert confirmKey == '', "setting wrong, programs abort :("

#nodes[2].goodChans = np.array( [0,0,0,1] )
#nodes[1].goodChans = np.array( [0,1,1,0] )
#nodes[2].goodChans = np.array( [0,0,1,1] )

simulationScenario = scenario(numSteps, 'fixed', 3)

# Vector and Matrix Initializations
actions = np.zeros((numNodes, numChans))
collisions = np.zeros(numNodes)
collisionTally = np.zeros((numNodes, numNodes))  #TODO
collisionHist = np.zeros((numSteps, numNodes))
cumulativeCollisions = np.zeros((numSteps, numNodes))
cumulativeAbsents = np.zeros((numSteps, numNodes))

mdpLearnTime = np.zeros(numSteps)
dqnLearnTime = np.zeros(numSteps)
''' ===================================================================  '''
'''                 MAIN LOOP BEGIN                               '''
''' ===================================================================  '''
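
# The training loops below call gen_learning_rate(iteration, start, floor, horizon),
# whose definition is not included in this snippet. A plausible sketch, assuming a
# geometric decay from `start` toward `floor` over roughly `horizon` iterations:
def gen_learning_rate(iteration, start, floor, horizon):
    # decay the rate geometrically and clamp it at the floor
    decayed = start * (floor / start) ** (min(1.0, float(iteration) / horizon))
    return max(floor, decayed)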
def run(args):
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy
    
    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    #np.random.seed(1 + 100)  # superseded by the per-process seed below
    vel_var = args[5]
    
    np.random.seed(process_index)
    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight"] = np.random.normal(0, .3, [2, num_states])
        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    sigma = sigma_max
    while episode_counter<N_max:
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,20000)
        if episode_counter%1500==0 and episode_counter>0:
            sigma-= .15
            sigma = max(.1,sigma)
        #sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*random.random()-5000#initial x-location
        y = 10000 * random.random() - 5000#initial y-location
        xdot = 10*random.random()-5#initial xdot-value
        ydot = 10 * random.random() - 5#initial ydot-value
        init_target_state = [x,y,xdot,ydot]#initialize target state
        init_for_smc = [x+np.random.normal(0,5),y+np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)]#init state for the tracker (tracker doesn't know about the initial state)
        #init_for_smc = [x, y, xdot, ydot]
        init_sensor_state = [10000*random.random()-5000,10000 * random.random() - 5000,3,-2]#initial sensor-state

        temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        init_location_estimate = temp_loc + 0*np.random.normal(np.zeros([2,1]),10)  # noise term disabled by the 0 factor
        init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]  # random velocity init (unused)
        init_velocity_estimate = [init_target_state[2],init_target_state[3]]  # use the true velocity
        init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation

        t = target(init_target_state[0:2], init_target_state[2], init_target_state[3], vel_var, vel_var, "CONS_V")#constant-velocity model for target motion
        A, B = t.constant_velocity(1E-10)#Get motion model
        x_var = t.x_var
        y_var = t.y_var

        tracker_object = EKF_tracker(init_for_smc, init_covariance, A,B,x_var,y_var,bearing_var)#create tracker object
        #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_MLP_state = []
        episode_actions = []

        while episode_condition:
            t.update_location()
            m.append(measure.generate_bearing(t.current_location,s.current_location))
            tracker_object.update_states(s.current_location, m[-1])
            normalized_innovation = (tracker_object.innovation_list[-1])/tracker_object.innovation_var[-1]
            #print(normalized_innovation)
            #if (normalized_innovation<1E-4 or n<10) and n<200:
                #end of episode
            current_state = list(tracker_object.x_k_k.reshape(len(tracker_object.x_k_k))) + list(s.current_location)

            #print(current_state)
            #state normalization
            x_slope = 2.0/(scen.x_max-scen.x_min)
            y_slope = 2.0 / (scen.y_max - scen.y_min)

            x_slope_sensor = 2.0 / (40000)
            y_slope_sensor = 2.0 / (40000)

            vel_slope = 2.0/(scen.vel_max-scen.vel_min)
            #normalization
            current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
            current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
            current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
            current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
            current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
            current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)


            #Refactor states based on the usage
            if method==0 or method==2:
                input_state = current_state
            elif method==1:
                #Generate states for the RBF input
                input_state =  featurizer.transform(np.array(current_state).reshape(1,len(current_state)))
                input_state = list(input_state[0])


            extra_information = s.update_location_new(params,input_state,sigma)
            estimate = tracker_object.x_k_k
            episode_state.append(input_state)
            if method==2: episode_MLP_state.append(extra_information) #Output of the first layer for Gradient calculation
            truth = t.current_location
            x_est.append(estimate[0])
            y_est.append(estimate[1])
            x_vel_est.append(estimate[2])
            y_vel_est.append(estimate[3])
            x_truth.append(truth[0])
            y_truth.append(truth[1])
            x_vel_truth.append(t.current_velocity[0])
            y_vel_truth.append(t.current_velocity[1])
            vel_error.append(np.linalg.norm(estimate[2:4]-np.array([t.current_velocity[0],t.current_velocity[1]]).reshape(2,1)))
            pos_error.append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
            innovation.append(normalized_innovation[0])
            unormalized_uncertainty = np.sum(tracker_object.p_k_k.diagonal())
            #if unormalized_uncertainty>MAX_UNCERTAINTY:
             #   normalized_uncertainty = 1
            #else:
             #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
            uncertainty.append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
            if len(uncertainty)<window_size+window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(uncertainty[-window_size:])
                prev_avg = np.mean(uncertainty[-(window_size+window_lag):-window_lag])
                if current_avg<prev_avg or uncertainty[-1]<.1:
                #if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])

        prev_params = dict(params)
        condition = True
        if np.mean(pos_error) > 10000:
            # diverged episode: skip it without updating the policy
            continue
        #if episode_counter%100==0 and training:
            #print("Starting the evaluation phase...")
            #training = False
            #episode_condition = False
        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-8,10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)

                #calculate the score-function gradient
                if method==0:
                    gradient = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2
                elif method==1:
                    gradient = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradient))>1E2: continue #clip large gradients

                if method==0:
                    adjustment_term = gradient*normalized_discounted_return[e]#an unbiased sample of the return
                    params[0]['weight'] += rate * adjustment_term
                elif method==1:
                    adjustment_term = gradient * normalized_discounted_return[e]  # an unbiased sample of the return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            #flatted_weights = list(weight[0, :]) + list(weight[1, :])
            #temp = []
            #[temp.append(str(x)) for x in flatted_weights]
            #weight_file.write("\t".join(temp)+"\n")
            #weight_saver1.append(weight[0][0])
            #weight_saver2.append(weight[0][1])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
    # NOTE: the block below appears to come from a multi-target variant of run();
    # num_targets is not defined earlier in this function.
    xt1 = []
    yt1 = []
    xt2 = []
    yt2 = []
    xt3 = []
    yt3 = []
    xt4 = []
    yt4 = []

    while episode_counter < N_max:
        sensor_locations = {}
        for weight_index in range(0, 1):
            discounted_return = np.array([])
            discount_vector = np.array([])
            #print(episodes_counter)
            scen = scenario(1, 1)
            bearing_var = 1E-2  #variance of bearing measurement
            #Target information
            x = 10000 * np.random.random([num_targets]) - 5000  #initial x-location
            y = 10000 * np.random.random([num_targets]) - 5000  #initial y-location
            xdot = 10 * np.random.random([num_targets]) - 5  #initial xdot-value
            ydot = 10 * np.random.random([num_targets]) - 5  #initial ydot-value
            #TEMP

            x = np.array([-2000, 2000, 4000, 2000])
            y = np.array([-4000, -4000, -1000, -2000])
            xdot = [2, -2, -4, -2]
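
# For reference: the update in run() above is REINFORCE with a Gaussian policy whose
# mean is linear in the state, a ~ N(W s, sigma^2 I). The score function is
# grad_W log pi(a|s) = (a - W s) s^T / sigma^2, which is exactly the `gradient` term
# computed in the loop. A self-contained sketch of that update (shapes assumed):
import numpy as np

def reinforce_step(W, states, actions, returns, sigma, rate):
    # one policy-gradient pass over a recorded episode
    for s, a, G in zip(states, actions, returns):
        s = np.asarray(s).reshape(-1, 1)               # column state vector
        a = np.asarray(a).reshape(-1, 1)               # column action vector
        grad = (a - W.dot(s)).dot(s.T) / sigma ** 2    # score-function gradient
        if np.max(np.abs(grad)) > 1E2:                 # skip exploding gradients
            continue
        W = W + rate * G * grad                        # ascend the expected return
    return W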
def run(args):
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #method = args[0]
    #RBF_components = args[1]
    #MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index+100)
    #process_index = 0
    #np.random.seed(process_index + 100)
    #vel_var = args[5]
    #num_targets = args[6]

    method = 0
    RBF_components = 20
    MLP_neurons = 50
    vel_var = .001
    #num_targets = min(6,max(2,np.random.poisson(3)))  # overridden by the draw below
    num_targets = np.random.randint(2,10)
    #num_targets = 4


    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight2"] = np.random.normal(0, .3, [2, num_states_layer2])
        #params[0]["weight2"] = np.array([[  3.97573312,   0.4639474 ,   2.27280486,  12.9085868 ,
         #   3.45722461,   6.36735166],
         #[-11.87940874,   2.59549414,  -5.68556954,   2.87746786,
          #  7.08059984,   5.5631133 ]])

        params[0]["weight"] = np.array([[7.18777985, -13.68815256, 1.69010242, -5.62483187,
                           -4.30451483, 10.09592853],
                         [13.33104057, 13.60537864, 3.46939294, 0.8446329,
                         -14.79733566, -4.78599648]])

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    weight_saver2_1 = []
    weight_saver2_2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    num_targets = init_max_target
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(20,init_max_target)
        
        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)  # decay disabled
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        #TEMP
        #x = [2000,-2000]
        #y = [2000,2000]
        #xdot = [1,1]
        #ydot = [-1,-1]

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_MLP_state = []
        episode_actions = []
        avg_uncertainty= []
        max_uncertainty = []

        while episode_condition:
            temp_m = []
            input_state_temp = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []
            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)


                #Refactor states based on the usage
                if method==0 or method==2:
                    input_state = current_state
                    input_state_temp.append(input_state) #store input-sates
                elif method==1:
                    #Generate states for the RBF input
                    input_state =  featurizer.transform(np.array(current_state).reshape(1,len(current_state)))
                    input_state = list(input_state[0])


                target_actions.append(s.generate_action(params,input_state,.01))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) #### needs to be modified
                if method==2: episode_MLP_state.append(extra_information) # needs to be modified; extra_information is not defined in this variant
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                #if len(uncertainty[i])<window_size+window_lag:
                 #   temp_reward.append(0)
                #else:
                 #   current_avg = np.mean(uncertainty[i][-window_size:])
                  #  prev_avg = np.mean(uncertainty[i][-(window_size+window_lag):-window_lag])
                   # if current_avg<prev_avg or uncertainty[i][-1]<.1:
                    #if current_avg < prev_avg:
                    #    temp_reward.append(1)
                    #else:
                     #   temp_reward.append(0)

            this_uncertainty = [uncertainty[x][-1] for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #voting
            #if np.mean(temp_reward)>.5:
             #   reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            normalized_state,index_matrix1,index_matrix2,slope = s.update_location_decentralized(target_actions,sigma,params) #Update the sensor location based on all individual actions
            #index_matrix: an n_s \times T matrix that shows the derivative of state in the output layer to the action space in the internal-layer

            backpropagated_to_internal_1 = index_matrix1.dot(np.array(input_state_temp))#8 by 6
            backpropagated_to_internal_2 = index_matrix2.dot(np.array(input_state_temp))# 8 by 6

            episode_state_out_layer.append(normalized_state)
            episode_state.append([backpropagated_to_internal_1,backpropagated_to_internal_2]) #each entry would be a T \times 6 matrix with T being the number of targets
            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])

        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break

        if not condition:
            #print("OOPSSSS...")
            continue
        #if episode_counter%100==0 and training:
            #print("Starting the evaluation phase...")
            #training = False
            #episode_condition = False


        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-12,20000)
            internal_rate = gen_learning_rate(episode_counter, 3*1E-5, 1E-15, 20000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)
                backpropagated_terms = episode_state[e]

                #calculate the gradient
                if method==0:
                    deriv_with_out_state = (episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).transpose().dot(params[0]['weight2']) #1 by n_s ==> derivative of F with respect to the output state-vector
                    internal_gradient1 = deriv_with_out_state.dot(backpropagated_terms[0]) #1 by 6
                    internal_gradient2 = deriv_with_out_state.dot(backpropagated_terms[1]) #1 by 6
                    internal_gradient = np.concatenate([internal_gradient1,internal_gradient2])

                    gradient_out_layer = ((episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).dot(
                        out_state.transpose())) / sigma ** 2
                elif method==1:
                    # NOTE: `state` is not defined in this variant, so this branch is dead code here
                    gradient = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradient_out_layer))>1E2 or np.max(np.abs(internal_gradient))>1E2:
                    #print("OOPPSSSS...")
                    continue #clip large gradients

                if method==0:
                    adjustment_term_out_layer = gradient_out_layer*normalized_discounted_return[e]#an unbiased sample of the return
                    adjustment_term_internal_layer = internal_gradient*normalized_discounted_return[e]
                    params[0]['weight2'] += rate * adjustment_term_out_layer
                    params[0]['weight'] += internal_rate * adjustment_term_internal_layer
                elif method==1:
                    adjustment_term = gradient * normalized_discounted_return[e]  # an unbiased sample of the return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            flatted_weights1 = list(params[0]['weight'][0, :]) + list(params[0]['weight'][1, :])
            flatted_weights2 = list(params[0]['weight2'][0, :]) + list(params[0]['weight2'][1, :])
            temp1 = [str(x) for x in flatted_weights1]
            temp2 = [str(x) for x in flatted_weights2]
            weight_file.write("\t".join(temp1)+"$$$"+"\t".join(temp2)+"\n")
            #flatted_weights = list(weight[0, :]) + list(weight[1, :])
            #temp = []
            #[temp.append(str(x)) for x in flatted_weights]
            #weight_file.write("\t".join(temp)+"\n")
            weight_saver1.append(params[0]['weight'][0][0])
            weight_saver2.append(params[0]['weight'][1][0])

            weight_saver2_1.append(params[0]['weight2'][0][0])
            weight_saver2_2.append(params[0]['weight2'][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
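
# t[0].constant_velocity(1E-10) above returns the motion-model pair (A, B). For a
# standard discrete-time constant-velocity model with sampling period T and white
# acceleration noise, the usual matrices are sketched below (the original class may
# differ in detail; its argument is presumably a noise-level parameter):
import numpy as np

def constant_velocity_model(T=1.0):
    # state ordering: [x, y, xdot, ydot]
    A = np.array([[1, 0, T, 0],
                  [0, 1, 0, T],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=float)
    # B maps 2-D acceleration noise into the state, so B Q B^T is the
    # predicted noise covariance used by the trackers above
    B = np.array([[T**2 / 2, 0],
                  [0, T**2 / 2],
                  [T, 0],
                  [0, T]], dtype=float)
    return A, B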
if __name__ == '__main__':

    robot_name = 'TIAGo'
    rospy.init_node('ltl_planner_%s' % robot_name)
    ########
    world = rospy.get_param('world_name')
    if len(sys.argv) == 2:
        world = str(sys.argv[1])
        print('Argument: %s.' % str(sys.argv[1]))
    else:
        print('No argument: world set automatically at %s' % str(world))
    # to run: python ltl_planner.py '<> (r2 && <>r3)'
    # to run: python ltl_planner.py '([]<> r2) && ([]<> r3) && ([]<> r1)'
    # to run: python ltl_planner.py '<> r2 && ([]<> r3) && ([]<> r1)'
    ###############
    while 1:
        if world in ('small_office', 'tutorial_office', 'tabletop_cube'):
            scenario.scenario(world)

        try:
            robot_task = rospy.get_param('plan')
            #print('param = %s' % str('plan'))
            if str(robot_task) == 'none':
                sys.exit()
            print('Robot task: %s.' % str(robot_task))
            # robot_model is assumed to be defined elsewhere in the original module
            [robot_motion, init_pose, robot_action] = robot_model
            planner(robot_motion, init_pose, robot_action, robot_task)
        except rospy.ROSInterruptException:
            pass
  'ssh': {
   'username': '******',
   'key': '/home/bastien/.ssh/id_rsa_grid5k'
  }
 }
})

if __name__ == '__main__':
 pw = getpass.getpass()
 config['iotlab']['ssh']['password'] = pw
 config['g5k']['ssh']['password'] = pw

 iotlab = iotlab_testbed(config.iotlab)
 g5k = g5k_testbed(config.g5k)

 sc = scenario(g5k, iotlab)
 sc.deploy()
 sc.play()

 # book nodes (2 nodes m3 and 1 a8 on iotlab)
# g5k.book_nodes()
# g5k.wait_nodes()
# iotlab.deploy('')


 # deploy g5k / install ipfs
 # deploy iotlab flash m3!
 # set ssh tunnel between g5k and iotlab
 # put an object
 # read it
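
# The scenario(g5k, iotlab) object deployed above is not defined in this snippet.
# Below is a hypothetical minimal sketch of the interface it would need (method
# names taken from the calls above; the bodies are placeholders):
class TestbedScenarioSketch(object):
    def __init__(self, g5k, iotlab):
        self.g5k = g5k          # Grid'5000 testbed handle
        self.iotlab = iotlab    # FIT IoT-LAB testbed handle

    def deploy(self):
        # book and provision nodes on both testbeds
        raise NotImplementedError

    def play(self):
        # run the experiment once both testbeds are deployed
        raise NotImplementedError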
def run(args):
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #args = [0,20,50,0,"TEST1",.001,10]

    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    #process_index = args[4]
    np.random.seed(process_index + 100)
    vel_var = args[5]
    num_targets = args[6]




    #method = 0
    #RBF_components = 20
    #MLP_neurons = 50
    #vel_var = .001
    #num_targets = min(6,max(2,np.random.poisson(3)))
    #num_targets = 2

    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters (input && output-layers)
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight"] = np.random.normal(0, .3, [2, output_size]) #Output-layer (maps flattened states to the actions)
        #params[0]["weight"] = []
        #for f in range(0,filter_size):
         #   params[0]["weight"].append(np.random.normal(0,1,[spatial_weight_size,temporal_weight_size])) #Convolution weith matrix

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    num_targets = 3
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(10,init_max_target)

        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        num_targets = 3  # fixed for now; overrides the random draw above
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)  # decay disabled
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        avg_uncertainty = []
        max_uncertainty = []

        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_grad_with_state_w1 = []
        episode_grad_with_state_w2 = []

        episode_MLP_state = []
        episode_actions = []


        while episode_condition:
            temp_m = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []

            #create input-feature matrix
            input_state = np.zeros([num_states,num_targets]) #create a fixed-size matrix for input states


            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)

                if method==0 or method==2:input_state[:,i] = current_state


                #target_actions.append(s.generate_action(params,input_state,.1))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) #### needs to be modified
                if method==2: episode_MLP_state.append(extra_information) # needs to be modified; extra_information is not defined in this variant
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)

            #Featurize input-state using pooling
            input_state = list(np.max(input_state,axis=1))+list(np.min(input_state,axis=1))+\
                          list(np.mean(input_state, axis=1))+list(np.median(input_state,axis=1))
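            #pooling makes the policy input size independent of num_targets: the num_states x num_targets
            #matrix collapses to 4*num_states features (per-dimension max, min, mean and median across targets)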

            this_uncertainty = [uncertainty[x][-1] for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)
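            #reward rule implemented above: +1 whenever the moving average of uncertainty over the last
            #window_size steps has dropped relative to the same window lagged by window_lag steps
            #(or the instantaneous uncertainty is already below .1), 0 otherwise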

            #voting
            #if np.mean(temp_reward)>.5:
              #  reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            s.update_location_new(params,np.array(input_state).reshape([len(input_state),1]),sigma)

            #Output created by the CNN
            episode_state_out_layer.append(input_state)


            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
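            #bookkeeping so that, once the episode ends, discounted_return[t] = sum_{k>=t} gamma^(k-t)*reward[k]:
            #discount_vector holds the powers of gamma for every past step and gains a fresh 1 each step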
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])


        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break

        if not condition:
            #print("OOPSSSS...")
            continue
        if episode_condition and training:
            normalized_discounted_return = discounted_return  #no extra normalization is applied here
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-8,10000)
            internal_rate = gen_learning_rate(episode_counter, 5*1E-5, 1E-9, 10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #calculate gradient
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)

                #calculate gradient
                if method==0:
                    predicted_action = params[0]['weight'].dot(out_state)
                    #gradient = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2#This is the gradient
                    gradient_out_layer = ((episode_actions[e].reshape(2, 1) - predicted_action).dot(
                        out_state.transpose())) / sigma ** 2  # this is the gradient

                elif method==1:
                    #note: this branch needs `state` (the commented-out line above) to be defined
                    gradient = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2  # this is the gradient
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradient_out_layer))>1E2:# or np.max(np.abs(gradient_internal[0]))>1E2:
                    #print("OOPPSSSS...")
                    continue #skip updates with overly large gradients

                if method==0:
                    adjustment_term_out_layer = gradient_out_layer*normalized_discounted_return[e]#an unbiased sample of the return

                    params[0]['weight'] += rate * adjustment_term_out_layer
                elif method==1:
                    adjustment_term = gradient * normalized_discounted_return[e]  # an unbiased sample of the return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass
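                #REINFORCE with a Gaussian policy: for a ~ N(Wx, sigma^2 I),
                #grad_W log pi(a|x) = (a - Wx).x^T / sigma^2, so the updates above are
                #W += rate * G_t * grad_W log pi(a_t|x_t), with out_state playing the role of x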

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            flatted_weights = list(params[0]['weight'][0,:]) + list(params[0]['weight'][1,:])
            temp = [str(x) for x in flatted_weights]
            weight_file.write("\t".join(temp)+"\n")
            #weight_saver1.append(params[0]['weight'][0][0][0])
            #weight_saver2.append(params[0]['weight'][0][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
Example #12
    for i in range(n_plat):
        liste[i] = m
    return liste
#PART: SIMULATING THE FIRST PERIODS WITH RANDOM PRICES
#define listePRIX
liste_tab_Y = [[] for i in range(n_plat)]
liste_tab_X = [[] for i in range(n_plat)]
listeRevenus1 = [[] for i in range(n_plat)]
listeRevenus2 = [[] for i in range(n_plat)]
listeRevenus3 = [[] for i in range(n_plat)]

"""matriceOriginelle = matricefacto."""
for i in range(n_jours_training):
    print("Jour ", i)
    state = [stock_beg for i in range(n_plat)]
    scen = scenario.scenario(n_clients, n_periodes, n_plat)
    for pe in range(n_periodes-1):
        liste_prix = remplirListePrix(pe)

        listeDemandes, tab_notes = scen.simuler(pe, liste_prix)
        for plat in range(n_plat):
            state[plat] -= listeDemandes[plat]
            a = 0
            if state[plat] < 0:
                a = listeDemandes[plat]+state[plat]
            else:
                a = listeDemandes[plat]
            state[plat] = max(0, state[plat])
            listeRevenus1[plat].append(a*liste_prix[plat])

        # add the ratings (NOTES) part....
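
#A minimal standalone sketch of the stock/revenue step above; the names here are
#illustrative, not from the original code, and demand/stock are plain numbers:
def units_sold(stock_before, demand):
    #you can only sell what is actually in stock
    return min(stock_before, max(demand, 0))

assert units_sold(10, 4) == 4  #enough stock: the full demand is sold
assert units_sold(3, 7) == 3   #stockout: only the remaining 3 units are sold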
Example #13
def load_reference_links(scen):
    if type(scen) is str:
        scen = scenario(scen)
    ReferenceLinks = collections.namedtuple("ReferenceLinks", "ids lengths lanes")
    links = scen.findAll('link')
    return ReferenceLinks([int(link['id']) for link in links], [float(link['length']) for link in links], [float(link['lanes']) for link in links])
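
#Hedged usage sketch: load_reference_links accepts either an already-parsed scenario
#or a path/string accepted by scenario(...); the document is assumed to contain <link>
#elements with id/length/lanes attributes (findAll suggests a BeautifulSoup-style tree):
#    refs = load_reference_links("my_scenario.xml")  #"my_scenario.xml" is illustrative
#    print(refs.ids[:5], refs.lengths[:5], refs.lanes[:5])

#the indented block below is a fragment from a different example (episode-loop setup
#for the RL tracking code) that was fused into this one in the source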
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    avg_reward = []
    var_reward = []
    list_of_states = []

    #Main loop to count number of valid episodes
    while episode_counter<N_max:
        #variable variance?
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,20000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episode_counter)
        scen = scenario(1,1) #create scenario object (single-target, single-sensor)
        bearing_var = 1E-2#variance of bearing measurement
        #Initialize target location + velocity randomly
        x = 2000*random.random()-1000#initial x-location
        y = 2000 * random.random() - 1000#initial y-location
        xdot = 10*random.random()-5#initial xdot-value
        ydot = 10 * random.random() - 5#initial ydot-value

        init_target_state = [x,y,xdot,ydot]#initialize target state
        #Add noise to initial target location since the tracker doesn't know about the initial location
        init_for_smc = [x+np.random.normal(0,5),y+np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)]#init state for the tracker (tracker doesn't know about the initial state)
        #initialize sensor location randomly too
        init_sensor_state = [2000*random.random()-1000,2000 * random.random() - 1000,3,-2]#initial sensor-state
        temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
Example #15
import scenario

scen = scenario.scenario(10000, 24, 3)
somme1 = 0
somme2 = 0
somme3 = 0
for i in range(10000):
    tab = scen.simuler(12, [10, 10, 10])
    somme1 += tab[0]
    somme2 += tab[1]
    somme3 += tab[2]
print(somme1 / 10000)
print(somme2 / 10000)
print(somme3 / 10000)
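
#An equivalent, more compact average using numpy (a sketch; it assumes, as the loop
#above does, that scen.simuler(12, [10, 10, 10]) returns a 3-element sequence of numbers):
import numpy as np

demands = np.array([scen.simuler(12, [10, 10, 10]) for _ in range(10000)])
print(demands.mean(axis=0))  #per-dish averages; matches the three prints above in expectation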
Example #16
def run(args):
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    vel_var = args[0]
    heading_rate = args[1]
    experiment_folder_name = args[2]
    file = open(
        base_path + "/" + experiment_folder_name + "/best_data_" +
        str(heading_rate), "w")
    # initialize actor parameters
    MAX_UNCERTAINTY = 1E9

    num_states = 6
    weight = np.random.normal(0, 1, [2, num_states])

    sigma_max = 1
    num_episodes = []
    gamma = .99

    episode_length = 1500
    learning_rate = 1E-3
    min_learning_rate = 1E-6
    N_max = 200

    window_size = 50
    window_lag = 10
    return_saver = []

    weight_saver1 = []
    weight_saver2 = []

    total_error = {}
    total_error_variance = {}
    total_reward = {}
    #for episode_counter in range(0,N_max):
    #Training parameters
    print("heading-rate=" + str(heading_rate))
    for xdot_sensor in np.arange(-15, 16, 1):
        for ydot_sensor in np.arange(-15, 16, 1):

            episode_counter = 0
            avg_reward = []
            var_reward = []
            error_saver = []
            while episode_counter < N_max:
                #sigma = gen_learning_rate(episode_counter, sigma_max, .1, 5000)  #variable variance (disabled: overridden below)
                sigma = sigma_max
                discounted_return = np.array([])
                discount_vector = np.array([])
                #print(episode_counter)
                scen = scenario(1, 1)
                bearing_var = 1E-2  #variance of bearing measurement
                #Target information
                x = 10000 * random.random() - 5000  #initial x-location
                y = 10000 * random.random() - 5000  #initial y-location
                xdot = 10 * random.random() - 5  #initial xdot-value
                ydot = 10 * random.random() - 5  #initial ydot-value
                #x = 250; y = 50; xdot = 7; ydot = -5

                init_target_state = [x, y, xdot,
                                     ydot]  #initialize target state
                init_for_smc = [
                    x + np.random.normal(0, 5), y + np.random.normal(0, 5),
                    np.random.normal(0, 5),
                    np.random.normal(0, 5)
                ]  #init state for the tracker (tracker doesn't know about the initial state)
                #init_for_smc = [x, y, xdot, ydot]
                init_sensor_state = [
                    10000 * random.random() - 5000,
                    10000 * random.random() - 5000, 3, -2
                ]  #initial sensor-state

                temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
                init_location_estimate = temp_loc + 0 * np.random.normal(
                    np.zeros([2, 1]), 10)
                init_location_estimate = [
                    init_location_estimate[0][0], init_location_estimate[1][0]
                ]
                #init_velocity_estimate = [6 * random.random() - 3, 6 * random.random() - 3]  #random init (overridden below)
                init_velocity_estimate = [
                    init_target_state[2], init_target_state[3]
                ]

                init_estimate = init_location_estimate + init_velocity_estimate
                init_covariance = np.diag([
                    MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY,
                    MAX_UNCERTAINTY
                ])  #initial covariance of state estimation

                t = target(
                    init_target_state[0:2], init_target_state[2],
                    init_target_state[3], vel_var, vel_var,
                    "CONS_V")  #constant-velocity model for target motion
                A, B = t.constant_velocity(1E-10)  #Get motion model
                x_var = t.x_var
                y_var = t.y_var
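                #for reference, the standard discrete-time constant-velocity model with sampling
                #period T and state [x, y, xdot, ydot] (the exact form returned by
                #t.constant_velocity may differ):
                #  A = [[1,0,T,0],[0,1,0,T],[0,0,1,0],[0,0,0,1]]
                #  B = [[T**2/2,0],[0,T**2/2],[T,0],[0,T]]  #maps 2-D acceleration noise into the state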

                tracker_object = EKF_tracker(
                    init_for_smc, init_covariance, A, B, x_var, y_var,
                    bearing_var)  #create tracker object
                #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

                s = sensor(
                    "CONS_V", [0, 0], [xdot_sensor, ydot_sensor],
                    heading_rate)  #create sensor object (stochastic policy)
                #s = sensor("CONS_V")
                measure = measurement(bearing_var)  #create measurement object
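                #a bearing measurement is typically atan2(y_t - y_s, x_t - x_s) plus zero-mean
                #Gaussian noise with variance bearing_var (assumed form; the measurement class may differ)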

                m = []
                x_est = []
                y_est = []
                x_vel_est = []
                y_vel_est = []
                x_truth = []
                y_truth = []
                x_vel_truth = []
                y_vel_truth = []
                uncertainty = []

                vel_error = []
                pos_error = []
                iteration = []
                innovation = []

                reward = []
                episode_condition = True
                n = 0
                violation = 0
                #store required information
                episode_state = []
                episode_actions = []

                while episode_condition:

                    #if n>50: episode_condition=False
                    #update location of target and sensor + generate new measurement
                    #Also, run tracker object
                    t.update_location()
                    m.append(
                        measure.generate_bearing(t.current_location,
                                                 s.current_location))
                    tracker_object.update_states(s.current_location, m[-1])
                    #if len(tracker_object.meas_vec)>20:
                    #   tmp = np.zeros([2,2])
                    #  for n in range(0,10):
                    #     vector = tracker_object.meas_vec[-1-n]
                    #    cov = (vector.transpose().dot(vector))/bearing_var
                    #   sliced_cov = np.array([[cov[0,0],cov[0,1]],[cov[1,0],cov[1,1]]])
                    #  tmp+= sliced_cov

                    #Fisher_matrix = tmp/10.0
                    #crlb = np.linalg.inv(Fisher_matrix)
                    #print(crlb.diagonal())

                    #create state-vector

                    normalized_innovation = (
                        tracker_object.innovation_list[-1]
                    ) / tracker_object.innovation_var[-1]

                    #print(normalized_innovation)
                    #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                    current_state = list(
                        tracker_object.x_k_k.reshape(len(
                            tracker_object.x_k_k))) + list(s.current_location)

                    #print(current_state)
                    #state normalization
                    x_slope = 2.0 / (scen.x_max - scen.x_min)
                    y_slope = 2.0 / (scen.y_max - scen.y_min)
                    vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                    #normalization
                    current_state[0] = -1 + x_slope * (current_state[0] -
                                                       scen.x_min)
                    current_state[1] = -1 + y_slope * (current_state[1] -
                                                       scen.y_min)
                    current_state[2] = -1 + vel_slope * (current_state[2] -
                                                         scen.vel_min)
                    current_state[3] = -1 + vel_slope * (current_state[3] -
                                                         scen.vel_min)
                    current_state[4] = -1 + x_slope * (current_state[4] -
                                                       scen.x_min)
                    current_state[5] = -1 + y_slope * (current_state[5] -
                                                       scen.y_min)
                    s.update_location(weight, sigma, np.array(current_state))
                    estimate = tracker_object.x_k_k
                    episode_state.append(current_state)

                    truth = t.current_location
                    x_est.append(estimate[0])
                    y_est.append(estimate[1])
                    x_vel_est.append(estimate[2])
                    y_vel_est.append(estimate[3])

                    x_truth.append(truth[0])
                    y_truth.append(truth[1])

                    x_vel_truth.append(t.current_velocity[0])
                    y_vel_truth.append(t.current_velocity[1])

                    #print(estimate[-1])
                    #print(np.linalg.norm(estimate[2:4]-np.array([t.current_velocity[0],t.current_velocity[1]])))
                    vel_error.append(
                        np.linalg.norm(estimate[2:4] - np.array([
                            t.current_velocity[0], t.current_velocity[1]
                        ]).reshape(2, 1)))
                    pos_error.append(
                        np.linalg.norm(estimate[0:2] -
                                       np.array(truth).reshape(2, 1)))
                    innovation.append(normalized_innovation[0])

                    unormalized_uncertainty = np.sum(
                        tracker_object.p_k_k.diagonal())
                    #if unormalized_uncertainty>MAX_UNCERTAINTY:
                    #   normalized_uncertainty = 1
                    #else:
                    #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty

                    uncertainty.append(
                        (1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                    if len(uncertainty) < window_size + window_lag:
                        reward.append(0)
                    else:
                        current_avg = np.mean(uncertainty[-window_size:])
                        prev_avg = np.mean(
                            uncertainty[-(window_size +
                                          window_lag):-window_lag])
                        if current_avg < prev_avg or uncertainty[-1] < .1:
                            #if current_avg < prev_avg:
                            reward.append(1)
                        else:
                            reward.append(0)

                    #reward.append(-1*uncertainty[-1])
                    #update return

                    discount_vector = gamma * np.array(discount_vector)
                    #discount_vector = list(discount_vector)
                    #discount_vector.append(1)

                    discounted_return += (1.0 * reward[-1]) * discount_vector
                    new_return = 1.0 * reward[-1]
                    list_discounted_return = list(discounted_return)
                    list_discounted_return.append(new_return)
                    discounted_return = np.array(list_discounted_return)

                    list_discount_vector = list(discount_vector)
                    list_discount_vector.append(1)
                    discount_vector = np.array(list_discount_vector)

                    iteration.append(n)
                    if n > episode_length: break
                    n += 1
                num_episodes.append(n)
                error_saver.append(np.mean(pos_error))
                return_saver.append(sum(reward))
                episode_counter += 1

            total_error[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.mean(
                sorted(error_saver)[0:int(.95 * N_max)])
            total_reward[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.mean(
                sorted(return_saver, reverse=True)[0:int(.95 * N_max)])
            total_error_variance[str(xdot_sensor) + "|" +
                                 str(ydot_sensor)] = np.var(
                                     sorted(error_saver)[0:int(.95 * N_max)])

    sorted_error = sorted(total_error.items(), key=operator.itemgetter(1))
    key = sorted_error[0][0]

    file.write("Min Error=" + str(sorted_error[0][1]) + "\n")
    file.write("Best params=" + str(key) + "\n")
    file.close()
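
#Hedged usage sketch: run expects [vel_var, heading_rate, experiment_folder_name], and
#base_path plus the experiment folder must exist before the output file is opened.
#The values are illustrative only:
#    run([0.1, 0.05, "grid_search_exp"])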