# __init__ of a particle-filter tracker (the enclosing class definition is not part of this snippet)
def __init__(self, A, B, x_var, y_var, bearing_var, N, initial_state):
        self.bearing_var = bearing_var
        self.A = A
        self.B = B
        self.x_var = x_var
        self.y_var = y_var
        self.num_particles = N
        self.innovation = None

        scen = scenario(1,1)
        loc_min = np.array([scen.x_min, scen.y_min]).reshape(2,1)
        loc_max = np.array([scen.x_max - scen.x_min, scen.y_max - scen.y_min]).reshape(2, 1)
        a = np.kron(loc_min, np.ones([1, self.num_particles]))
        b = np.kron(loc_max, np.ones([1, self.num_particles]))

        vel_min = np.array([scen.vel_min, scen.vel_min]).reshape(2, 1)
        vel_max = np.array([scen.vel_max - scen.vel_min, scen.vel_max - scen.vel_min]).reshape(2, 1)
        aa = np.kron(vel_min, np.ones([1, self.num_particles]))
        bb = np.kron(vel_max, np.ones([1, self.num_particles]))

        #initial_loc_particles = b*np.random.rand(2,self.num_particles)+a
        # Location particles start exactly at the given initial state: the covariance
        # below is all zeros and the sampled noise is multiplied by 0, so the noise
        # term is effectively disabled.
        temp_cov = np.zeros((2, 2))
        initial_loc_particles = np.kron(np.array(initial_state[0:2]).reshape(2,1),np.ones([1, self.num_particles])) + 0*np.random.multivariate_normal(np.zeros([2]),temp_cov,self.num_particles).transpose()
        initial_vel_particles = bb*np.random.rand(2,self.num_particles)+aa
        initial_state_particles = np.concatenate((initial_loc_particles,initial_vel_particles))



        """
        temp_cov = np.eye(4)
        temp_cov[0,0] = 20
        temp_cov[1,1] = 20
        temp_cov[2,2] = 1
        temp_cov[3,3] = 1
        initial_state = initial_state.reshape(4,1)
        a = np.kron(initial_state,np.ones([1, self.num_particles]))
        initial_state_particles = a + np.random.multivariate_normal(np.zeros([4]),temp_cov,self.num_particles).transpose()
        """



        self.particles_k_km1 = initial_state_particles
        self.particles_k_k = initial_state_particles
        self.bearing_k_km1 = np.zeros([self.num_particles,1])
        self.weight_k_km1 = (1.0/self.num_particles)*np.ones([self.num_particles,1])
        self.weight_k_k = (1.0 / self.num_particles) * np.ones([self.num_particles, 1])

        Q = .01 * np.eye(2)  # process-noise covariance

        self.predicted_noise_covariance = (self.B.dot(Q)).dot(self.B.transpose())
        # NOTE: Z is constructed but never stored or used; it looks like leftover tuning code.
        Z = np.eye(4)
        Z[0, 0] = 5
        Z[1, 1] = 5
        Z[2, 2] = .01
        Z[3, 3] = .01
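
# The constructor above reads region and velocity bounds off a `scenario` object.
# Below is a hypothetical minimal sketch of that interface; the attribute names come
# from the usage above, but the numeric bounds are assumptions, not values from the
# original project.
import numpy as np

class ScenarioSketch(object):
    def __init__(self, grid_x=1, grid_y=1):
        # surveillance region bounds (assumed)
        self.x_min, self.x_max = -20000.0, 20000.0
        self.y_min, self.y_max = -20000.0, 20000.0
        # velocity limits used for particle initialization and state normalization
        self.vel_min, self.vel_max = -5.0, 5.0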
def main(scen_file):
  import scenario
  struct = network_structure(scenario.scenario(scen_file))
  string = ''
  for item in struct:
    item = [str(x) if x is not None else '' for x in item]
    string += '(' + ','.join(item) + '),'
  print(string[:-1])
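
# A minimal usage sketch for `main` (the command-line argument below is a placeholder,
# not a file name from the original source):
if __name__ == '__main__':
  import sys
  main(sys.argv[1])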
def output_from_settings(settings):
    scen = scenario(settings.scenario)
    ref_links = load_reference_links(scen)
    ml, ors, srcs = zip(*network_structure(scen))
    ml_idxs = link_indices(ml, ref_links)
    or_idxs = link_indices(ors, ref_links)
    src_idxs = link_indices(srcs, ref_links)    
    ml_lengths = link_lengths(ml, ref_links)
    ml_lanes = link_lanes(ml, ref_links)
    return scen, Output(ml, ors, ml_idxs, ml_lengths, ml_lanes, or_idxs, src_idxs)
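
# `output_from_settings` leans on a few helpers that are not shown here. The
# definitions below are hypothetical sketches consistent with how they are called
# above, assuming `ref_links` maps a link id to a record carrying length and lane
# data; the real implementations may differ.
def link_indices(links, ref_links):
    # position of each link id in the reference ordering
    order = list(ref_links)
    return [order.index(link) for link in links]

def link_lengths(links, ref_links):
    return [ref_links[link]['length'] for link in links]

def link_lanes(links, ref_links):
    return [ref_links[link]['lanes'] for link in links]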
 def on_exec_scenario(self, *args):
     """Récupération des paramètres d'analyse"""
     Scen = scenario()
     Scen.beta=self.app.beta.get_value()
     Scen.sigmaH=self.app.sigmah.get_value()
     #Scen.v_h_facteur= self.app.vh.get_value()
     #Scen.v_h_facteur=(Scen.v_h_facteur)
     Scen.dt=self.app.dt.get_value_as_int()
     Scen.Thrf=self.app.thrf.get_value_as_int()
     Scen.TR=self.app.tr.get_value_as_int()
     Scen.K=self.app.k.get_value_as_int()
     Scen.M=self.app.m.get_value_as_int()
     Scen.scale=self.app.scale.get_value_as_int()
     return Scen
    t.hiddenDuplexCollision = hiddenDuplexCollision[k]
    t.exposedSpatialReuse = exposedSpatialReuse[k]

    nodes.append(t)

print(nodes)

"-------- print ENTER to confirm the input--------------"
#confirmKey = raw_input("If setting is ready, press ENTER to continue, any other key to abort ... ")
#assert confirmKey == '', "setting wrong, programs abort :("

#nodes[2].goodChans = np.array( [0,0,0,1] )
#nodes[1].goodChans = np.array( [0,1,1,0] )
#nodes[2].goodChans = np.array( [0,0,1,1] )

simulationScenario = scenario(numSteps, 'fixed', 3)

# Vector and Matrix Initializations
actions = np.zeros((numNodes, numChans))
collisions = np.zeros(numNodes)
collisionTally = np.zeros((numNodes, numNodes))  #TODO
collisionHist = np.zeros((numSteps, numNodes))
cumulativeCollisions = np.zeros((numSteps, numNodes))
cumulativeAbsents = np.zeros((numSteps, numNodes))

mdpLearnTime = np.zeros(numSteps)
dqnLearnTime = np.zeros(numSteps)
''' ===================================================================  '''
'''                 MAIN LOOP BEGIN                               '''
''' ===================================================================  '''
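
# The training loops below call gen_learning_rate(iteration, start, floor, horizon),
# whose definition is not included in this snippet. A plausible sketch, assuming a
# geometric decay from `start` toward `floor` over roughly `horizon` iterations:
def gen_learning_rate(iteration, start, floor, horizon):
    # decay the rate geometrically and clamp it at the floor
    decayed = start * (floor / start) ** (min(1.0, float(iteration) / horizon))
    return max(floor, decayed)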
def run(args):
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy
    
    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    #np.random.seed(1 + 100)  # superseded by the per-process seed below
    vel_var = args[5]
    
    np.random.seed(process_index)
    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight"] = np.random.normal(0, .3, [2, num_states])
        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    sigma = sigma_max
    while episode_counter<N_max:
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,20000)
        if episode_counter%1500==0 and episode_counter>0:
            sigma-= .15
            sigma = max(.1,sigma)
        #sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*random.random()-5000#initial x-location
        y = 10000 * random.random() - 5000#initial y-location
        xdot = 10*random.random()-5#initial xdot-value
        ydot = 10 * random.random() - 5#initial ydot-value
        init_target_state = [x,y,xdot,ydot]#initialize target state
        init_for_smc = [x+np.random.normal(0,5),y+np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)]#init state for the tracker (tracker doesn't know about the initial state)
        #init_for_smc = [x, y, xdot, ydot]
        init_sensor_state = [10000*random.random()-5000,10000 * random.random() - 5000,3,-2]#initial sensor-state

        temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        init_location_estimate = temp_loc + 0*np.random.normal(np.zeros([2,1]),10)  # noise term disabled by the 0 factor
        init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]  # random velocity init (unused)
        init_velocity_estimate = [init_target_state[2],init_target_state[3]]  # use the true velocity
        init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation

        t = target(init_target_state[0:2], init_target_state[2], init_target_state[3], vel_var, vel_var, "CONS_V")#constant-velocity model for target motion
        A, B = t.constant_velocity(1E-10)#Get motion model
        x_var = t.x_var
        y_var = t.y_var

        tracker_object = EKF_tracker(init_for_smc, init_covariance, A,B,x_var,y_var,bearing_var)#create tracker object
        #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_MLP_state = []
        episode_actions = []

        while episode_condition:
            t.update_location()
            m.append(measure.generate_bearing(t.current_location,s.current_location))
            tracker_object.update_states(s.current_location, m[-1])
            normalized_innovation = (tracker_object.innovation_list[-1])/tracker_object.innovation_var[-1]
            #print(normalized_innovation)
            #if (normalized_innovation<1E-4 or n<10) and n<200:
                #end of episode
            current_state = list(tracker_object.x_k_k.reshape(len(tracker_object.x_k_k))) + list(s.current_location)

            #print(current_state)
            #state normalization
            x_slope = 2.0/(scen.x_max-scen.x_min)
            y_slope = 2.0 / (scen.y_max - scen.y_min)

            x_slope_sensor = 2.0 / (40000)
            y_slope_sensor = 2.0 / (40000)

            vel_slope = 2.0/(scen.vel_max-scen.vel_min)
            #normalization
            current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
            current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
            current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
            current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
            current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
            current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)


            #Refactor states based on the usage
            if method==0 or method==2:
                input_state = current_state
            elif method==1:
                #Generate states for the RBF input
                input_state =  featurizer.transform(np.array(current_state).reshape(1,len(current_state)))
                input_state = list(input_state[0])


            extra_information = s.update_location_new(params,input_state,sigma)
            estimate = tracker_object.x_k_k
            episode_state.append(input_state)
            if method==2: episode_MLP_state.append(extra_information) #Output of the first layer for Gradient calculation
            truth = t.current_location
            x_est.append(estimate[0])
            y_est.append(estimate[1])
            x_vel_est.append(estimate[2])
            y_vel_est.append(estimate[3])
            x_truth.append(truth[0])
            y_truth.append(truth[1])
            x_vel_truth.append(t.current_velocity[0])
            y_vel_truth.append(t.current_velocity[1])
            vel_error.append(np.linalg.norm(estimate[2:4]-np.array([t.current_velocity[0],t.current_velocity[1]]).reshape(2,1)))
            pos_error.append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
            innovation.append(normalized_innovation[0])
            unormalized_uncertainty = np.sum(tracker_object.p_k_k.diagonal())
            #if unormalized_uncertainty>MAX_UNCERTAINTY:
             #   normalized_uncertainty = 1
            #else:
             #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
            uncertainty.append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
            if len(uncertainty)<window_size+window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(uncertainty[-window_size:])
                prev_avg = np.mean(uncertainty[-(window_size+window_lag):-window_lag])
                if current_avg<prev_avg or uncertainty[-1]<.1:
                #if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])

        prev_params = dict(params)
        condition = True
        if np.mean(pos_error) > 10000:
            # diverged episode: skip it without updating the policy
            continue
        #if episode_counter%100==0 and training:
            #print("Starting the evaluation phase...")
            #training = False
            #episode_condition = False
        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-8,10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)

                #calculate the score-function gradient
                if method==0:
                    gradient = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2
                elif method==1:
                    gradient = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradient))>1E2: continue #clip large gradients

                if method==0:
                    adjustment_term = gradient*normalized_discounted_return[e]#an unbiased sample of the return
                    params[0]['weight'] += rate * adjustment_term
                elif method==1:
                    adjustment_term = gradient * normalized_discounted_return[e]  # an unbiased sample of the return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            #flatted_weights = list(weight[0, :]) + list(weight[1, :])
            #temp = []
            #[temp.append(str(x)) for x in flatted_weights]
            #weight_file.write("\t".join(temp)+"\n")
            #weight_saver1.append(weight[0][0])
            #weight_saver2.append(weight[0][1])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
    # NOTE: the block below appears to come from a multi-target variant of run();
    # num_targets is not defined earlier in this function.
    xt1 = []
    yt1 = []
    xt2 = []
    yt2 = []
    xt3 = []
    yt3 = []
    xt4 = []
    yt4 = []

    while episode_counter < N_max:
        sensor_locations = {}
        for weight_index in range(0, 1):
            discounted_return = np.array([])
            discount_vector = np.array([])
            #print(episodes_counter)
            scen = scenario(1, 1)
            bearing_var = 1E-2  #variance of bearing measurement
            #Target information
            x = 10000 * np.random.random([num_targets]) - 5000  #initial x-location
            y = 10000 * np.random.random([num_targets]) - 5000  #initial y-location
            xdot = 10 * np.random.random([num_targets]) - 5  #initial xdot-value
            ydot = 10 * np.random.random([num_targets]) - 5  #initial ydot-value
            #TEMP

            x = np.array([-2000, 2000, 4000, 2000])
            y = np.array([-4000, -4000, -1000, -2000])
            xdot = [2, -2, -4, -2]
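
# For reference: the update in run() above is REINFORCE with a Gaussian policy whose
# mean is linear in the state, a ~ N(W s, sigma^2 I). The score function is
# grad_W log pi(a|s) = (a - W s) s^T / sigma^2, which is exactly the `gradient` term
# computed in the loop. A self-contained sketch of that update (shapes assumed):
import numpy as np

def reinforce_step(W, states, actions, returns, sigma, rate):
    # one policy-gradient pass over a recorded episode
    for s, a, G in zip(states, actions, returns):
        s = np.asarray(s).reshape(-1, 1)               # column state vector
        a = np.asarray(a).reshape(-1, 1)               # column action vector
        grad = (a - W.dot(s)).dot(s.T) / sigma ** 2    # score-function gradient
        if np.max(np.abs(grad)) > 1E2:                 # skip exploding gradients
            continue
        W = W + rate * G * grad                        # ascend the expected return
    return W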
def run(args):
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #method = args[0]
    #RBF_components = args[1]
    #MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index+100)
    #process_index = 0
    #np.random.seed(process_index + 100)
    #vel_var = args[5]
    #num_targets = args[6]

    method = 0
    RBF_components = 20
    MLP_neurons = 50
    vel_var = .001
    #num_targets = min(6,max(2,np.random.poisson(3)))  # overridden by the draw below
    num_targets = np.random.randint(2,10)
    #num_targets = 4


    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight2"] = np.random.normal(0, .3, [2, num_states_layer2])
        #params[0]["weight2"] = np.array([[  3.97573312,   0.4639474 ,   2.27280486,  12.9085868 ,
         #   3.45722461,   6.36735166],
         #[-11.87940874,   2.59549414,  -5.68556954,   2.87746786,
          #  7.08059984,   5.5631133 ]])

        params[0]["weight"] = np.array([[7.18777985, -13.68815256, 1.69010242, -5.62483187,
                           -4.30451483, 10.09592853],
                         [13.33104057, 13.60537864, 3.46939294, 0.8446329,
                         -14.79733566, -4.78599648]])

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    weight_saver2_1 = []
    weight_saver2_2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    num_targets = init_max_target
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(20,init_max_target)
        
        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)  # decay disabled
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        #TEMP
        #x = [2000,-2000]
        #y = [2000,2000]
        #xdot = [1,1]
        #ydot = [-1,-1]

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_MLP_state = []
        episode_actions = []
        avg_uncertainty= []
        max_uncertainty = []

        while episode_condition:
            temp_m = []
            input_state_temp = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []
            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)


                #Refactor states based on the usage
                if method==0 or method==2:
                    input_state = current_state
                    input_state_temp.append(input_state) #store input-sates
                elif method==1:
                    #Generate states for the RBF input
                    input_state =  featurizer.transform(np.array(current_state).reshape(1,len(current_state)))
                    input_state = list(input_state[0])


                target_actions.append(s.generate_action(params,input_state,.01))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) #### needs to be modified
                if method==2: episode_MLP_state.append(extra_information) # needs to be modified; extra_information is not defined in this variant
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                #if len(uncertainty[i])<window_size+window_lag:
                 #   temp_reward.append(0)
                #else:
                 #   current_avg = np.mean(uncertainty[i][-window_size:])
                  #  prev_avg = np.mean(uncertainty[i][-(window_size+window_lag):-window_lag])
                   # if current_avg<prev_avg or uncertainty[i][-1]<.1:
                    #if current_avg < prev_avg:
                    #    temp_reward.append(1)
                    #else:
                     #   temp_reward.append(0)

            this_uncertainty = [uncertainty[x][-1] for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #voting
            #if np.mean(temp_reward)>.5:
             #   reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            normalized_state,index_matrix1,index_matrix2,slope = s.update_location_decentralized(target_actions,sigma,params) #Update the sensor location based on all individual actions
            #index_matrix: an n_s \times T matrix that shows the derivative of state in the output layer to the action space in the internal-layer

            backpropagated_to_internal_1 = index_matrix1.dot(np.array(input_state_temp))#8 by 6
            backpropagated_to_internal_2 = index_matrix2.dot(np.array(input_state_temp))# 8 by 6

            episode_state_out_layer.append(normalized_state)
            episode_state.append([backpropagated_to_internal_1,backpropagated_to_internal_2]) #each entry would be a T \times 6 matrix with T being the number of targets
            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])

        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break

        if not condition:
            #print("OOPSSSS...")
            continue
        #if episode_counter%100==0 and training:
            #print("Starting the evaluation phase...")
            #training = False
            #episode_condition = False


        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-12,20000)
            internal_rate = gen_learning_rate(episode_counter, 3*1E-5, 1E-15, 20000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)
                backpropagated_terms = episode_state[e]

                #calculate the gradient
                if method==0:
                    deriv_with_out_state = (episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).transpose().dot(params[0]['weight2']) #1 by n_s ==> derivative of F with respect to the output state-vector
                    internal_gradient1 = deriv_with_out_state.dot(backpropagated_terms[0]) #1 by 6
                    internal_gradient2 = deriv_with_out_state.dot(backpropagated_terms[1]) #1 by 6
                    internal_gradient = np.concatenate([internal_gradient1,internal_gradient2])

                    gradient_out_layer = ((episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).dot(
                        out_state.transpose())) / sigma ** 2
                elif method==1:
                    # NOTE: `state` is not defined in this variant, so this branch is dead code here
                    gradient = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradient_out_layer))>1E2 or np.max(np.abs(internal_gradient))>1E2:
                    #print("OOPPSSSS...")
                    continue #clip large gradients

                if method==0:
                    adjustment_term_out_layer = gradient_out_layer*normalized_discounted_return[e]#an unbiased sample of the return
                    adjustment_term_internal_layer = internal_gradient*normalized_discounted_return[e]
                    params[0]['weight2'] += rate * adjustment_term_out_layer
                    params[0]['weight'] += internal_rate * adjustment_term_internal_layer
                elif method==1:
                    adjustment_term = gradient * normalized_discounted_return[e]  # an unbiased sample of the return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            flatted_weights1 = list(params[0]['weight'][0, :]) + list(params[0]['weight'][1, :])
            flatted_weights2 = list(params[0]['weight2'][0, :]) + list(params[0]['weight2'][1, :])
            temp1 = [str(x) for x in flatted_weights1]
            temp2 = [str(x) for x in flatted_weights2]
            weight_file.write("\t".join(temp1)+"$$$"+"\t".join(temp2)+"\n")
            #flatted_weights = list(weight[0, :]) + list(weight[1, :])
            #temp = []
            #[temp.append(str(x)) for x in flatted_weights]
            #weight_file.write("\t".join(temp)+"\n")
            weight_saver1.append(params[0]['weight'][0][0])
            weight_saver2.append(params[0]['weight'][1][0])

            weight_saver2_1.append(params[0]['weight2'][0][0])
            weight_saver2_2.append(params[0]['weight2'][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
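
# t[0].constant_velocity(1E-10) above returns the motion-model pair (A, B). For a
# standard discrete-time constant-velocity model with sampling period T and white
# acceleration noise, the usual matrices are sketched below (the original class may
# differ in detail; its argument is presumably a noise-level parameter):
import numpy as np

def constant_velocity_model(T=1.0):
    # state ordering: [x, y, xdot, ydot]
    A = np.array([[1, 0, T, 0],
                  [0, 1, 0, T],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=float)
    # B maps 2-D acceleration noise into the state, so B Q B^T is the
    # predicted noise covariance used by the trackers above
    B = np.array([[T**2 / 2, 0],
                  [0, T**2 / 2],
                  [T, 0],
                  [0, T]], dtype=float)
    return A, B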
if __name__ == '__main__':

    robot_name = 'TIAGo'
    rospy.init_node('ltl_planner_%s' % robot_name)
    ########
    world = rospy.get_param('world_name')
    if len(sys.argv) == 2:
        world = str(sys.argv[1])
        print('Argument: %s.' % str(sys.argv[1]))
    else:
        print('No argument: world set automatically at %s' % str(world))
    # to run: python ltl_planner.py '<> (r2 && <>r3)'
    # to run: python ltl_planner.py '([]<> r2) && ([]<> r3) && ([]<> r1)'
    # to run: python ltl_planner.py '<> r2 && ([]<> r3) && ([]<> r1)'
    ###############
    while 1:
        if world in ('small_office', 'tutorial_office', 'tabletop_cube'):
            scenario.scenario(world)

        try:
            robot_task = rospy.get_param('plan')
            #print('param = %s' % str('plan'))
            if str(robot_task) == 'none':
                sys.exit()
            print('Robot task: %s.' % str(robot_task))
            # robot_model is assumed to be defined elsewhere in the original module
            [robot_motion, init_pose, robot_action] = robot_model
            planner(robot_motion, init_pose, robot_action, robot_task)
        except rospy.ROSInterruptException:
            pass
  'ssh': {
   'username': '******',
   'key': '/home/bastien/.ssh/id_rsa_grid5k'
  }
 }
})

if __name__ == '__main__':
 pw = getpass.getpass()
 config['iotlab']['ssh']['password'] = pw
 config['g5k']['ssh']['password'] = pw

 iotlab = iotlab_testbed(config.iotlab)
 g5k = g5k_testbed(config.g5k)

 sc = scenario(g5k, iotlab)
 sc.deploy()
 sc.play()

 # book nodes (2 nodes m3 and 1 a8 on iotlab)
# g5k.book_nodes()
# g5k.wait_nodes()
# iotlab.deploy('')


 # deploy g5k / install ipfs
 # deploy iotlab flash m3!
 # set ssh tunnel between g5k and iotlab
 # put an object
 # read it
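
# The scenario(g5k, iotlab) object deployed above is not defined in this snippet.
# Below is a hypothetical minimal sketch of the interface it would need (method
# names taken from the calls above; the bodies are placeholders):
class TestbedScenarioSketch(object):
    def __init__(self, g5k, iotlab):
        self.g5k = g5k          # Grid'5000 testbed handle
        self.iotlab = iotlab    # FIT IoT-LAB testbed handle

    def deploy(self):
        # book and provision nodes on both testbeds
        raise NotImplementedError

    def play(self):
        # run the experiment once both testbeds are deployed
        raise NotImplementedError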
def run(args):
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #args = [0,20,50,0,"TEST1",.001,10]

    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    #process_index = args[4]
    np.random.seed(process_index + 100)
    vel_var = args[5]
    num_targets = args[6]




    #method = 0
    #RBF_components = 20
    #MLP_neurons = 50
    #vel_var = .001
    #num_targets = min(6,max(2,np.random.poisson(3)))
    #num_targets = 2

    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters (input && output-layers)
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight"] = np.random.normal(0, .3, [2, output_size]) #Output-layer (maps flattened states to the actions)
        #params[0]["weight"] = []
        #for f in range(0,filter_size):
         #   params[0]["weight"].append(np.random.normal(0,1,[spatial_weight_size,temporal_weight_size])) #Convolution weith matrix

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    num_targets = 3
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(10,init_max_target)

        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        num_targets = 3  # fixed for now; overrides the random draw above
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)  # decay disabled
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        avg_uncertainty = []
        max_uncertainty = []

        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_grad_with_state_w1 = []
        episode_grad_with_state_w2 = []

        episode_MLP_state = []
        episode_actions = []


        while episode_condition:
            temp_m = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []

            #create input-feature matrix
            input_state = np.zeros([num_states,num_targets]) #create a fixed-size matrix for input states


            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)

                if method==0 or method==2:input_state[:,i] = current_state


                #target_actions.append(s.generate_action(params,input_state,.1))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) #### needs to be modified
                if method==2: episode_MLP_state.append(extra_information) # needs to be modified; extra_information is not defined in this variant
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)

            #Featurize input-state using pooling
            input_state = list(np.max(input_state,axis=1))+list(np.min(input_state,axis=1))+\
                          list(np.mean(input_state, axis=1))+list(np.median(input_state,axis=1))
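            #pooling makes the policy input size independent of num_targets: the num_states x num_targets
            #matrix collapses to 4*num_states features (per-dimension max, min, mean and median across targets)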

            this_uncertainty = [uncertainty[x][-1] for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)
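            #reward rule implemented above: +1 whenever the moving average of uncertainty over the last
            #window_size steps has dropped relative to the same window lagged by window_lag steps
            #(or the instantaneous uncertainty is already below .1), 0 otherwise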

            #voting
            #if np.mean(temp_reward)>.5:
              #  reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            s.update_location_new(params,np.array(input_state).reshape([len(input_state),1]),sigma)

            #Output created by the CNN
            episode_state_out_layer.append(input_state)


            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
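            #bookkeeping so that, once the episode ends, discounted_return[t] = sum_{k>=t} gamma^(k-t)*reward[k]:
            #discount_vector holds the powers of gamma for every past step and gains a fresh 1 each step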
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])


        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break

        if not condition:
            #print("OOPSSSS...")
            continue
        if episode_condition and training:
            normalized_discounted_return = discounted_return  #no extra normalization is applied here
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-8,10000)
            internal_rate = gen_learning_rate(episode_counter, 5*1E-5, 1E-9, 10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #calculate gradient
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)

                #calculate gradient
                if method==0:
                    predicted_action = params[0]['weight'].dot(out_state)
                    #gradient = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2#This is the gradient
                    gradient_out_layer = ((episode_actions[e].reshape(2, 1) - predicted_action).dot(
                        out_state.transpose())) / sigma ** 2  # this is the gradient

                elif method==1:
                    #note: this branch needs `state` (the commented-out line above) to be defined
                    gradient = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2  # this is the gradient
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradient_out_layer))>1E2:# or np.max(np.abs(gradient_internal[0]))>1E2:
                    #print("OOPPSSSS...")
                    continue #skip updates with overly large gradients

                if method==0:
                    adjustment_term_out_layer = gradient_out_layer*normalized_discounted_return[e]#an unbiased sample of the return

                    params[0]['weight'] += rate * adjustment_term_out_layer
                elif method==1:
                    adjustment_term = gradient * normalized_discounted_return[e]  # an unbiased sample of the return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass
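                #REINFORCE with a Gaussian policy: for a ~ N(Wx, sigma^2 I),
                #grad_W log pi(a|x) = (a - Wx).x^T / sigma^2, so the updates above are
                #W += rate * G_t * grad_W log pi(a_t|x_t), with out_state playing the role of x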

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            flatted_weights = list(params[0]['weight'][0,:]) + list(params[0]['weight'][1,:])
            temp = [str(x) for x in flatted_weights]
            weight_file.write("\t".join(temp)+"\n")
            #weight_saver1.append(params[0]['weight'][0][0][0])
            #weight_saver2.append(params[0]['weight'][0][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)
Example #12
    for i in range(n_plat):
        liste[i] = m
    return liste
#PART: SIMULATING THE FIRST PERIODS WITH RANDOM PRICES
#define listePRIX
liste_tab_Y = [[] for i in range(n_plat)]
liste_tab_X = [[] for i in range(n_plat)]
listeRevenus1 = [[] for i in range(n_plat)]
listeRevenus2 = [[] for i in range(n_plat)]
listeRevenus3 = [[] for i in range(n_plat)]

"""matriceOriginelle = matricefacto."""
for i in range(n_jours_training):
    print("Jour ", i)
    state = [stock_beg for i in range(n_plat)]
    scen = scenario.scenario(n_clients, n_periodes, n_plat)
    for pe in range(n_periodes-1):
        liste_prix = remplirListePrix(pe)

        listeDemandes, tab_notes = scen.simuler(pe, liste_prix)
        for plat in range(n_plat):
            state[plat] -= listeDemandes[plat]
            a = 0
            if state[plat] < 0:
                a = listeDemandes[plat]+state[plat]
            else:
                a = listeDemandes[plat]
            state[plat] = max(0, state[plat])
            listeRevenus1[plat].append(a*liste_prix[plat])

        # add the ratings (NOTES) part....
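
#A minimal standalone sketch of the stock/revenue step above; the names here are
#illustrative, not from the original code, and demand/stock are plain numbers:
def units_sold(stock_before, demand):
    #you can only sell what is actually in stock
    return min(stock_before, max(demand, 0))

assert units_sold(10, 4) == 4  #enough stock: the full demand is sold
assert units_sold(3, 7) == 3   #stockout: only the remaining 3 units are sold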
Example #13
def load_reference_links(scen):
    if type(scen) is str:
        scen = scenario(scen)
    ReferenceLinks = collections.namedtuple("ReferenceLinks", "ids lengths lanes")
    links = scen.findAll('link')
    return ReferenceLinks([int(link['id']) for link in links], [float(link['length']) for link in links], [float(link['lanes']) for link in links])
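
#Hedged usage sketch: load_reference_links accepts either an already-parsed scenario
#or a path/string accepted by scenario(...); the document is assumed to contain <link>
#elements with id/length/lanes attributes (findAll suggests a BeautifulSoup-style tree):
#    refs = load_reference_links("my_scenario.xml")  #"my_scenario.xml" is illustrative
#    print(refs.ids[:5], refs.lengths[:5], refs.lanes[:5])

#the indented block below is a fragment from a different example (episode-loop setup
#for the RL tracking code) that was fused into this one in the source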
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    avg_reward = []
    var_reward = []
    list_of_states = []

    #Main loop to count number of valid episodes
    while episode_counter<N_max:
        #variable variance?
        #sigma = gen_learning_rate(episode_counter,sigma_max,.1,20000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episode_counter)
        scen = scenario(1,1) #create scenario object (single-target, single-sensor)
        bearing_var = 1E-2#variance of bearing measurement
        #Initialize target location + velocity randomly
        x = 2000*random.random()-1000#initial x-location
        y = 2000 * random.random() - 1000#initial y-location
        xdot = 10*random.random()-5#initial xdot-value
        ydot = 10 * random.random() - 5#initial ydot-value

        init_target_state = [x,y,xdot,ydot]#initialize target state
        #Add noise to initial target location since the tracker doesn't know about the initial location
        init_for_smc = [x+np.random.normal(0,5),y+np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)]#init state for the tracker (tracker doesn't know about the initial state)
        #initialize sensor location randomly too
        init_sensor_state = [2000*random.random()-1000,2000 * random.random() - 1000,3,-2]#initial sensor-state
        temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
Example #15
import scenario

scen = scenario.scenario(10000, 24, 3)
somme1 = 0
somme2 = 0
somme3 = 0
for i in range(10000):
    tab = scen.simuler(12, [10, 10, 10])
    somme1 += tab[0]
    somme2 += tab[1]
    somme3 += tab[2]
print(somme1 / 10000)
print(somme2 / 10000)
print(somme3 / 10000)
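
#An equivalent, more compact average using numpy (a sketch; it assumes, as the loop
#above does, that scen.simuler(12, [10, 10, 10]) returns a 3-element sequence of numbers):
import numpy as np

demands = np.array([scen.simuler(12, [10, 10, 10]) for _ in range(10000)])
print(demands.mean(axis=0))  #per-dish averages; matches the three prints above in expectation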
Example #16
def run(args):
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    vel_var = args[0]
    heading_rate = args[1]
    experiment_folder_name = args[2]
    file = open(
        base_path + "/" + experiment_folder_name + "/best_data_" +
        str(heading_rate), "w")
    # initialize actor parameters
    MAX_UNCERTAINTY = 1E9

    num_states = 6
    weight = np.random.normal(0, 1, [2, num_states])

    sigma_max = 1
    num_episodes = []
    gamma = .99

    episode_length = 1500
    learning_rate = 1E-3
    min_learning_rate = 1E-6
    N_max = 200

    window_size = 50
    window_lag = 10
    return_saver = []

    weight_saver1 = []
    weight_saver2 = []

    total_error = {}
    total_error_variance = {}
    total_reward = {}
    #for episode_counter in range(0,N_max):
    #Training parameters
    print("heading-rate=" + str(heading_rate))
    for xdot_sensor in np.arange(-15, 16, 1):
        for ydot_sensor in np.arange(-15, 16, 1):

            episode_counter = 0
            avg_reward = []
            var_reward = []
            error_saver = []
            while episode_counter < N_max:
                #sigma = gen_learning_rate(episode_counter, sigma_max, .1, 5000)  #variable variance (disabled: overridden below)
                sigma = sigma_max
                discounted_return = np.array([])
                discount_vector = np.array([])
                #print(episode_counter)
                scen = scenario(1, 1)
                bearing_var = 1E-2  #variance of bearing measurement
                #Target information
                x = 10000 * random.random() - 5000  #initial x-location
                y = 10000 * random.random() - 5000  #initial y-location
                xdot = 10 * random.random() - 5  #initial xdot-value
                ydot = 10 * random.random() - 5  #initial ydot-value
                #x = 250; y = 50; xdot = 7; ydot = -5

                init_target_state = [x, y, xdot,
                                     ydot]  #initialize target state
                init_for_smc = [
                    x + np.random.normal(0, 5), y + np.random.normal(0, 5),
                    np.random.normal(0, 5),
                    np.random.normal(0, 5)
                ]  #init state for the tracker (tracker doesn't know about the initial state)
                #init_for_smc = [x, y, xdot, ydot]
                init_sensor_state = [
                    10000 * random.random() - 5000,
                    10000 * random.random() - 5000, 3, -2
                ]  #initial sensor-state

                temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
                init_location_estimate = temp_loc + 0 * np.random.normal(
                    np.zeros([2, 1]), 10)
                init_location_estimate = [
                    init_location_estimate[0][0], init_location_estimate[1][0]
                ]
                #init_velocity_estimate = [6 * random.random() - 3, 6 * random.random() - 3]  #random init (overridden below)
                init_velocity_estimate = [
                    init_target_state[2], init_target_state[3]
                ]

                init_estimate = init_location_estimate + init_velocity_estimate
                init_covariance = np.diag([
                    MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY,
                    MAX_UNCERTAINTY
                ])  #initial covariance of state estimation

                t = target(
                    init_target_state[0:2], init_target_state[2],
                    init_target_state[3], vel_var, vel_var,
                    "CONS_V")  #constant-velocity model for target motion
                A, B = t.constant_velocity(1E-10)  #Get motion model
                x_var = t.x_var
                y_var = t.y_var
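                #for reference, the standard discrete-time constant-velocity model with sampling
                #period T and state [x, y, xdot, ydot] (the exact form returned by
                #t.constant_velocity may differ):
                #  A = [[1,0,T,0],[0,1,0,T],[0,0,1,0],[0,0,0,1]]
                #  B = [[T**2/2,0],[0,T**2/2],[T,0],[0,T]]  #maps 2-D acceleration noise into the state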

                tracker_object = EKF_tracker(
                    init_for_smc, init_covariance, A, B, x_var, y_var,
                    bearing_var)  #create tracker object
                #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

                s = sensor(
                    "CONS_V", [0, 0], [xdot_sensor, ydot_sensor],
                    heading_rate)  #create sensor object (stochastic policy)
                #s = sensor("CONS_V")
                measure = measurement(bearing_var)  #create measurement object
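                #a bearing measurement is typically atan2(y_t - y_s, x_t - x_s) plus zero-mean
                #Gaussian noise with variance bearing_var (assumed form; the measurement class may differ)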

                m = []
                x_est = []
                y_est = []
                x_vel_est = []
                y_vel_est = []
                x_truth = []
                y_truth = []
                x_vel_truth = []
                y_vel_truth = []
                uncertainty = []

                vel_error = []
                pos_error = []
                iteration = []
                innovation = []

                reward = []
                episode_condition = True
                n = 0
                violation = 0
                #store required information
                episode_state = []
                episode_actions = []

                while episode_condition:

                    #if n>50: episode_condition=False
                    #update location of target and sensor + generate new measurement
                    #Also, run tracker object
                    t.update_location()
                    m.append(
                        measure.generate_bearing(t.current_location,
                                                 s.current_location))
                    tracker_object.update_states(s.current_location, m[-1])
                    #if len(tracker_object.meas_vec)>20:
                    #   tmp = np.zeros([2,2])
                    #  for n in range(0,10):
                    #     vector = tracker_object.meas_vec[-1-n]
                    #    cov = (vector.transpose().dot(vector))/bearing_var
                    #   sliced_cov = np.array([[cov[0,0],cov[0,1]],[cov[1,0],cov[1,1]]])
                    #  tmp+= sliced_cov

                    #Fisher_matrix = tmp/10.0
                    #crlb = np.linalg.inv(Fisher_matrix)
                    #print(crlb.diagonal())

                    #create state-vector

                    normalized_innovation = (
                        tracker_object.innovation_list[-1]
                    ) / tracker_object.innovation_var[-1]

                    #print(normalized_innovation)
                    #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                    current_state = list(
                        tracker_object.x_k_k.reshape(len(
                            tracker_object.x_k_k))) + list(s.current_location)

                    #print(current_state)
                    #state normalization
                    x_slope = 2.0 / (scen.x_max - scen.x_min)
                    y_slope = 2.0 / (scen.y_max - scen.y_min)
                    vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                    #normalization
                    current_state[0] = -1 + x_slope * (current_state[0] -
                                                       scen.x_min)
                    current_state[1] = -1 + y_slope * (current_state[1] -
                                                       scen.y_min)
                    current_state[2] = -1 + vel_slope * (current_state[2] -
                                                         scen.vel_min)
                    current_state[3] = -1 + vel_slope * (current_state[3] -
                                                         scen.vel_min)
                    current_state[4] = -1 + x_slope * (current_state[4] -
                                                       scen.x_min)
                    current_state[5] = -1 + y_slope * (current_state[5] -
                                                       scen.y_min)
                    s.update_location(weight, sigma, np.array(current_state))
                    estimate = tracker_object.x_k_k
                    episode_state.append(current_state)

                    truth = t.current_location
                    x_est.append(estimate[0])
                    y_est.append(estimate[1])
                    x_vel_est.append(estimate[2])
                    y_vel_est.append(estimate[3])

                    x_truth.append(truth[0])
                    y_truth.append(truth[1])

                    x_vel_truth.append(t.current_velocity[0])
                    y_vel_truth.append(t.current_velocity[1])

                    #print(estimate[-1])
                    #print(np.linalg.norm(estimate[2:4]-np.array([t.current_velocity[0],t.current_velocity[1]])))
                    vel_error.append(
                        np.linalg.norm(estimate[2:4] - np.array([
                            t.current_velocity[0], t.current_velocity[1]
                        ]).reshape(2, 1)))
                    pos_error.append(
                        np.linalg.norm(estimate[0:2] -
                                       np.array(truth).reshape(2, 1)))
                    innovation.append(normalized_innovation[0])

                    unormalized_uncertainty = np.sum(
                        tracker_object.p_k_k.diagonal())
                    #if unormalized_uncertainty>MAX_UNCERTAINTY:
                    #   normalized_uncertainty = 1
                    #else:
                    #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty

                    uncertainty.append(
                        (1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                    if len(uncertainty) < window_size + window_lag:
                        reward.append(0)
                    else:
                        current_avg = np.mean(uncertainty[-window_size:])
                        prev_avg = np.mean(
                            uncertainty[-(window_size +
                                          window_lag):-window_lag])
                        if current_avg < prev_avg or uncertainty[-1] < .1:
                            #if current_avg < prev_avg:
                            reward.append(1)
                        else:
                            reward.append(0)

                    #reward.append(-1*uncertainty[-1])
                    #update return

                    discount_vector = gamma * np.array(discount_vector)
                    #discount_vector = list(discount_vector)
                    #discount_vector.append(1)

                    discounted_return += (1.0 * reward[-1]) * discount_vector
                    new_return = 1.0 * reward[-1]
                    list_discounted_return = list(discounted_return)
                    list_discounted_return.append(new_return)
                    discounted_return = np.array(list_discounted_return)

                    list_discount_vector = list(discount_vector)
                    list_discount_vector.append(1)
                    discount_vector = np.array(list_discount_vector)

                    iteration.append(n)
                    if n > episode_length: break
                    n += 1
                num_episodes.append(n)
                error_saver.append(np.mean(pos_error))
                return_saver.append(sum(reward))
                episode_counter += 1

            total_error[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.mean(
                sorted(error_saver)[0:int(.95 * N_max)])
            total_reward[str(xdot_sensor) + "|" + str(ydot_sensor)] = np.mean(
                sorted(return_saver, reverse=True)[0:int(.95 * N_max)])
            total_error_variance[str(xdot_sensor) + "|" +
                                 str(ydot_sensor)] = np.var(
                                     sorted(error_saver)[0:int(.95 * N_max)])

    sorted_error = sorted(total_error.items(), key=operator.itemgetter(1))
    key = sorted_error[0][0]

    file.write("Min Error=" + str(sorted_error[0][1]) + "\n")
    file.write("Best params=" + str(key) + "\n")
    file.close()
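
#Hedged usage sketch: run expects [vel_var, heading_rate, experiment_folder_name], and
#base_path plus the experiment folder must exist before the output file is opened.
#The values are illustrative only:
#    run([0.1, 0.05, "grid_search_exp"])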