Example 1
import numpy as np
from scipy.stats import norm

# calc_move, read_sensor, ValueIteration, and Car are assumed to be
# provided elsewhere in the project.
class Player_vi:
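    """
    Localize with a particle filter (augmented MCL with adaptive
    random-particle injection) and act greedily against a value map
    computed by value iteration over the known environment map.
    """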
    
    def __init__(self, params, env_map):
        self.world_params = params['world']
        self.robot_params = params['robot']
        self.sensor_params = params['sensor']
        self.num_particles = params['sim']['num_particles']
        self.goal_pos = params['sim']['goal_pos']
        self.goal_radius = params['sim']['goal_radius']
        self.map_belief = env_map
        self.state_belief = np.zeros(2)

        # Time step counter and long/short-term average particle weights
        # used for adaptive resampling
        self.timestep = 0
        self.w_slow = None
        self.w_fast = None

        # Discretized Actions
        self.actions = [{'dturn':dt, 'dmove':dm} for dt in [-np.pi/4, 0, np.pi/4] for dm in [2,-2]]

        # Value map from value iteration algorithm
        trans_sim = lambda state, act : calc_move(self.robot_params, state, act, self.map_belief)
        self.vi = ValueIteration(env_map, trans_sim, self.actions, self.reward)
        self.vi.solve()


        # Particle filter
        # Each row represents one particle of (pos_x, pos_y, orient)
        self.particles = self.gen_rand_particles(self.num_particles)


    def reward(self, x, y):
        goalx, goaly = self.goal_pos
        dist = np.sqrt((x - goalx)**2 + (y - goaly)**2)
        # Penalize states adjacent to occupied cells (assumes (x, y) are
        # interior integer grid indices of the belief map)
        neighborhood = [self.map_belief[x + dx, y + dy]
                        for dx in [-1, 0, 1] for dy in [-1, 0, 1]]
        wall_penalty = -100 if sum(neighborhood) > 0 else 0
        if dist <= self.goal_radius:
            return 1000
        else:
            return -1 + wall_penalty

    # Generate matrix of random particles (num x state dimensions)
    def gen_rand_particles(self,num_particles):
        l,w = self.map_belief.shape        
        particles = np.random.rand(num_particles,3)
        particles[:,0] *= l
        particles[:,1] *= w
        particles[:,2] *= np.pi * 2
        return particles

    def pred_step(self, action):
        # Update Particles in prediction step
        tmp = np.zeros(self.particles.shape)
        for i in np.arange(self.num_particles):
            state = self.particles[i,:]
            tmp[i,:] = calc_move(self.robot_params, state, action, self.map_belief)
        self.particles = tmp

    # Calculate updated beliefs based on sensor readings
    # NOTE: there may be an issue in the measurement model - it is not
    # giving good results for p(z|x)
    def update_belief(self, sensor_readings):

        alpha_slow = 0.05
        alpha_fast = 0.5

        # Measurement Update step
        sensor_type = 'radar' # would need to be extended for additional sensors
        actual_m = sensor_readings['radar']
        weights = np.zeros(self.num_particles)
        for i in np.arange(self.num_particles):
            pred_state = self.particles[i,:]
            pred_readings = read_sensor(self.sensor_params, pred_state, self.map_belief)
            logprob_m = 0
            for j in np.arange(len(actual_m)):
                # Log-likelihood of the predicted reading under a Gaussian
                # centered at the actual reading, with scaled-up sensor noise
                logprob_m += norm.logpdf(x=pred_readings[j], loc=actual_m[j], 
                                         scale=self.sensor_params['noise_sigma'] * 10)
            weights[i] = np.exp(logprob_m)
        
        # Short and long term prob trends
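        # (Augmented MCL: w_slow and w_fast are exponential moving averages of
        # the mean particle weight, with alpha_slow << alpha_fast; fresh random
        # particles are injected later with probability ~ 1 - w_fast / w_slow.)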
        w_avg = np.mean(weights)
        if self.w_slow is None and self.w_fast is None:
            self.w_slow = w_avg
            self.w_fast = w_avg
        else:
            self.w_slow = self.w_slow + alpha_slow * (w_avg - self.w_slow)
            self.w_fast = self.w_fast + alpha_fast * (w_avg - self.w_fast)

        print(self.w_fast, self.w_slow, 1 - self.w_fast / self.w_slow)

        # Renormalize weights (fall back to uniform if all weights underflow to zero)
        total = np.sum(weights)
        weights = weights / total if total > 0 else np.full(self.num_particles, 1.0 / self.num_particles)
        
        # Resample particles based on new weights
        sample_idx = np.random.choice(np.arange(self.num_particles),
                                      p=weights, size=self.num_particles)
        self.particles = self.particles[sample_idx,:]

        # Inject random particles when the short-term weight trend falls below
        # the long-term trend (probability 1 - w_fast / w_slow)
        rand_ind = np.random.rand(self.num_particles) < (1 - self.w_fast/self.w_slow)
        self.particles[rand_ind,:] = self.gen_rand_particles(np.sum(rand_ind))
    
    # Return the action for the robot given the map of the environment
    # action is a dictionary with the following keys:
    #       dturn : radians to turn the robot
    #       dmove : distance to move forward
    def get_action(self, readings):
        # Evaluate each discretized action by the value of its predicted next
        # state, averaged over the particle belief (a QMDP-style greedy choice)
        act_vals = np.zeros((self.num_particles, len(self.actions)))
        for i in range(self.num_particles):
            for j in range(len(self.actions)):
                state = self.particles[i,:]
                action = self.actions[j]
                nsx, nsy, nso = calc_move(self.robot_params, state, action, self.map_belief)
                # lookup value at new state
                act_vals[i,j] = self.vi.get_val((nsx,nsy,nso))
        # Take greedy action with the highest expected value
        sum_vals = np.sum(act_vals,axis=0)
        max_ind = np.argmax(sum_vals)
        return self.actions[max_ind]


    # Returns the belief state of the robot
    def get_bstate(self):
        state = {'world':self.map_belief}
        state['robots'] = {0:Car(self.robot_params, [15,15,0])}
        state['particles'] = self.particles
        return state
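
For reference, the adaptive resampling inside update_belief follows the augmented MCL heuristic: track short- and long-term averages of the mean particle weight and inject fresh random particles when the short-term average drops below the long-term one. Below is a self-contained sketch of just that step, independent of the class above; the function name augmented_resample and its defaults are hypothetical, and particles are drawn uniformly from the unit cube rather than from the map, so in practice you would rescale them the way gen_rand_particles does.

import numpy as np

def augmented_resample(particles, weights, w_slow, w_fast,
                       alpha_slow=0.05, alpha_fast=0.5, rng=None):
    # Resample particles by weight, then replace a random subset with
    # uniformly drawn particles when recent likelihoods fall below the
    # long-term trend (augmented MCL).
    rng = np.random.default_rng() if rng is None else rng
    n, d = particles.shape
    w_avg = weights.mean()
    w_slow += alpha_slow * (w_avg - w_slow)
    w_fast += alpha_fast * (w_avg - w_fast)
    probs = weights / weights.sum()
    resampled = particles[rng.choice(n, size=n, p=probs)]
    p_inject = max(0.0, 1.0 - w_fast / w_slow)
    inject = rng.random(n) < p_inject
    resampled[inject] = rng.random((inject.sum(), d))  # uniform in [0, 1)^d
    return resampled, w_slow, w_fast

# Toy usage: 500 particles in a 3-dimensional state space
particles = np.random.rand(500, 3)
weights = np.random.rand(500)
particles, w_slow, w_fast = augmented_resample(particles, weights,
                                               w_slow=0.5, w_fast=0.5)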