Code Example #1
File: Player_vi.py Project: electric26/MCPOMDP
    def __init__(self, params, env_map):
        self.world_params = params['world']
        self.robot_params = params['robot']
        self.sensor_params = params['sensor']
        self.num_particles = params['sim']['num_particles']
        self.goal_pos = params['sim']['goal_pos']
        self.goal_radius = params['sim']['goal_radius']
        self.map_belief = env_map
        self.state_belief = np.zeros(2)

        # Augmented MCL bookkeeping (timestep counter, slow/fast average weights)
        self.timestep = 0
        self.w_slow = None
        self.w_fast = None

        l, w = self.map_belief.shape

        # Discretized Actions
        self.actions = [{'dturn': dt, 'dmove': dm} for dt in [-np.pi/4, 0, np.pi/4] for dm in [2, -2]]

        # Value map from value iteration algorithm
        trans_sim = lambda state, act: calc_move(self.robot_params, state, act, self.map_belief)
        self.vi = ValueIteration(env_map, trans_sim, self.actions, self.reward)
        self.vi.solve()


        # Particle filter
        # Each row represents one particle of (pos_x, pos_y, orient)
        self.particles = self.gen_rand_particles(self.num_particles)
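
The constructor calls self.gen_rand_particles(self.num_particles), which is not among these excerpts. The following is a minimal sketch of what such an initializer could look like, assuming each particle row is (pos_x, pos_y, orient) as the comment above states and that particles are drawn uniformly over the full map extent; the project's actual implementation may instead sample only free cells. It is written as a standalone function purely for illustration.

import numpy as np

def gen_rand_particles(num_particles, map_shape):
    # Sketch only: draw particles uniformly over the map extent.
    # map_shape is the (l, w) shape of the belief map unpacked above.
    l, w = map_shape
    particles = np.empty((num_particles, 3))
    particles[:, 0] = np.random.uniform(0, l, num_particles)           # pos_x
    particles[:, 1] = np.random.uniform(0, w, num_particles)           # pos_y
    particles[:, 2] = np.random.uniform(-np.pi, np.pi, num_particles)  # orient
    return particles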
Code Example #2
File: Player_vi.py Project: electric26/MCPOMDP
    def pred_step(self, action):
        # Update particles in the prediction step
        tmp = np.zeros(self.particles.shape)
        for i in range(self.num_particles):
            state = self.particles[i, :]
            tmp[i, :] = calc_move(self.robot_params, state, action, self.map_belief)
        self.particles = tmp
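
pred_step implements only the prediction half of the particle filter; the matching measurement update is not among these excerpts. Below is a hedged sketch of what such a correction step could look like, assuming a hypothetical sensor_likelihood(state, readings) helper that scores how well a particle explains the sensor readings, followed by low-variance (systematic) resampling. None of these names come from the project.

import numpy as np

def correct_step(particles, readings, sensor_likelihood):
    # Sketch only: reweight each particle by the measurement likelihood,
    # then resample with the low-variance (systematic) scheme.
    n = particles.shape[0]
    weights = np.array([sensor_likelihood(particles[i, :], readings)
                        for i in range(n)])
    weights = weights / np.sum(weights)

    # Systematic resampling: n evenly spaced pointers with one random offset
    positions = (np.arange(n) + np.random.uniform()) / n
    indexes = np.minimum(np.searchsorted(np.cumsum(weights), positions), n - 1)
    return particles[indexes, :]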
Code Example #3
File: Player_vi.py Project: electric26/MCPOMDP
    def get_action(self, readings):
        # Score each discretized action by the value of its predicted next
        # state, summed over all particles (i.e. over the particle belief)
        act_vals = np.zeros((self.num_particles, len(self.actions)))
        for i in range(self.num_particles):
            for j in range(len(self.actions)):
                state = self.particles[i, :]
                action = self.actions[j]
                nsx, nsy, nso = calc_move(self.robot_params, state, action, self.map_belief)
                # lookup value at new state
                act_vals[i, j] = self.vi.get_val((nsx, nsy, nso))
        # Take greedy action with the highest expected value
        sum_vals = np.sum(act_vals, axis=0)
        max_ind = np.argmax(sum_vals)
        return self.actions[max_ind]
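
For context, the three methods shown above would typically be driven by a loop of roughly the following shape. The class name Player and the env.sense / env.step / env.at_goal calls are assumptions made for illustration only; they do not appear in the excerpts.

# Hypothetical driver loop; params, env_map and env come from the
# surrounding simulation and are not defined in these excerpts.
player = Player(params, env_map)
while not env.at_goal():
    readings = env.sense()                # assumed sensing interface
    action = player.get_action(readings)  # greedy action under the value map
    env.step(action)                      # assumed environment interface
    player.pred_step(action)              # propagate particles through the motion model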