class Player_vi:
    """Planner/localizer: value iteration for control plus an AMCL-style
    particle filter (with adaptive random-particle injection) for state belief.
    """

    def __init__(self, params, env_map):
        self.world_params = params['world']
        self.robot_params = params['robot']
        self.sensor_params = params['sensor']
        self.num_particles = params['sim']['num_particles']
        self.goal_pos = params['sim']['goal_pos']
        self.goal_radius = params['sim']['goal_radius']
        self.map_belief = env_map  # occupancy-grid belief map
        self.state_belief = np.zeros(2)
        self.timestep = 0
        # Long-/short-term average likelihoods for adaptive resampling (AMCL).
        # None until the first measurement update.
        self.w_slow = None
        self.w_fast = None
        l, w = self.map_belief.shape
        # Discretized action set: turn {-45deg, 0, +45deg} x move {+2, -2}.
        self.actions = [{'dturn': dt, 'dmove': dm}
                        for dt in [-np.pi / 4, 0, np.pi / 4]
                        for dm in [2, -2]]
        # Value map from the value-iteration algorithm over the believed map.
        trans_sim = lambda state, act: calc_move(self.robot_params, state,
                                                 act, self.map_belief)
        self.vi = ValueIteration(env_map, trans_sim, self.actions, self.reward)
        self.vi.solve()
        # Particle filter: each row is one particle (pos_x, pos_y, orient).
        self.particles = self.gen_rand_particles(self.num_particles)

    def reward(self, x, y):
        """Reward at grid cell (x, y): +1000 inside the goal radius,
        otherwise -1 with an extra -100 penalty when any cell in the 3x3
        neighborhood contains a wall.
        """
        goalx, goaly = self.goal_pos
        dist = np.sqrt((x - goalx) ** 2 + (y - goaly) ** 2)
        # BUGFIX: clip the neighborhood so x-1 at the map edge does not wrap
        # to the opposite side (Python negative indexing) and x+1 does not
        # raise IndexError at the far edge.
        l, w = self.map_belief.shape
        xi, yi = int(x), int(y)
        x0, x1 = max(xi - 1, 0), min(xi + 1, l - 1)
        y0, y1 = max(yi - 1, 0), min(yi + 1, w - 1)
        if np.sum(self.map_belief[x0:x1 + 1, y0:y1 + 1]) > 0:
            wall_penalty = -100
        else:
            wall_penalty = 0
        if dist <= self.goal_radius:
            return 1000
        return -1 + wall_penalty

    def gen_rand_particles(self, num_particles):
        """Return a (num_particles x 3) matrix of uniformly random particles
        (pos_x in [0, l), pos_y in [0, w), orient in [0, 2*pi))."""
        l, w = self.map_belief.shape
        particles = np.random.rand(num_particles, 3)
        particles[:, 0] *= l
        particles[:, 1] *= w
        particles[:, 2] *= np.pi * 2
        return particles

    def pred_step(self, action):
        """Prediction step: propagate every particle through the motion model."""
        tmp = np.zeros(self.particles.shape)
        for i in range(self.num_particles):
            state = self.particles[i, :]
            tmp[i, :] = calc_move(self.robot_params, state, action,
                                  self.map_belief)
        self.particles = tmp

    def update_belief(self, sensor_readings):
        """Measurement update: reweight particles by sensor likelihood,
        resample, and inject random particles when the short-term likelihood
        trend falls below the long-term one (kidnapped-robot recovery)."""
        alpha_slow = 0.05
        alpha_fast = 0.5
        # Only the radar sensor is handled; extend here for additional sensors.
        actual_m = sensor_readings['radar']
        weights = np.zeros(self.num_particles)
        for i in range(self.num_particles):
            pred_state = self.particles[i, :]
            pred_readings = read_sensor(self.sensor_params, pred_state,
                                        self.map_belief)
            logprob_m = 0
            for j in range(len(actual_m)):
                # Likelihood of the actual reading given the particle's
                # prediction, with deliberately inflated (x10) sensor noise
                # so a single bad beam does not zero out the particle.
                logprob_m += norm.logpdf(x=pred_readings[j], loc=actual_m[j],
                                         scale=self.sensor_params['noise_sigma'] * 10)
            weights[i] = np.exp(logprob_m)
        # Short- and long-term likelihood trends (augmented MCL).
        w_avg = np.mean(weights)
        # BUGFIX: test for None explicitly; `not self.w_slow` also fires when
        # the running average legitimately reaches 0.0, silently resetting
        # both trends mid-run.
        if self.w_slow is None or self.w_fast is None:
            self.w_slow = w_avg
            self.w_fast = w_avg
        else:
            self.w_slow = self.w_slow + alpha_slow * (w_avg - self.w_slow)
            self.w_fast = self.w_fast + alpha_fast * (w_avg - self.w_fast)
        print(self.w_fast, self.w_slow, 1 - self.w_fast / self.w_slow)
        # Renormalize. BUGFIX: if every weight underflowed to 0 (np.exp of a
        # very negative log-likelihood), dividing yields NaNs and
        # np.random.choice raises — fall back to a uniform distribution.
        total = np.sum(weights)
        if total > 0:
            weights = weights / total
        else:
            weights = np.full(self.num_particles, 1.0 / self.num_particles)
        # Resample particles according to the new weights.
        sample_idx = np.random.choice(np.arange(self.num_particles),
                                      p=weights, size=self.num_particles)
        self.particles = self.particles[sample_idx, :]
        # Inject random particles with probability max(0, 1 - w_fast/w_slow).
        p_rand = max(0.0, 1.0 - self.w_fast / self.w_slow)
        rand_ind = np.random.rand(self.num_particles) < p_rand
        self.particles[rand_ind, :] = self.gen_rand_particles(int(np.sum(rand_ind)))

    def get_action(self, readings):
        """Return the action maximizing the expected value over all particles.

        The returned action is a dict with keys:
          dturn : radians to turn the robot
          dmove : distance to move forward
        """
        act_vals = np.zeros((self.num_particles, len(self.actions)))
        for i in range(self.num_particles):
            state = self.particles[i, :]
            for j, action in enumerate(self.actions):
                nsx, nsy, nso = calc_move(self.robot_params, state, action,
                                          self.map_belief)
                # Look up the value-iteration value at the predicted next state.
                act_vals[i, j] = self.vi.get_val((nsx, nsy, nso))
        # Greedy action with the highest value summed over particles.
        sum_vals = np.sum(act_vals, axis=0)
        max_ind = np.argmax(sum_vals)
        return self.actions[max_ind]

    def get_bstate(self):
        """Return the robot's belief state (map, a placeholder robot at a
        fixed pose, and the current particle set)."""
        state = {'world': self.map_belief}
        state['robots'] = {0: Car(self.robot_params, [15, 15, 0])}
        state['particles'] = self.particles
        return state