def state_func(self, action_vw, observed):
    # Find the closest obstacle coordinate.
    obstacles_pt, front_obstacle_r = self.MAP.get_closest_obstacle_v2(self.agent.state)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)
    if front_obstacle_r is None:
        front_obstacle_r = self.sensor_r

    self.state = []
    for i in range(self.num_targets):
        # Range and bearing of the believed target position in the agent frame.
        r_b, alpha_b = util.relative_distance_polar(
            self.belief_targets[i].state[:2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        # Relative polar velocity, accounting for the agent's own motion.
        r_dot_b, alpha_dot_b = util.relative_velocity_polar(
            self.belief_targets[i].state[:2],
            self.belief_targets[i].state[2:],
            self.agent.state[:2], self.agent.state[2],
            action_vw[0], action_vw[1])
        is_belief_blocked = self.MAP.is_blocked(
            self.agent.state[:2], self.belief_targets[i].state[:2])
        self.state.extend([r_b, alpha_b, r_dot_b, alpha_dot_b,
                           np.log(LA.det(self.belief_targets[i].cov)),
                           float(observed[i]), float(is_belief_blocked)])
    self.state.extend([obstacles_pt[0], obstacles_pt[1], front_obstacle_r])
    self.state = np.array(self.state)

    # Update the visit-frequency map. The decay factor equals
    # 0.7 ** (sampling_period * b_speed / sensor_r), so past visits fade
    # faster when the mean believed target speed is high relative to the
    # sensor range.
    b_speed = np.mean([np.sqrt(np.sum(self.belief_targets[i].state[2:]**2))
                       for i in range(self.num_targets)])
    decay_factor = np.exp(self.sampling_period * b_speed / self.sensor_r * np.log(0.7))
    self.MAP.update_visit_freq_map(self.agent.state, decay_factor,
                                   observed=bool(np.mean(observed)))
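# For reference, a minimal sketch of the range/bearing transform that the
# state vector above relies on. The body below is an assumption about what
# util.relative_distance_polar computes (range and bearing of a global xy
# point in the agent frame); the repo's implementation may differ, e.g. in
# its angle-wrapping convention.
import numpy as np

def relative_distance_polar_sketch(xy_target, xy_base, theta_base):
    dx = xy_target[0] - xy_base[0]
    dy = xy_target[1] - xy_base[1]
    r = np.hypot(dx, dy)                             # range to the target
    alpha = np.arctan2(dy, dx) - theta_base          # bearing in the agent frame
    return r, (alpha + np.pi) % (2 * np.pi) - np.pi  # wrap to [-pi, pi)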
def state_func(self, action_vw, observed):
    # Find the closest obstacle coordinate.
    obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)

    self.state = []
    for i in range(self.num_targets):
        r_b, alpha_b = util.relative_distance_polar(
            self.belief_targets[i].state[:2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        r_dot_b, alpha_dot_b = util.relative_velocity_polar(
            self.belief_targets[i].state[:2],
            self.belief_targets[i].state[2:],
            self.agent.state[:2], self.agent.state[2],
            action_vw[0], action_vw[1])
        self.state.extend([r_b, alpha_b, r_dot_b, alpha_dot_b,
                           np.log(LA.det(self.belief_targets[i].cov)),
                           float(observed[i])])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)

    # Update the visit map when any target is not observed, for evaluation purposes.
    if self.MAP.visit_map is not None:
        self.MAP.update_visit_freq_map(self.agent.state, 1.0,
                                       observed=bool(np.mean(observed)))
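# Hypothetical helper (not part of the repo) documenting the observation
# layout this variant of state_func builds: six per-target features
# (r, alpha, r_dot, alpha_dot, logdet(cov), observed flag) followed by two
# closest-obstacle features (r, alpha). E.g., observation_dim(2) == 14.
def observation_dim(num_targets, per_target=6, n_obstacle_feats=2):
    return per_target * num_targets + n_obstacle_feats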
def step(self, action):
    self.agent.update(action, self.targets.state)

    # Update the true target state.
    self.targets.update()

    # Observe.
    measurements = self.agent.observation(self.targets.target)
    obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)

    # Update the agent's belief on the targets with a Kalman filter.
    GaussianBelief = infoplanner.IGL.MultiTargetFilter(
        measurements, self.agent.agent, debug=False)
    self.agent.update_belief(GaussianBelief)
    self.belief_targets.update(self.agent.get_belief_state(),
                               self.agent.get_belief_cov())

    observed = [m.validity for m in measurements]
    reward, done, mean_nlogdetcov = self.get_reward(
        obstacles_pt, observed, self.is_training)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)

    self.state = []
    target_b_state = self.agent.get_belief_state()
    target_b_cov = self.agent.get_belief_cov()
    control_input = self.action_map[action]
    for n in range(self.num_targets):
        r_b, alpha_b = util.relative_distance_polar(
            target_b_state[self.target_dim * n:self.target_dim * n + 2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        # Velocity block of target n only, matching the per-target slicing
        # used for the position above.
        r_dot_b, alpha_dot_b = util.relative_velocity_polar(
            target_b_state[self.target_dim * n:self.target_dim * n + 2],
            target_b_state[self.target_dim * n + 2:self.target_dim * (n + 1)],
            self.agent.state[:2], self.agent.state[2],
            control_input[0], control_input[1])
        self.state.extend([
            r_b, alpha_b, r_dot_b, alpha_dot_b,
            np.log(LA.det(
                target_b_cov[self.target_dim * n:self.target_dim * (n + 1),
                             self.target_dim * n:self.target_dim * (n + 1)])),
            float(observed[n])])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)
    return self.state, reward, done, {'mean_nlogdetcov': mean_nlogdetcov}
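# A hedged usage sketch of the Gym-style loop this step() supports. The env
# argument, reset(), and action_space are assumed from the usual Gym
# convention; only the (state, reward, done, info) return and the
# 'mean_nlogdetcov' info key are taken from the code above.
def rollout_sketch(env):
    state = env.reset()
    done, episode_reward, nlogdetcovs = False, 0.0, []
    while not done:
        action = env.action_space.sample()  # stand-in for a trained policy
        state, reward, done, info = env.step(action)
        episode_reward += reward
        nlogdetcovs.append(info['mean_nlogdetcov'])
    return episode_reward, nlogdetcovs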