def state_func(self, action_vw, observed):
    # Find the closest obstacle coordinate.
    obstacles_pt, front_obstacle_r = self.MAP.get_closest_obstacle_v2(self.agent.state)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)
    if front_obstacle_r is None:
        front_obstacle_r = self.sensor_r

    self.state = []
    for i in range(self.num_targets):
        r_b, alpha_b = util.relative_distance_polar(
            self.belief_targets[i].state[:2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        r_dot_b, alpha_dot_b = util.relative_velocity_polar(
            self.belief_targets[i].state[:2],
            self.belief_targets[i].state[2:],
            self.agent.state[:2], self.agent.state[2],
            action_vw[0], action_vw[1])
        is_belief_blocked = self.MAP.is_blocked(self.agent.state[:2],
                                                self.belief_targets[i].state[:2])
        self.state.extend([r_b, alpha_b, r_dot_b, alpha_dot_b,
                           np.log(LA.det(self.belief_targets[i].cov)),
                           float(observed[i]), float(is_belief_blocked)])
    self.state.extend([obstacles_pt[0], obstacles_pt[1], front_obstacle_r])
    self.state = np.array(self.state)

    # Update the visit-frequency map. The per-step decay factor equals
    # 0.7**(b_speed * sampling_period / sensor_r): past visit counts shrink
    # to 70% over the time the mean believed target speed covers one sensor
    # radius.
    b_speed = np.mean([np.sqrt(np.sum(self.belief_targets[i].state[2:]**2))
                       for i in range(self.num_targets)])
    decay_factor = np.exp(self.sampling_period * b_speed / self.sensor_r * np.log(0.7))
    self.MAP.update_visit_freq_map(self.agent.state, decay_factor,
                                   observed=bool(np.mean(observed)))
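# A standalone sketch of the decay factor used in state_func above; the
# names and numbers below are illustrative, not environment attributes. The
# per-step multiplier is 0.7**(v * dt / R), so accumulated visit counts fall
# to 70% over the time a target moving at the mean believed speed v crosses
# one sensor radius R.
def _demo_visit_decay():
    import numpy as np
    dt, v, R = 0.5, 1.2, 10.0  # assumed sampling period [s], speed [m/s], range [m]
    decay_factor = np.exp(dt * v / R * np.log(0.7))
    steps_per_radius = R / (v * dt)  # steps needed to traverse one sensor radius
    # Applying the decay for that many steps leaves exactly 70% of the count.
    assert np.isclose(decay_factor ** steps_per_radius, 0.7)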
def state_func(self, action_vw, observed):
    # Find the closest obstacle coordinate.
    obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)

    self.state = []
    for i in range(self.num_targets):
        r_b, alpha_b = util.relative_distance_polar(
            self.belief_targets[i].state[:2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        self.state.extend([r_b, alpha_b,
                           np.log(LA.det(self.belief_targets[i].cov)),
                           float(observed[i])])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)

    # Update the visit map for evaluation purposes (no decay).
    if self.MAP.visit_map is not None:
        self.MAP.update_visit_freq_map(self.agent.state, 1.0,
                                       observed=bool(np.mean(observed)))
def update(self, z_t, x_t):
    """EKF measurement update.

    Parameters
    ----------
    z_t : observation - radial and angular distances from the agent.
    x_t : agent state (x, y, orientation) in the global frame.
    """
    # Predicted measurement and the measurement Jacobian linearized at the
    # current state estimate.
    r_pred, alpha_pred = util.relative_distance_polar(
        self.state[:2], x_t[:2], x_t[2])
    diff_pred = np.array(self.state[:2]) - np.array(x_t[:2])
    if self.dim == 2:
        Hmat = np.array([[diff_pred[0], diff_pred[1]],
                         [-diff_pred[1] / r_pred, diff_pred[0] / r_pred]]) / r_pred
    elif self.dim == 4:
        Hmat = np.array([[diff_pred[0], diff_pred[1], 0.0, 0.0],
                         [-diff_pred[1] / r_pred, diff_pred[0] / r_pred, 0.0, 0.0]]) / r_pred
    else:
        raise ValueError('target dimension for KF must be either 2 or 4')

    # Innovation with the bearing residual wrapped into the +/- pi range.
    innov = z_t - np.array([r_pred, alpha_pred])
    innov[1] = util.wrap_around(innov[1])

    # Innovation covariance S, Kalman gain K, and the posterior update.
    S = np.matmul(np.matmul(Hmat, self.cov), Hmat.T) \
        + self.obs_noise_func((r_pred, alpha_pred))
    K = np.matmul(np.matmul(self.cov, Hmat.T), LA.inv(S))
    C = np.eye(self.dim) - np.matmul(K, Hmat)

    self.cov = np.matmul(C, self.cov)
    self.state = np.clip(self.state + np.matmul(K, innov),
                         self.limit[0], self.limit[1])
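# A standalone sanity check for the measurement Jacobian in the EKF update
# above; the helper `_h` and the numbers are illustrative, not part of the
# filter. The rows of Hmat are the partials of the range/bearing measurement
# with respect to the target position, so a finite-difference approximation
# of the measurement model should reproduce them.
def _demo_measurement_jacobian():
    import numpy as np

    def _h(target_xy, agent_xy, agent_theta):
        # Range/bearing measurement model in the agent frame.
        diff = target_xy - agent_xy
        return np.array([np.linalg.norm(diff),
                         np.arctan2(diff[1], diff[0]) - agent_theta])

    target, agent, theta = np.array([3.0, 4.0]), np.array([0.0, 0.0]), 0.3
    diff = target - agent
    r = np.linalg.norm(diff)
    Hmat = np.array([[diff[0], diff[1]],
                     [-diff[1] / r, diff[0] / r]]) / r
    eps = 1e-6
    H_fd = np.column_stack([(_h(target + eps * e, agent, theta)
                             - _h(target, agent, theta)) / eps
                            for e in np.eye(2)])
    assert np.allclose(Hmat, H_fd, atol=1e-5)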
def update(self, z_t, x_t):
    # UKF measurement update: the measurement noise covariance is evaluated
    # at the predicted range and bearing, then passed to the filter.
    r_pred, alpha_pred = util.relative_distance_polar(
        self.ukf.x[:2], x_t[:2], x_t[2])
    self.ukf.update(z_t, R=self.obs_noise_func((r_pred, alpha_pred)),
                    agent_state=x_t)
    self.cov = self.ukf.P
    self.state = np.clip(self.ukf.x, self.limit[0], self.limit[1])
def observation(self, target):
    # A target is observed when it is within sensor range, inside the field
    # of view, and not occluded by the map.
    r, alpha = util.relative_distance_polar(target.state[:2],
                                            xy_base=self.agent.state[:2],
                                            theta_base=self.agent.state[2])
    observed = (r <= self.sensor_r) \
        and (abs(alpha) <= self.fov / 2 / 180 * np.pi) \
        and (not self.MAP.is_blocked(self.agent.state, target.state))
    z = None
    if observed:
        # Noisy range/bearing measurement.
        z = np.array([r, alpha])
        z += np.random.multivariate_normal(np.zeros(2),
                                           self.observation_noise(z))
    return observed, z
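# A standalone sketch of the visibility test in observation() above; the
# sensor_r and fov values are assumed examples, not the environment defaults.
# A target 5 m away at a 20-degree bearing is visible to a sensor with a
# 10 m range and a 120-degree field of view when the line of sight is clear.
def _demo_visibility_test():
    import numpy as np
    sensor_r, fov = 10.0, 120.0  # assumed range [m] and field of view [deg]
    r, alpha = 5.0, np.deg2rad(20.0)  # relative range and bearing to the target
    in_range = r <= sensor_r
    in_fov = abs(alpha) <= fov / 2 / 180 * np.pi  # half-FOV, degrees to radians
    assert in_range and in_fov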
def step(self, action):
    self.agent.update(action, self.targets.state)

    # Update the true target states.
    self.targets.update()

    # Observe.
    measurements = self.agent.observation(self.targets.target)
    obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)

    # Update the agent's belief over the targets with a Kalman filter.
    GaussianBelief = infoplanner.IGL.MultiTargetFilter(measurements,
                                                       self.agent.agent,
                                                       debug=False)
    self.agent.update_belief(GaussianBelief)
    self.belief_targets.update(self.agent.get_belief_state(),
                               self.agent.get_belief_cov())

    observed = [m.validity for m in measurements]
    reward, done, mean_nlogdetcov = self.get_reward(obstacles_pt, observed,
                                                    self.is_training)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)

    # Compose the observation vector from the updated belief.
    self.state = []
    target_b_state = self.agent.get_belief_state()
    target_b_cov = self.agent.get_belief_cov()
    control_input = self.action_map[action]
    for n in range(self.num_targets):
        r_b, alpha_b = util.relative_distance_polar(
            target_b_state[self.target_dim * n:self.target_dim * n + 2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        # Position and velocity slices of the n-th target's belief only.
        r_dot_b, alpha_dot_b = util.relative_velocity_polar(
            target_b_state[self.target_dim * n:self.target_dim * n + 2],
            target_b_state[self.target_dim * n + 2:self.target_dim * (n + 1)],
            self.agent.state[:2], self.agent.state[2],
            control_input[0], control_input[1])
        self.state.extend([
            r_b, alpha_b, r_dot_b, alpha_dot_b,
            np.log(LA.det(
                target_b_cov[self.target_dim * n:self.target_dim * (n + 1),
                             self.target_dim * n:self.target_dim * (n + 1)])),
            float(observed[n])])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)
    return self.state, reward, done, {'mean_nlogdetcov': mean_nlogdetcov}
def update(self, observed, z_t, x_t, u_t=None):
    # UKF prediction, followed by a measurement update when the target was
    # actually observed.
    self.ukf.predict(u=u_t)
    if observed:
        r_pred, alpha_pred = util.relative_distance_polar(
            self.ukf.x[:2], x_t[:2], x_t[2])
        self.ukf.update(z_t, R=self.obs_noise_func((r_pred, alpha_pred)),
                        agent_state=x_t)

    # Guard against filter divergence: accept the new covariance only while
    # its determinant stays below 1e6, and accept the new state only if it
    # does not lie inside an obstacle.
    cov_new = self.ukf.P
    state_new = self.ukf.x
    if LA.det(cov_new) < 1e6:
        self.cov = cov_new
    if not self.collision_func(state_new[:2]):
        self.state = np.clip(state_new, self.limit[0], self.limit[1])
def reset(self, **kwargs):
    self.state = []
    t_init_sets = []
    t_init_b_sets = []
    init_pose = self.get_init_pose(**kwargs)
    a_init_igl = infoplanner.IGL.SE3Pose(init_pose['agent'],
                                         np.array([0, 0, 0, 1]))
    for i in range(self.num_targets):
        t_init_b_sets.append(init_pose['belief_targets'][i][:2])
        t_init_sets.append(init_pose['targets'][i][:2])
        r, alpha = util.relative_distance_polar(
            np.array(t_init_b_sets[-1][:2]),
            xy_base=np.array(init_pose['agent'][:2]),
            theta_base=init_pose['agent'][2])
        logdetcov = np.log(LA.det(self.target_init_cov * np.eye(self.target_dim)))
        self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
    self.state.extend([self.sensor_r, np.pi])
    self.state = np.array(self.state)

    # Build targets with the integrator ground-truth model.
    target = self.cfg.setup_integrator_targets(
        n_targets=self.num_targets, init_pos=t_init_sets,
        init_vel=self.target_init_vel, q=METADATA['const_q_true'],
        max_vel=METADATA['target_speed_limit'])
    belief_target = self.cfg.setup_integrator_belief(
        n_targets=self.num_targets, q=METADATA['const_q'],
        init_pos=t_init_b_sets,
        cov_pos=self.target_init_cov, cov_vel=self.target_init_cov,
        init_vel=(0.0, 0.0))
    # Build a robot.
    self.agent.reset(a_init_igl, belief_target)
    self.targets.reset(target)
    self.belief_targets.update(self.agent.get_belief_state(),
                               self.agent.get_belief_cov())
    return np.array(self.state)
def reset(self, **kwargs):
    self.state = []
    init_pose = self.get_init_pose(**kwargs)
    self.agent.reset(init_pose['agent'])
    for i in range(self.num_targets):
        self.belief_targets[i].reset(
            init_state=np.concatenate((init_pose['belief_targets'][i],
                                       np.zeros(2))),
            init_cov=self.target_init_cov)
        t_init = np.concatenate((init_pose['targets'][i],
                                 [self.target_init_vel[0], 0.0]))
        self.targets[i].reset(t_init)
        self.targets[i].policy.reset(t_init)
        r, alpha = util.relative_distance_polar(
            self.belief_targets[i].state[:2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        logdetcov = np.log(LA.det(self.belief_targets[i].cov))
        self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
    self.state.extend([self.sensor_r, np.pi])
    self.state = np.array(self.state)
    return self.state
def step(self, action):
    action_vw = self.action_map[action]
    is_col = self.agent.update(action_vw,
                               [t.state[:2] for t in self.targets])
    obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)

    observed = []
    for i in range(self.num_targets):
        self.targets[i].update()
        # Observe.
        obs = self.observation(self.targets[i])
        observed.append(obs[0])
        # Update the agent's belief over the target with the UKF. The true
        # target control is unobserved, so a random control sample
        # (v in [0, 1), w in [-pi/2, pi/2)) is used for the prediction.
        self.belief_targets[i].update(
            obs[0], obs[1], self.agent.state,
            np.array([np.random.random(),
                      np.pi * np.random.random() - 0.5 * np.pi]))

    reward, done, mean_nlogdetcov = self.get_reward(self.is_training,
                                                    is_col=is_col)
    self.state = []
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)
    for i in range(self.num_targets):
        r_b, alpha_b = util.relative_distance_polar(
            self.belief_targets[i].state[:2],
            xy_base=self.agent.state[:2],
            theta_base=self.agent.state[2])
        r_dot_b, alpha_dot_b = util.relative_velocity_polar_se2(
            self.belief_targets[i].state[:3],
            self.belief_targets[i].state[3:],
            self.agent.state, action_vw)
        self.state.extend([r_b, alpha_b, r_dot_b, alpha_dot_b,
                           np.log(LA.det(self.belief_targets[i].cov)),
                           float(observed[i])])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)
    return self.state, reward, done, {'mean_nlogdetcov': mean_nlogdetcov}
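# A minimal usage sketch for step() above; `make_env` is a hypothetical
# factory for this environment and is not defined in this source.
def _demo_rollout():
    import numpy as np
    env = make_env()  # hypothetical: construct the tracking environment
    state = env.reset()
    for _ in range(100):
        action = np.random.randint(len(env.action_map))  # random discrete action
        state, reward, done, info = env.step(action)
        if done:
            break
    # step() reports the tracking metric computed by get_reward() in info.
    print(info['mean_nlogdetcov'])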