def observation(self, target, agent):
    r, alpha = util.relative_distance_polar(target.state[:2],
                                            xy_base=agent.state[:2],
                                            theta_base=agent.state[2])
    blocked = map_utils.is_blocked(self.MAP, agent.state, target.state)
    within_fov = abs(alpha) <= self.fov / 2 / 180 * np.pi  # half FOV, in radians
    # observed: target is captured by the short-range sensor
    observed = (r <= self.sensor_r) and within_fov and not blocked
    # spotted: target is scouted by the long-range sensor
    spotted = (r <= self.sensor_r_long) and within_fov and not blocked
    z = None
    if spotted:
        z = np.array([r, alpha])
        z += self.np_random.multivariate_normal(np.zeros(2),
                                                self.observation_noise(z))
    if observed:
        z = np.array([r, alpha])
        z += self.np_random.multivariate_normal(np.zeros(2),
                                                self.observation_noise(z))
        # NOTE: self.np_random must be used here (rather than np.random) for
        # the RNG seed to take effect; if it is used in the gen_rand_pose
        # functions instead, the seed does NOT work.
    return observed, z, spotted
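# For reference, a minimal sketch of what util.relative_distance_polar is
# assumed to compute here: the range to a point and its bearing in the agent's
# body frame, wrapped to [-pi, pi) so the field-of-view test above is valid.
# The actual helper in util may differ; treat this as an illustration only.
def _relative_distance_polar_sketch(xy_target, xy_base, theta_base):
    dx, dy = xy_target[0] - xy_base[0], xy_target[1] - xy_base[1]
    r = np.hypot(dx, dy)                           # range
    alpha = np.arctan2(dy, dx) - theta_base        # bearing in the body frame
    alpha = (alpha + np.pi) % (2 * np.pi) - np.pi  # wrap to [-pi, pi)
    return r, alpha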
def reset(self, **kwargs):
    """Randomly initialize the agents and targets at the start of an episode.

    Agents are given random positions in the map; targets are given random
    positions near a random agent. Returns an observation dict keyed by
    agent id.
    """
    self.rng = np.random.default_rng()
    try:
        # self.nb_agents = kwargs['nb_agents']
        self.nb_targets = kwargs['nb_targets']
    except KeyError:
        # np.random.random_integers is deprecated; randint's upper bound is
        # exclusive, hence the +1 to keep the range inclusive of num_targets.
        # self.nb_agents = np.random.randint(1, self.num_agents + 1)
        self.nb_targets = np.random.randint(1, self.num_targets + 1)
    obs_dict = {}
    init_pose = self.get_init_pose(**kwargs)
    # Initialize agents
    for ii in range(self.nb_agents):
        self.agents[ii].reset(init_pose['agents'][ii])
        obs_dict[self.agents[ii].agent_id] = []
    # Initialize targets and beliefs
    for nn in range(self.nb_targets):
        self.belief_targets[nn].reset(
            init_state=np.concatenate((init_pose['belief_targets'][nn],
                                       np.zeros(2))),
            init_cov=self.target_init_cov)
        t_init = np.concatenate((init_pose['targets'][nn],
                                 [self.target_init_vel[0], 0.0]))
        self.targets[nn].reset(t_init)
    # For each agent, compute the initial belief of every target
    for jj in range(self.nb_targets):
        for kk in range(self.nb_agents):
            r, alpha = util.relative_distance_polar(
                self.belief_targets[jj].state[:2],
                xy_base=self.agents[kk].state[:2],
                theta_base=self.agents[kk].state[2])
            r_perim, a_perim = util.relative_distance_polar(
                self.origin_init_pos[:2],
                xy_base=self.agents[kk].state[:2],
                theta_base=self.agents[kk].state[2])
            logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
            obs_dict[self.agents[kk].agent_id].append([
                r, alpha, 0.0, 0.0, logdetcov, 0.0, 0.0, 0.0,
                r_perim, a_perim])
    for agent_id in obs_dict:
        obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
    return obs_dict
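# The seeding note in observation() suggests the env follows the classic gym
# pattern, in which self.np_random is created by gym.utils.seeding. A minimal
# sketch of that assumed pattern (older gym versions return a
# np.random.RandomState; newer ones return a np.random.Generator):
def _seed_sketch(self, seed=None):
    from gym.utils import seeding
    # Sampling through self.np_random (as observation() does) is reproducible
    # across runs with the same seed; module-level np.random calls are not.
    self.np_random, seed = seeding.np_random(seed)
    return [seed]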
def update(self, observed, z_t, x_t, u_t=None):
    # Kalman filter predict step (time update)
    self.ukf.predict(u=u_t)
    if observed:
        r_pred, alpha_pred = util.relative_distance_polar(
            self.ukf.x[:2], x_t[:2], x_t[2])
        self.ukf.update(z_t,
                        R=self.obs_noise_func((r_pred, alpha_pred)),
                        agent_state=x_t)
    cov_new = self.ukf.P
    state_new = self.ukf.x
    # if LA.det(cov_new) < 1e6:
    self.cov = cov_new
    if not self.collision_func(state_new[:2]):
        self.state = np.clip(state_new, self.limit[0], self.limit[1])
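# A minimal sketch of how self.ukf is assumed to be constructed, using
# filterpy: extra keyword arguments to predict()/update() are forwarded to
# fx/hx, which is what makes `u=u_t` and `agent_state=x_t` above work. The
# models below (constant velocity, range-bearing) are illustrative only.
from filterpy.kalman import UnscentedKalmanFilter, MerweScaledSigmaPoints

def _fx_sketch(x, dt, u=None):
    # constant-velocity motion model; the control input u is unused here
    F = np.array([[1., 0., dt, 0.],
                  [0., 1., 0., dt],
                  [0., 0., 1., 0.],
                  [0., 0., 0., 1.]])
    return F @ x

def _hx_sketch(x, agent_state=None):
    # range-bearing measurement of the target relative to the agent
    dx, dy = x[0] - agent_state[0], x[1] - agent_state[1]
    return np.array([np.hypot(dx, dy),
                     np.arctan2(dy, dx) - agent_state[2]])

_points = MerweScaledSigmaPoints(n=4, alpha=0.1, beta=2.0, kappa=0.0)
_ukf_sketch = UnscentedKalmanFilter(dim_x=4, dim_z=2, dt=0.5,
                                    fx=_fx_sketch, hx=_hx_sketch,
                                    points=_points)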
def reset(self, **kwargs):
    """Randomly initialize the agents and targets at the start of an episode.

    Agents are given random positions in the map; targets are given random
    positions near a random agent. Returns an observation dict keyed by
    agent id.
    """
    # try:
    #     self.nb_agents = kwargs['nb_agents']
    #     self.nb_targets = kwargs['nb_targets']
    # except KeyError:
    #     self.nb_agents = np.random.randint(1, self.num_agents + 1)
    #     self.nb_targets = np.random.randint(1, self.num_targets + 1)
    # self.rdot =
    # self.thetadot =
    obs_dict = {}
    init_pose = self.get_init_pose(**kwargs)
    # Initialize agents
    for ii in range(self.nb_agents):
        self.agents[ii].reset(init_pose['agents'][ii])
        obs_dict[self.agents[ii].agent_id] = []
    # Initialize targets and beliefs
    for nn in range(self.nb_targets):
        self.belief_targets[nn].reset(
            init_state=init_pose['belief_targets'][nn],
            init_cov=self.target_init_cov)
        self.targets[nn].reset(init_pose['targets'][nn])
    # For each agent, compute the initial belief of every target
    for jj in range(self.nb_targets):
        for kk in range(self.nb_agents):
            r, alpha = util.relative_distance_polar(
                self.belief_targets[jj].state[:2],
                xy_base=self.agents[kk].state[:2],
                theta_base=self.agents[kk].state[2])
            logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
            obs_dict[self.agents[kk].agent_id].append(
                [r, alpha, logdetcov, 0.0, self.sensor_r, np.pi])
    for agent_id in obs_dict:
        obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
    return obs_dict
def update(self, z_t, x_t):
    """Kalman filter measurement update.

    Parameters
    ----------
    z_t : observation - radial and angular distances from the agent.
    x_t : agent state (x, y, orientation) in the global frame.
    """
    r_pred, alpha_pred = util.relative_distance_polar(
        self.state[:2], x_t[:2], x_t[2])
    diff_pred = self.state[:2] - x_t[:2]
    # Jacobian of the range-bearing measurement w.r.t. the target state
    if self.dim == 2:
        Hmat = np.array([[diff_pred[0], diff_pred[1]],
                         [-diff_pred[1] / r_pred, diff_pred[0] / r_pred]
                         ]) / r_pred
    elif self.dim == 4:
        Hmat = np.array([[diff_pred[0], diff_pred[1], 0.0, 0.0],
                         [-diff_pred[1] / r_pred, diff_pred[0] / r_pred,
                          0.0, 0.0]]) / r_pred
    else:
        raise ValueError('target dimension for KF must be either 2 or 4')
    innov = z_t - np.array([r_pred, alpha_pred])
    innov[1] = util.wrap_around(innov[1])
    # Innovation covariance S = H P H^T + R and Kalman gain K = P H^T S^-1
    S = Hmat @ self.cov @ Hmat.T \
        + self.obs_noise_func((r_pred, alpha_pred))
    K = self.cov @ Hmat.T @ LA.inv(S)
    C = np.eye(self.dim) - K @ Hmat
    cov_new = C @ self.cov
    state_new = self.state + K @ innov
    # if LA.det(cov_new) < 1e6:
    self.cov = cov_new
    self.state = np.clip(state_new, self.limit[0], self.limit[1])
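# A quick numerical sanity check of the Hmat formula above (hypothetical
# helper, not part of the repo): for h(p) = (r, alpha) with r = ||p - a|| and
# alpha = atan2(dy, dx) - theta, the analytic Jacobian
# [[dx/r, dy/r], [-dy/r^2, dx/r^2]] should match finite differences.
def _check_range_bearing_jacobian():
    agent = np.array([0.0, 0.0, 0.3])
    target = np.array([2.0, 1.0])

    def h(p):
        dx, dy = p - agent[:2]
        return np.array([np.hypot(dx, dy), np.arctan2(dy, dx) - agent[2]])

    dx, dy = target - agent[:2]
    r = np.hypot(dx, dy)
    H_analytic = np.array([[dx, dy], [-dy / r, dx / r]]) / r
    eps = 1e-6
    # forward differences, one measurement-function column per state component
    H_fd = np.column_stack([(h(target + eps * np.eye(2)[i]) - h(target)) / eps
                            for i in range(2)])
    assert np.allclose(H_fd, H_analytic, atol=1e-5)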
def step(self, action_dict):
    obs_dict = {}
    reward_dict = {}
    done_dict = {'__all__': False}
    info_dict = {}
    all_observations = np.zeros(self.nb_targets, dtype=bool)

    # Targets move (t -> t+1)
    for n in range(self.nb_targets):
        self.targets[n].update()

    # Agents move (t -> t+1) and observe the targets
    for ii, agent_id in enumerate(action_dict):
        obs_dict[self.agents[ii].agent_id] = []
        reward_dict[self.agents[ii].agent_id] = []
        done_dict[self.agents[ii].agent_id] = []
        action_vw = self.action_map[action_dict[agent_id]]

        # Positions of the targets and of the other agents, used to maintain
        # a margin between them
        margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]]
        for p, ids in enumerate(action_dict):
            if agent_id != ids:
                margin_pos.append(np.array(self.agents[p].state[:2]))
        _ = self.agents[ii].update(action_vw, margin_pos)

        # Target and map observations
        observed = np.zeros(self.nb_targets, dtype=bool)
        # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state)
        # if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)

        # Update beliefs of targets
        for jj in range(self.nb_targets):
            # Observe; if spotted, then the target has also been observed
            obs, z_t, spot = self.observation(self.targets[jj],
                                              self.agents[ii])
            observed[jj] = obs
            self.belief_targets[jj].update(
                spot, z_t, self.agents[ii].state,
                np.array([np.random.random(),
                          np.pi * np.random.random() - 0.5 * np.pi]))
            r_b, alpha_b = util.relative_distance_polar(
                self.belief_targets[jj].state[:2],
                xy_base=self.agents[ii].state[:2],
                theta_base=self.agents[ii].state[-1])
            obs_dict[agent_id].append([
                r_b, alpha_b,
                np.log(LA.det(self.belief_targets[jj].cov)),
                float(obs + spot), obstacles_pt[0], obstacles_pt[1]])
            # float(observed[jj]), obstacles_pt[0], obstacles_pt[1]])
        obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
        all_observations = np.logical_or(all_observations, observed)

    # Get all rewards after all agents and targets move (t -> t+1)
    reward, done, info_dict = self.get_reward(obstacles_pt, all_observations,
                                              self.is_training)
    reward_dict['__all__'], done_dict['__all__'] = reward, done
    return obs_dict, reward_dict, done_dict, info_dict
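# A minimal rollout sketch showing how the dict-based step API above is meant
# to be driven. `env` and its discrete action space are assumptions about the
# surrounding code, not part of this file.
def _rollout_sketch(env, max_steps=100):
    obs = env.reset(nb_targets=2)
    for _ in range(max_steps):
        # one discrete action index per agent, keyed by agent id
        actions = {agent_id: env.action_space.sample() for agent_id in obs}
        obs, rewards, dones, info = env.step(actions)
        if dones['__all__']:
            break
    return obs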