Ejemplo n.º 1
0
 def reset(self, **kwargs):
     """
     Agents are given random positions in the map, targets are given random positions near a random agent.
     Return an observation state dict with agent ids (keys) that refer to their observation
     """
     obs_dict = {}
     init_pose = self.get_init_pose(**kwargs)
     # Initialize agents
     for ii in range(self.num_agents):
         self.agents[ii].reset(init_pose['agents'][ii])
         obs_dict[self.agents[ii].agent_id] = []
     # Initialize targets and beliefs
     for jj in range(self.num_targets):
         self.belief_targets[jj].reset(init_state=np.concatenate(
             (init_pose['belief_targets'][jj][:2], np.zeros(2))),
                                       init_cov=self.target_init_cov)
         self.targets[jj].reset(
             np.concatenate(
                 (init_pose['targets'][jj][:2], self.target_init_vel)))
         #For each agent calculate belief of all targets
         for kk in range(self.num_agents):
             r, alpha = util.relative_distance_polar(
                 self.belief_targets[jj].state[:2],
                 xy_base=self.agents[kk].state[:2],
                 theta_base=self.agents[kk].state[2])
             logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
             obs_dict[self.agents[kk].agent_id].extend(
                 [r, alpha, 0.0, 0.0, logdetcov, 0.0])
     for agent_id in obs_dict:
         obs_dict[agent_id].extend([self.sensor_r, np.pi])
     return obs_dict
    def step(self, action_dict):
        obs_dict = {}
        reward_dict = {}
        done_dict = {'__all__':False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.nb_targets):
            self.targets[n].update() 
            self.belief_targets[n].predict() # Belief state at t+1
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]

            # Locations of all targets and agents in order to maintain a margin between them
            margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]]
            for p, ids in enumerate(action_dict):
                if agent_id != ids:
                    margin_pos.append(np.array(self.agents[p].state[:2]))
            _ = self.agents[ii].update(action_vw, margin_pos)
            
            # Target and map observations
            observed = np.zeros(self.nb_targets, dtype=bool)
            # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state)
            # if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

            # Update beliefs of targets
            for jj in range(self.nb_targets):
                # Observe
                obs, z_t = self.observation(self.targets[jj], self.agents[ii])
                observed[jj] = obs
                if obs: # if observed, update the target belief.
                    self.belief_targets[jj].update(z_t, self.agents[ii].state)

                r_b, alpha_b = util.relative_distance_polar(self.belief_targets[jj].state[:2],
                                        xy_base=self.agents[ii].state[:2], 
                                        theta_base=self.agents[ii].state[-1])
                r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                                        self.belief_targets[jj].state[:2],
                                        self.belief_targets[jj].state[2:],
                                        self.agents[ii].state[:2], self.agents[ii].state[-1],
                                        action_vw[0], action_vw[1])
                obs_dict[agent_id].append([r_b, alpha_b, r_dot_b, alpha_dot_b,
                                        np.log(LA.det(self.belief_targets[jj].cov)), 
                                        float(obs), obstacles_pt[0], obstacles_pt[1]])
        # Assign target
        for agent_id in obs_dict:
            obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
            obs_dict[agent_id] = obs_dict[agent_id][None,self.greedy_dict[agent_id]]
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(obstacles_pt, observed, self.is_training)
        reward_dict['__all__'], done_dict['__all__'], info_dict['mean_nlogdetcov'] = reward, done, mean_nlogdetcov
        return obs_dict, reward_dict, done_dict, info_dict
Ejemplo n.º 3
0
    def step(self, action_dict):
        obs_dict = {}
        reward_dict = {}
        done_dict = {'__all__': False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.num_targets):
            self.targets[n].update()
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]
            _ = self.agents[ii].update(action_vw,
                                       [t.state[:2] for t in self.targets])

            observed = []
            for jj in range(self.num_targets):
                # Observe
                obs = self.observation(self.targets[jj], self.agents[ii])
                observed.append(obs[0])
                self.belief_targets[jj].predict()  # Belief state at t+1
                if obs[0]:  # if observed, update the target belief.
                    self.belief_targets[jj].update(obs[1],
                                                   self.agents[ii].state)

            obstacles_pt = map_utils.get_closest_obstacle(
                self.MAP, self.agents[ii].state)

            if obstacles_pt is None:
                obstacles_pt = (self.sensor_r, np.pi)
            for kk in range(self.num_targets):
                r_b, alpha_b = util.relative_distance_polar(
                    self.belief_targets[kk].state[:2],
                    xy_base=self.agents[ii].state[:2],
                    theta_base=self.agents[ii].state[-1])
                r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                    self.belief_targets[kk].state[:2],
                    self.belief_targets[kk].state[2:],
                    self.agents[ii].state[:2], self.agents[ii].state[-1],
                    action_vw[0], action_vw[1])
                obs_dict[agent_id].extend([
                    r_b, alpha_b, r_dot_b, alpha_dot_b,
                    np.log(LA.det(self.belief_targets[kk].cov)),
                    float(observed[kk])
                ])
            obs_dict[agent_id].extend([obstacles_pt[0], obstacles_pt[1]])
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        reward_dict['__all__'], done_dict['__all__'], info_dict[
            'mean_nlogdetcov'] = reward, done, mean_nlogdetcov
        return obs_dict, reward_dict, done_dict, info_dict
Ejemplo n.º 4
0
    def reset(self, **kwargs):
        """
        Random initialization a number of agents and targets at the reset of the env epsiode.
        Agents are given random positions in the map, targets are given random positions near a random agent.
        Return an observation state dict with agent ids (keys) that refer to their observation
        """
        try:
            self.nb_agents = kwargs['nb_agents']
            self.nb_targets = kwargs['nb_targets']
        except:
            self.nb_agents = np.random.random_integers(1, self.num_agents)
            self.nb_targets = np.random.random_integers(1, self.num_targets)
        obs_dict = {}
        init_pose = self.get_init_pose(**kwargs)
        # Initialize agents
        for ii in range(self.nb_agents):
            self.agents[ii].reset(init_pose['agents'][ii])
            obs_dict[self.agents[ii].agent_id] = []

        # Initialize targets and beliefs
        for nn in range(self.nb_targets):
            self.belief_targets[nn].reset(init_state=np.concatenate(
                (init_pose['belief_targets'][nn][:2], np.zeros(2))),
                                          init_cov=self.target_init_cov)
            self.targets[nn].reset(
                np.concatenate(
                    (init_pose['targets'][nn][:2], self.target_init_vel)))
        # For nb agents calculate belief of targets assigned
        for jj in range(self.nb_targets):
            for kk in range(self.nb_agents):
                r, alpha = util.relative_distance_polar(
                    self.belief_targets[jj].state[:2],
                    xy_base=self.agents[kk].state[:2],
                    theta_base=self.agents[kk].state[2])
                logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
                obs_dict[self.agents[kk].agent_id].append(
                    [r, alpha, 0.0, 0.0, logdetcov, 0.0, self.sensor_r, np.pi])
        # Greedily assign agents to closest target in order, all targets assigned if agents > targets
        mask = np.ones(self.nb_targets, bool)
        if self.nb_targets > self.nb_agents:
            oracle = 1
        else:
            oracle = 0
        for agent_id in obs_dict:
            obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
            if np.sum(mask) != np.maximum(
                    0, self.nb_targets - self.nb_agents + oracle):
                idx = np.flatnonzero(mask)
                close = idx[np.argmin(obs_dict[agent_id][:, 0][mask])]
                obs_dict[agent_id] = obs_dict[agent_id][None, close]
                mask[close] = False
        return obs_dict
Ejemplo n.º 5
0
    def reset(self, **kwargs):
        """
        Random initialization a number of agents and targets at the reset of the env epsiode.
        Agents are given random positions in the map, targets are given random positions near a random agent.
        Return an observation state dict with agent ids (keys) that refer to their observation
        """
        try:
            self.nb_agents = kwargs['nb_agents']
            self.nb_targets = kwargs['nb_targets']
        except:
            self.nb_agents = np.random.random_integers(1, self.num_agents)
            self.nb_targets = np.random.random_integers(1, self.num_targets)
        obs_dict = {}
        init_pose = self.get_init_pose(**kwargs)
        # Initialize agents
        for ii in range(self.nb_agents):
            self.agents[ii].reset(init_pose['agents'][ii])
            obs_dict[self.agents[ii].agent_id] = []

        # Initialize targets and beliefs
        for nn in range(self.nb_targets):
            self.belief_targets[nn].reset(init_state=np.concatenate(
                (init_pose['belief_targets'][nn][:2], np.zeros(2))),
                                          init_cov=self.target_init_cov)
            self.targets[nn].reset(
                np.concatenate(
                    (init_pose['targets'][nn][:2], self.target_init_vel)))
        # For nb agents calculate belief of targets assigned
        for jj in range(self.nb_targets):
            for kk in range(self.nb_agents):
                r, alpha = util.relative_distance_polar(
                    self.belief_targets[jj].state[:2],
                    xy_base=self.agents[kk].state[:2],
                    theta_base=self.agents[kk].state[2])
                logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
                obs_dict[self.agents[kk].agent_id].append(
                    [r, alpha, 0.0, 0.0, logdetcov, 0.0, self.sensor_r, np.pi])

        # Assign agents to closest k targets, if less targets than k, consider all targets
        self.k = 8
        if self.nb_targets > self.k:
            for agent_id in obs_dict:
                obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
                close = np.argpartition(obs_dict[agent_id][:, 0], self.k)
                obs_dict[agent_id] = obs_dict[agent_id][close[:self.k]]
        else:
            for agent_id in obs_dict:
                obs_dict[agent_id] = np.asarray(obs_dict[agent_id])

        return obs_dict
Ejemplo n.º 6
0
 def observation(self, target, agent):
     r, alpha = util.relative_distance_polar(target.state[:2],
                                         xy_base=agent.state[:2], 
                                         theta_base=agent.state[2])    
     observed = (r <= self.sensor_r) \
                 & (abs(alpha) <= self.fov/2/180*np.pi) \
                 & (not(map_utils.is_blocked(self.MAP, agent.state, target.state)))
     z = None
     if observed:
         z = np.array([r, alpha])
         # z += np.random.multivariate_normal(np.zeros(2,), self.observation_noise(z))
         z += self.np_random.multivariate_normal(np.zeros(2,), self.observation_noise(z))
     '''For some reason, self.np_random is needed only here instead of np.random in order for the 
     RNG seed to work, if used in the gen_rand_pose functions RNG seed will NOT work '''
     return observed, z
Ejemplo n.º 7
0
    def update(self, observed, z_t, x_t, u_t=None):
        # Kalman Filter Update
        self.ukf.predict(u=u_t)

        if observed:
            r_pred, alpha_pred = util.relative_distance_polar(
                self.ukf.x[:2], x_t[:2], x_t[2])
            self.ukf.update(z_t,
                            R=self.obs_noise_func((r_pred, alpha_pred)),
                            agent_state=x_t)

        cov_new = self.ukf.P
        state_new = self.ukf.x

        if LA.det(cov_new) < 1e6:
            self.cov = cov_new
        if not (self.collision_func(state_new[:2])):
            self.state = np.clip(state_new, self.limit[0], self.limit[1])
Ejemplo n.º 8
0
    def update(self, z_t, x_t):
        """
        Parameters
        --------
        z_t : observation - radial and angular distances from the agent.
        x_t : agent state (x, y, orientation) in the global frame.
        """
        # Kalman Filter Update
        r_pred, alpha_pred = util.relative_distance_polar(
            self.state[:2], x_t[:2], x_t[2])
        diff_pred = self.state[:2] - x_t[:2]
        if self.dim == 2:
            Hmat = np.array([[diff_pred[0], diff_pred[1]],
                             [-diff_pred[1] / r_pred, diff_pred[0] / r_pred]
                             ]) / r_pred
        elif self.dim == 4:
            Hmat = np.array([
                [diff_pred[0], diff_pred[1], 0.0, 0.0],
                [-diff_pred[1] / r_pred, diff_pred[0] / r_pred, 0.0, 0.0]
            ]) / r_pred
        else:
            raise ValueError('target dimension for KF must be either 2 or 4')
        innov = z_t - np.array([r_pred, alpha_pred])
        innov[1] = util.wrap_around(innov[1])

        R = np.matmul(np.matmul(Hmat, self.cov), Hmat.T) \
                                + self.obs_noise_func((r_pred, alpha_pred))
        K = np.matmul(np.matmul(self.cov, Hmat.T), LA.inv(R))
        C = np.eye(self.dim) - np.matmul(K, Hmat)

        cov_new = np.matmul(C, self.cov)
        state_new = self.state + np.matmul(K, innov)

        if True:  #LA.det(cov_new) < 1e6:
            self.cov = cov_new
        self.state = np.clip(state_new, self.limit[0], self.limit[1])
Ejemplo n.º 9
0
    def step(self, action_dict):
        obs_dict = {}
        reward_dict = {}
        done_dict = {'__all__': False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.nb_targets):
            self.targets[n].update()
            self.belief_targets[n].predict()  # Belief state at t+1
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]

            # Locations of all targets and agents in order to maintain a margin between them
            margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]]
            for p, ids in enumerate(action_dict):
                if agent_id != ids:
                    margin_pos.append(np.array(self.agents[p].state[:2]))
            _ = self.agents[ii].update(action_vw, margin_pos)

            # Target and map observations
            observed = np.zeros(self.nb_targets, dtype=bool)
            # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state)
            # if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

            # Update beliefs of targets
            for jj in range(self.nb_targets):
                # Observe
                obs, z_t = self.observation(self.targets[jj], self.agents[ii])
                observed[jj] = obs
                if obs:  # if observed, update the target belief.
                    self.belief_targets[jj].update(z_t, self.agents[ii].state)

                    r_b, alpha_b = util.relative_distance_polar(
                        self.targets[jj].state[:2],
                        xy_base=self.agents[ii].state[:2],
                        theta_base=self.agents[ii].state[-1])
                    r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                        self.targets[jj].state[:2], self.targets[jj].state[2:],
                        self.agents[ii].state[:2], self.agents[ii].state[-1],
                        action_vw[0], action_vw[1])
                    obs_dict[agent_id].append(
                        [r_b, alpha_b, r_dot_b, alpha_dot_b,
                         float(obs)])
                else:
                    # if no obs, take old obs + kalman like prediction (random noise)
                    # self._obs_dict[agent_id][jj][:2] += self.np_random.multivariate_normal(np.zeros(2,),
                    #                                     self.observation_noise(self._obs_dict[agent_id][jj][:2]))
                    # self._obs_dict[agent_id][jj][2:4] += self.np_random.multivariate_normal(np.zeros(2,),
                    #                                     self.observation_noise(self._obs_dict[agent_id][jj][:2]))
                    # self._obs_dict[agent_id][jj][4] = float(obs)
                    # obs_dict[agent_id].append(self._obs_dict[agent_id][jj])
                    obs_dict[agent_id].append([0.0, 0.0, 0.0, 0.0, float(obs)])

            obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
            # store obs dict before shuffle
            self._obs_dict[agent_id] = obs_dict[agent_id]
            # shuffle obs to promote permutation invariance
            self.rng.shuffle(obs_dict[agent_id])
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        reward_dict['__all__'], done_dict['__all__'], info_dict[
            'mean_nlogdetcov'] = reward, done, mean_nlogdetcov
        return obs_dict, reward_dict, done_dict, info_dict