    def state_func(self, action_vw, observed):
        # Find the closest obstacle coordinate.
        obstacles_pt, front_obstacle_r = self.MAP.get_closest_obstacle_v2(self.agent.state)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
        if front_obstacle_r is None:
            front_obstacle_r = self.sensor_r

        self.state = []
        for i in range(self.num_targets):
            r_b, alpha_b = util.relative_distance_polar(self.belief_targets[i].state[:2],
                                                xy_base=self.agent.state[:2],
                                                theta_base=self.agent.state[2])
            r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                                    self.belief_targets[i].state[:2],
                                    self.belief_targets[i].state[2:],
                                    self.agent.state[:2], self.agent.state[2],
                                    action_vw[0], action_vw[1])
            is_belief_blocked = self.MAP.is_blocked(self.agent.state[:2], self.belief_targets[i].state[:2])
            self.state.extend([r_b, alpha_b, r_dot_b, alpha_dot_b,
                                np.log(LA.det(self.belief_targets[i].cov)),
                                float(observed[i]), float(is_belief_blocked)])
        self.state.extend([obstacles_pt[0], obstacles_pt[1], front_obstacle_r])
        self.state = np.array(self.state)

        # Update the visit frequency map.
        b_speed = np.mean([np.sqrt(np.sum(self.belief_targets[i].state[2:]**2)) for i in range(self.num_targets)])
        decay_factor = np.exp(self.sampling_period*b_speed/self.sensor_r*np.log(0.7))
        self.MAP.update_visit_freq_map(self.agent.state, decay_factor, observed=bool(np.mean(observed)))
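Note: the decay factor above is just 0.7 raised to a normalized-displacement exponent. A minimal sketch of that identity (the numbers are hypothetical, not from the source):

import numpy as np

# exp(T * v / r * log(0.7)) == 0.7 ** (T * v / r): the visit-frequency map is scaled
# by 0.7 for each sensor-range worth of distance the mean belief speed v would cover.
T, v, r = 0.5, 1.0, 10.0   # hypothetical sampling period, mean belief speed, sensor range
decay_factor = np.exp(T * v / r * np.log(0.7))
assert np.isclose(decay_factor, 0.7 ** (T * v / r))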
Example #2
    def state_func(self, action_vw, observed):
        # Find the closest obstacle coordinate.
        obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

        self.state = []
        for i in range(self.num_targets):
            r_b, alpha_b = util.relative_distance_polar(
                self.belief_targets[i].state[:2],
                xy_base=self.agent.state[:2],
                theta_base=self.agent.state[2])
            self.state.extend([
                r_b, alpha_b,
                np.log(LA.det(self.belief_targets[i].cov)),
                float(observed[i])
            ])
        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)

        # Update the visit frequency map for evaluation purposes.
        if self.MAP.visit_map is not None:
            self.MAP.update_visit_freq_map(self.agent.state,
                                           1.0,
                                           observed=bool(np.mean(observed)))
Example #3
    def update(self, z_t, x_t):
        """
        Parameters
        --------
        z_t : observation - radial and angular distances from the agent.
        x_t : agent state (x, y, orientation) in the global frame.
        """
        # Kalman Filter Update
        r_pred, alpha_pred = util.relative_distance_polar(
            self.state[:2], x_t[:2], x_t[2])
        diff_pred = np.array(self.state[:2]) - np.array(x_t[:2])
        if self.dim == 2:
            Hmat = np.array([[diff_pred[0], diff_pred[1]],
                             [-diff_pred[1] / r_pred, diff_pred[0] / r_pred]
                             ]) / r_pred
        elif self.dim == 4:
            Hmat = np.array([
                [diff_pred[0], diff_pred[1], 0.0, 0.0],
                [-diff_pred[1] / r_pred, diff_pred[0] / r_pred, 0.0, 0.0]
            ]) / r_pred
        else:
            raise ValueError('target dimension for KF must be either 2 or 4')
        innov = z_t - np.array([r_pred, alpha_pred])
        innov[1] = util.wrap_around(innov[1])

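        # Innovation covariance, Kalman gain, then the standard covariance and state update.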
        R = np.matmul(np.matmul(Hmat, self.cov), Hmat.T) \
            + self.obs_noise_func((r_pred, alpha_pred))
        K = np.matmul(np.matmul(self.cov, Hmat.T), LA.inv(R))
        C = np.eye(self.dim) - np.matmul(K, Hmat)

        self.cov = np.matmul(C, self.cov)
        self.state = np.clip(self.state + np.matmul(K, innov), self.limit[0],
                             self.limit[1])
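Hmat above is the Jacobian of the range-bearing measurement with respect to the target position (the 4D case just pads zero columns for the velocity states). A standalone numerical check of that claim; h below re-implements what util.relative_distance_polar is assumed to compute:

import numpy as np

def h(p, x):
    # Range and bearing of point p seen from agent pose x = (x, y, theta).
    dx, dy = p[0] - x[0], p[1] - x[1]
    return np.array([np.hypot(dx, dy), np.arctan2(dy, dx) - x[2]])

p, x = np.array([3.0, 2.0]), np.array([0.5, -1.0, 0.3])
dx, dy = p - x[:2]
r = np.hypot(dx, dy)
Hmat = np.array([[dx, dy], [-dy / r, dx / r]]) / r   # same form as the dim == 2 case above
eps = 1e-6
H_num = np.column_stack([(h(p + eps * e, x) - h(p - eps * e, x)) / (2 * eps)
                         for e in np.eye(2)])
assert np.allclose(Hmat, H_num, atol=1e-5)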
Example #4
    def update(self, z_t, x_t):
        # Kalman Filter Update
        r_pred, alpha_pred = util.relative_distance_polar(
            self.ukf.x[:2], x_t[:2], x_t[2])
        self.ukf.update(z_t,
                        R=self.obs_noise_func((r_pred, alpha_pred)),
                        agent_state=x_t)

        self.cov = self.ukf.P
        self.state = np.clip(self.ukf.x, self.limit[0], self.limit[1])
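Here self.ukf is used like a filterpy-style UnscentedKalmanFilter whose update() forwards extra keyword arguments (agent_state) to the measurement function. A minimal construction sketch under that assumption; fx and hx are illustrative models, not the source's:

import numpy as np
from filterpy.kalman import UnscentedKalmanFilter, MerweScaledSigmaPoints

def fx(x, dt, u=None):
    # Constant-velocity target motion, state = [x, y, vx, vy].
    F = np.array([[1., 0., dt, 0.],
                  [0., 1., 0., dt],
                  [0., 0., 1., 0.],
                  [0., 0., 0., 1.]])
    return F @ x

def hx(x, agent_state=None):
    # Range and bearing of the target relative to the agent pose (x, y, theta).
    dx, dy = x[0] - agent_state[0], x[1] - agent_state[1]
    return np.array([np.hypot(dx, dy), np.arctan2(dy, dx) - agent_state[2]])

points = MerweScaledSigmaPoints(n=4, alpha=0.1, beta=2.0, kappa=0.0)
ukf = UnscentedKalmanFilter(dim_x=4, dim_z=2, dt=0.5, hx=hx, fx=fx, points=points)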
Example #5
 def observation(self, target):
     r, alpha = util.relative_distance_polar(target.state[:2],
                                         xy_base=self.agent.state[:2],
                                         theta_base=self.agent.state[2])
     observed = (r <= self.sensor_r) \
                 & (abs(alpha) <= self.fov/2/180*np.pi) \
                 & (not(self.MAP.is_blocked(self.agent.state, target.state)))
     z = None
     if observed:
         z = np.array([r, alpha])
         z += np.random.multivariate_normal(np.zeros(2,), self.observation_noise(z))
     return observed, z
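self.observation_noise(z) is expected to return the 2x2 range/bearing measurement covariance used to perturb z above (and as R in the belief update). A minimal constant-noise stand-in; the standard deviations are hypothetical and the source may scale them with range:

import numpy as np

def observation_noise(z, sigma_r=0.2, sigma_b=0.01):
    # Diagonal range/bearing covariance, independent of the measurement z in this sketch.
    return np.diag([sigma_r ** 2, sigma_b ** 2])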
Example #6
    def step(self, action):
        self.agent.update(action, self.targets.state)

        # Update the true target state
        self.targets.update()
        # Observe
        measurements = self.agent.observation(self.targets.target)
        obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
        # Update the belief of the agent on the target using KF
        GaussianBelief = infoplanner.IGL.MultiTargetFilter(measurements,
                                                           self.agent.agent,
                                                           debug=False)
        self.agent.update_belief(GaussianBelief)
        self.belief_targets.update(self.agent.get_belief_state(),
                                   self.agent.get_belief_cov())

        observed = [m.validity for m in measurements]
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

        self.state = []
        target_b_state = self.agent.get_belief_state()
        target_b_cov = self.agent.get_belief_cov()
        control_input = self.action_map[action]
        for n in range(self.num_targets):
            r_b, alpha_b = util.relative_distance_polar(
                target_b_state[self.target_dim * n:self.target_dim * n + 2],
                xy_base=self.agent.state[:2],
                theta_base=self.agent.state[2])
            r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                target_b_state[self.target_dim * n:self.target_dim * n + 2],
                target_b_state[self.target_dim * n + 2:], self.agent.state[:2],
                self.agent.state[2], control_input[0], control_input[1])
            self.state.extend([
                r_b, alpha_b, r_dot_b, alpha_dot_b,
                np.log(LA.det(
                    target_b_cov[self.target_dim * n:self.target_dim * (n + 1),
                                 self.target_dim * n:self.target_dim * (n + 1)])),
                float(observed[n])
            ])

        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'mean_nlogdetcov': mean_nlogdetcov}
Example #7
    def update(self, observed, z_t, x_t, u_t=None):
        # UKF prediction step; the measurement update runs only if the target was observed.
        self.ukf.predict(u=u_t)

        if observed:
            r_pred, alpha_pred = util.relative_distance_polar(
                self.ukf.x[:2], x_t[:2], x_t[2])
            self.ukf.update(z_t,
                            R=self.obs_noise_func((r_pred, alpha_pred)),
                            agent_state=x_t)

        cov_new = self.ukf.P
        state_new = self.ukf.x

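        # Accept the update only if the covariance has not diverged and the new mean does not fall inside an obstacle.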
        if LA.det(cov_new) < 1e6:
            self.cov = cov_new
        if not self.collision_func(state_new[:2]):
            self.state = np.clip(state_new, self.limit[0], self.limit[1])
Example #8
    def reset(self, **kwargs):
        self.state = []
        t_init_sets = []
        t_init_b_sets = []
        init_pose = self.get_init_pose(**kwargs)
        a_init_igl = infoplanner.IGL.SE3Pose(init_pose['agent'],
                                             np.array([0, 0, 0, 1]))
        for i in range(self.num_targets):
            t_init_b_sets.append(init_pose['belief_targets'][i][:2])
            t_init_sets.append(init_pose['targets'][i][:2])
            r, alpha = util.relative_distance_polar(
                np.array(t_init_b_sets[-1][:2]),
                xy_base=np.array(init_pose['agent'][:2]),
                theta_base=init_pose['agent'][2])
            logdetcov = np.log(
                LA.det(self.target_init_cov * np.eye(self.target_dim)))
            self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])

        self.state.extend([self.sensor_r, np.pi])
        self.state = np.array(self.state)
        # Build a target
        target = self.cfg.setup_integrator_targets(
            n_targets=self.num_targets,
            init_pos=t_init_sets,
            init_vel=self.target_init_vel,
            q=METADATA['const_q_true'],
            max_vel=METADATA['target_speed_limit']
        )  # Integrator Ground truth Model
        belief_target = self.cfg.setup_integrator_belief(
            n_targets=self.num_targets,
            q=METADATA['const_q'],
            init_pos=t_init_b_sets,
            cov_pos=self.target_init_cov,
            cov_vel=self.target_init_cov,
            init_vel=(0.0, 0.0))
        # Build a robot
        self.agent.reset(a_init_igl, belief_target)
        self.targets.reset(target)
        self.belief_targets.update(self.agent.get_belief_state(),
                                   self.agent.get_belief_cov())
        return np.array(self.state)
Example #9
 def reset(self, **kwargs):
     self.state = []
     init_pose = self.get_init_pose(**kwargs)
     self.agent.reset(init_pose['agent'])
     for i in range(self.num_targets):
         self.belief_targets[i].reset(
             init_state=np.concatenate((init_pose['belief_targets'][i], np.zeros(2))),
             init_cov=self.target_init_cov)
         t_init = np.concatenate(
             (init_pose['targets'][i], [self.target_init_vel[0], 0.0]))
         self.targets[i].reset(t_init)
         self.targets[i].policy.reset(t_init)
         r, alpha = util.relative_distance_polar(
             self.belief_targets[i].state[:2],
             xy_base=self.agent.state[:2],
             theta_base=self.agent.state[2])
         logdetcov = np.log(LA.det(self.belief_targets[i].cov))
         self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
     self.state.extend([self.sensor_r, np.pi])
     self.state = np.array(self.state)
     return self.state
Example #10
    def step(self, action):
        action_vw = self.action_map[action]
        is_col = self.agent.update(action_vw,
                                   [t.state[:2] for t in self.targets])
        obstacles_pt = self.MAP.get_closest_obstacle(self.agent.state)
        observed = []
        for i in range(self.num_targets):
            self.targets[i].update()
            # Observe
            obs = self.observation(self.targets[i])
            observed.append(obs[0])
            # Update the belief of the agent on the target using UKF
            self.belief_targets[i].update(
                obs[0], obs[1], self.agent.state,
                np.array([
                    np.random.random(),
                    np.pi * np.random.random() - 0.5 * np.pi
                ]))

        reward, done, mean_nlogdetcov = self.get_reward(self.is_training,
                                                        is_col=is_col)
        self.state = []
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
        for i in range(self.num_targets):
            r_b, alpha_b = util.relative_distance_polar(
                self.belief_targets[i].state[:2],
                xy_base=self.agent.state[:2],
                theta_base=self.agent.state[2])
            r_dot_b, alpha_dot_b = util.relative_velocity_polar_se2(
                self.belief_targets[i].state[:3],
                self.belief_targets[i].state[3:], self.agent.state, action_vw)
            self.state.extend([
                r_b, alpha_b, r_dot_b, alpha_dot_b,
                np.log(LA.det(self.belief_targets[i].cov)),
                float(observed[i])
            ])
        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'mean_nlogdetcov': mean_nlogdetcov}
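These step()/reset() methods follow the classic Gym interface, so a rollout looks like the usual loop. A hedged usage sketch; the constructor name is hypothetical and the random policy is for illustration only:

import numpy as np

env = TargetTrackingEnv()                              # hypothetical constructor
state = env.reset()
done = False
while not done:
    action = np.random.randint(len(env.action_map))    # random discrete action
    state, reward, done, info = env.step(action)
    print(reward, info['mean_nlogdetcov'])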