Example #1
    def step(self, action):
        self.agent.update(action, self.target.state)

        # Update the true target state
        self.target.update()
        # Observe
        measurements = self.agent.observation(self.target.target)
        obstacles_pt = map_utils.get_cloest_obstacle(self.MAP,
                                                     self.agent.state)
        # Update the belief of the agent on the target using KF
        GaussianBelief = IGL.MultiTargetFilter(measurements,
                                               self.agent.agent,
                                               debug=False)
        self.agent.update_belief(GaussianBelief)
        self.belief_target.update(self.agent.get_belief_state(),
                                  self.agent.get_belief_cov())

        reward, done, test_reward = self.get_reward(obstacles_pt,
                                                    measurements[0].validity,
                                                    self.is_training)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
        r_b, alpha_b, _ = util.relative_measure(
            self.agent.get_belief_state()[:2], self.agent.state)
        rel_target_vel = util.coord_change2b(self.agent.get_belief_state()[:2],
                                             alpha_b + self.agent.state[-1])
        self.state = np.array([
            r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
            np.log(LA.det(self.agent.get_belief_cov())),
            float(measurements[0].validity), obstacles_pt[0], obstacles_pt[1]
        ])
        return self.state, reward, done, {'test_reward': test_reward}
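Most of these examples convert the (belief) target position into a range and bearing relative to the agent with util.relative_measure(). That utility is not shown on this page; the sketch below is a minimal reimplementation under the assumption that the agent state is [x, y, theta] and that the function returns the range, the bearing wrapped to [-pi, pi) in the agent frame, and the raw position difference.

    import numpy as np

    def relative_measure(target_pos, agent_state):
        # Position difference between target and agent in the global frame.
        diff = np.asarray(target_pos[:2], dtype=float) - np.asarray(agent_state[:2], dtype=float)
        r = np.linalg.norm(diff)                               # range to the target
        alpha = np.arctan2(diff[1], diff[0]) - agent_state[2]  # bearing in the agent frame
        alpha = (alpha + np.pi) % (2 * np.pi) - np.pi          # wrap to [-pi, pi)
        return r, alpha, diff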
Example #2
    def reset(self, init_random=True):
        self.state = []
        if init_random:
            if self.MAP.map is None:
                a_init = self.agent_init_pos[:2]
                self.agent.reset(self.agent_init_pos)
            else:
                # Sample a collision-free agent position and a random heading.
                isvalid = False
                while (not isvalid):
                    a_init = np.random.random(
                        (2, )) * (self.MAP.mapmax -
                                  self.MAP.mapmin) + self.MAP.mapmin
                    isvalid = not (map_utils.is_collision(
                        self.MAP, a_init, MARGIN2WALL))
                self.agent.reset([
                    a_init[0], a_init[1],
                    np.random.random() * 2 * np.pi - np.pi
                ])
            # Initialize each target at the first entry of a stored path file
            # and seed its belief near that point.
            for i in range(self.num_targets):
                t_init = np.load("path_sh_%d.npy" % i)[0][:2]
                self.belief_targets[i].reset(init_state=np.concatenate(
                    (t_init + 10 * (np.random.rand(2) - 0.5), np.zeros(2))),
                                             init_cov=self.target_init_cov)
                self.targets[i].reset(
                    np.concatenate((t_init, self.target_init_vel)))
                r, alpha, _ = util.relative_measure(
                    self.belief_targets[i].state, self.agent.state)
                logdetcov = np.log(LA.det(self.belief_targets[i].cov))
                self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
        self.state.extend([self.sensor_r, np.pi])
        self.state = np.array(self.state)
        return self.state
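Both reset() variants seed each per-target feature block with the log-determinant of the initial belief covariance, the same uncertainty feature that step() later recomputes from the updated belief. A small numeric sketch of that quantity for a scalar initial variance applied to every state dimension, mirroring the expression used in Example #10:

    import numpy as np
    from numpy import linalg as LA

    target_init_cov = 10.0     # assumed scalar initial variance
    target_dim = 4             # [x, y, vx, vy] belief state
    init_cov = target_init_cov * np.eye(target_dim)
    logdetcov = np.log(LA.det(init_cov))   # equals target_dim * np.log(target_init_cov)
    print(logdetcov)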
Example #3
    def step(self, action):
        action_val = self.action_map[action]
        boundary_penalty = self.agent.update(
            action_val, [t.state[:2] for t in self.targets])
        obstacles_pt = map_utils.get_cloest_obstacle(self.MAP,
                                                     self.agent.state)
        observed = []
        for i in range(self.num_targets):
            self.targets[i].update(self.agent.state[:2])
            # Observe
            obs = self.observation(self.targets[i])
            observed.append(obs[0])
            # Update the belief of the agent on the target using KF
            self.belief_targets[i].update(obs[0], obs[1], self.agent.state)

        reward, done, test_reward = self.get_reward(obstacles_pt, observed,
                                                    self.is_training)
        self.state = []
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
        for i in range(self.num_targets):
            r_b, alpha_b, _ = util.relative_measure(
                self.belief_targets[i].state, self.agent.state)
            rel_target_vel = util.coord_change2b(
                self.belief_targets[i].state[2:],
                alpha_b + self.agent.state[-1])
            self.state.extend([
                r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
                np.log(LA.det(self.belief_targets[i].cov)),
                float(observed[i])
            ])
        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'test_reward': test_reward}
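In the multi-target step() above, the flattened observation stacks six features per target (range, bearing, two relative-velocity components, log-determinant of the belief covariance, and an observed flag), followed by the two closest-obstacle features. A small sketch of how such a vector can be split back into per-target blocks, assuming exactly that layout:

    import numpy as np

    def split_observation(state, num_targets, feats_per_target=6):
        # Per-target feature blocks come first, the two obstacle features last.
        state = np.asarray(state)
        target_feats = state[:num_targets * feats_per_target].reshape(
            num_targets, feats_per_target)
        obstacle_feats = state[num_targets * feats_per_target:]
        return target_feats, obstacle_feats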
Example #4
    def observation(self, target):
        r, alpha, _ = util.relative_measure(target.state,
                                            self.agent.state)  # True Value
        # The target is observed when it is within sensor range and field of
        # view and the line of sight is not blocked by the map.
        observed = (r <= self.sensor_r) \
                    & (abs(alpha) <= self.fov / 2 / 180 * np.pi) \
                    & (not (map_utils.is_blocked(self.MAP, self.agent.state, target.state)))
        z = None
        if observed:
            # Add Gaussian noise to the true range and bearing.
            z = np.array([r, alpha])
            z += np.random.multivariate_normal(np.zeros(2, ),
                                               self.observation_noise(z))
        return observed, z
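A measurement is generated only when the target is within sensor range and field of view and the line of sight is not blocked; Gaussian noise with covariance self.observation_noise(z) is then added to the true range and bearing. The noise model itself is not shown; the sketch below is one plausible, range-dependent diagonal covariance, with constants that are assumptions rather than the repository's values.

    import numpy as np

    def observation_noise(z, sigma_r=0.2, sigma_b=0.01):
        # z = [range, bearing]; let the range variance grow with distance (assumed form).
        r = z[0]
        return np.diag([sigma_r**2 * max(1.0, r / 10.0), sigma_b**2])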
Example #5
    def update(self, observed, z_t, x_t, u_t=None):
        # Unscented Kalman Filter prediction and, if a measurement arrived, update
        self.ukf.predict(u=u_t)

        if observed:
            r_pred, alpha_pred, _ = util.relative_measure(self.ukf.x, x_t)
            self.ukf.update(z_t,
                            R=self.obs_noise_func((r_pred, alpha_pred)),
                            agent_state=x_t)

        cov_new = self.ukf.P
        state_new = self.ukf.x

        if LA.det(cov_new) < 1e6:
            self.cov = cov_new
        if not(self.collision_func(state_new[:2])):
            self.state = np.clip(state_new, self.limit[0], self.limit[1])
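This wrapper only calls self.ukf.predict() and self.ukf.update(); the construction of the unscented filter is not shown. The sketch below is one way such a filter could be set up with FilterPy's UnscentedKalmanFilter for a constant-velocity target and a range-bearing measurement taken relative to the agent; it is an assumption about the surrounding code, not taken from it.

    import numpy as np
    from filterpy.kalman import UnscentedKalmanFilter, MerweScaledSigmaPoints

    def fx(x, dt, u=None):
        # Constant-velocity motion model for a [x, y, vx, vy] target (control input ignored).
        F = np.array([[1., 0., dt, 0.],
                      [0., 1., 0., dt],
                      [0., 0., 1., 0.],
                      [0., 0., 0., 1.]])
        return F.dot(x)

    def hx(x, agent_state=None):
        # Range-bearing measurement of the target relative to the agent pose [x, y, theta].
        diff = x[:2] - agent_state[:2]
        r = np.linalg.norm(diff)
        alpha = np.arctan2(diff[1], diff[0]) - agent_state[2]
        return np.array([r, (alpha + np.pi) % (2 * np.pi) - np.pi])

    points = MerweScaledSigmaPoints(n=4, alpha=0.1, beta=2.0, kappa=0.0)
    ukf = UnscentedKalmanFilter(dim_x=4, dim_z=2, dt=0.5, fx=fx, hx=hx, points=points)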
Example #6
    def reset(self, init_random=True):
        self.state = []
        if init_random:
            if self.MAP.map is None:
                a_init = self.agent_init_pos[:2]
                self.agent.reset(self.agent_init_pos)
            else:
                isvalid = False
                while (not isvalid):
                    a_init = np.random.random(
                        (2, )) * (self.MAP.mapmax -
                                  self.MAP.mapmin) + self.MAP.mapmin
                    isvalid = not (map_utils.is_collision(
                        self.MAP, a_init, MARGIN2WALL))
                self.agent.reset([
                    a_init[0], a_init[1],
                    np.random.random() * 2 * np.pi - np.pi
                ])
            for i in range(self.num_targets):
                isvalid = False
                while (not isvalid):
                    rand_ang = np.random.rand() * 2 * np.pi - np.pi
                    t_r = INIT_DISTANCE
                    t_init = np.array([
                        t_r * np.cos(rand_ang), t_r * np.sin(rand_ang)
                    ]) + a_init
                    if (np.sqrt(np.sum((t_init - a_init)**2)) < MARGIN):
                        isvalid = False
                    else:
                        isvalid = not (map_utils.is_collision(
                            self.MAP, t_init, MARGIN2WALL))

                self.belief_targets[i].reset(init_state=np.concatenate(
                    (t_init + 10 * (np.random.rand(2) - 0.5), np.zeros(2))),
                                             init_cov=self.target_init_cov)
                self.targets[i].reset(
                    np.concatenate((t_init, self.target_init_vel)))
                r, alpha, _ = util.relative_measure(
                    self.belief_targets[i].state, self.agent.state)
                logdetcov = np.log(LA.det(self.belief_targets[i].cov))
                self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
        self.state.extend([self.sensor_r, np.pi])
        self.state = np.array(self.state)
        self.local_map = map_utils.local_map(self.MAP, self.im_size,
                                             self.agent.state)
        return np.concatenate((self.local_map.flatten(), self.state))
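In this reset(), each target is placed at the fixed distance INIT_DISTANCE from the agent at a uniformly random angle, and the sample is rejected if it falls within MARGIN of the agent or collides with the map. A standalone sketch of that rejection-sampling loop, with the collision test stubbed out behind an assumed is_free() callback:

    import numpy as np

    def sample_target_init(a_init, init_distance, margin, is_free):
        # is_free(pos) -> bool is assumed to wrap the map collision check.
        while True:
            ang = np.random.rand() * 2 * np.pi - np.pi
            t_init = a_init + init_distance * np.array([np.cos(ang), np.sin(ang)])
            if np.linalg.norm(t_init - a_init) >= margin and is_free(t_init):
                return t_init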
Example #7
    def step(self, action):
        self.agent.update(action, self.targets.state)

        # Update the true target state
        self.targets.update()
        # Observe
        measurements = self.agent.observation(self.targets.target)
        obstacles_pt = map_utils.get_cloest_obstacle(self.MAP,
                                                     self.agent.state)
        # Update the belief of the agent on the target using KF
        GaussianBelief = IGL.MultiTargetFilter(measurements,
                                               self.agent.agent,
                                               debug=False)
        self.agent.update_belief(GaussianBelief)
        self.belief_targets.update(self.agent.get_belief_state(),
                                   self.agent.get_belief_cov())

        observed = [m.validity for m in measurements]
        reward, done, test_reward = self.get_reward(obstacles_pt, observed,
                                                    self.is_training)
        if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)

        self.state = []
        target_b_state = self.agent.get_belief_state()
        target_b_cov = self.agent.get_belief_cov()
        for n in range(self.num_targets):
            r_b, alpha_b, _ = util.relative_measure(
                target_b_state[self.target_dim * n:self.target_dim * n + 2],
                self.agent.state)
            rel_target_vel = util.coord_change2b(
                target_b_state[self.target_dim * n:self.target_dim * n + 2],
                alpha_b + self.agent.state[-1])
            self.state.extend([
                r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
                np.log(
                    LA.det(target_b_cov[self.target_dim * n:self.target_dim *
                                        (n + 1), self.target_dim *
                                        n:self.target_dim * (n + 1)])),
                float(observed[n])
            ])

        self.state.extend([obstacles_pt[0], obstacles_pt[1]])
        self.state = np.array(self.state)
        return self.state, reward, done, {'test_reward': test_reward}
Example #8
    def update(self, observed, z_t, x_t):
        # Kalman Filter Prediction and Update

        # Prediction
        state_predicted = np.matmul(self.A, self.state)
        cov_predicted = np.matmul(np.matmul(self.A, self.cov), self.A.T) + self.W

        # Update
        if observed: 
            r_pred, alpha_pred, diff_pred = util.relative_measure(state_predicted, x_t)
            if self.dim == 2:
                Hmat = np.array([[diff_pred[0], diff_pred[1]],
                                 [-diff_pred[1] / r_pred, diff_pred[0] / r_pred]]) / r_pred
            elif self.dim == 4:
                Hmat = np.array([[diff_pred[0], diff_pred[1], 0.0, 0.0],
                                 [-diff_pred[1] / r_pred, diff_pred[0] / r_pred, 0.0, 0.0]]) / r_pred
            else:
                raise ValueError('target dimension for KF must be either 2 or 4')
            innov = z_t - np.array([r_pred, alpha_pred])
            innov[1] = util.wrap_around(innov[1])

            # Innovation covariance and Kalman gain
            S = np.matmul(np.matmul(Hmat, cov_predicted), Hmat.T) \
                + self.obs_noise_func((r_pred, alpha_pred))
            K = np.matmul(np.matmul(cov_predicted, Hmat.T), LA.inv(S))
            C = np.eye(self.dim) - np.matmul(K, Hmat)

            cov_new = np.matmul(C, cov_predicted)
            state_new = state_predicted + np.matmul(K, innov)
        else:
            cov_new = cov_predicted
            state_new = state_predicted

        if LA.det(cov_new) < 1e6:
            self.cov = cov_new
        if not(self.collision_func(state_new[:2])):
            self.state = np.clip(state_new, self.limit[0], self.limit[1])
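The extended Kalman filter update in this example linearizes the range-bearing measurement around the predicted target position (the Hmat Jacobian) and wraps the bearing innovation with util.wrap_around() so that it stays within [-pi, pi). That helper is not shown; assuming it does nothing more than the wrapping, a minimal version is:

    import numpy as np

    def wrap_around(angle):
        # Map any angle in radians into the interval [-pi, pi).
        return (angle + np.pi) % (2 * np.pi) - np.pi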
Example #9
    def reset(self, init_random=True):
        if init_random:
            isvalid = False
            while (not isvalid):
                if self.MAP.map is None:
                    a_init = self.agent_init_pos[:2]
                else:
                    a_init = np.random.random(
                        (2, )) * (self.MAP.mapmax -
                                  self.MAP.mapmin) + self.MAP.mapmin
                if not (map_utils.is_collision(self.MAP, a_init, MARGIN2WALL)):
                    rand_ang = np.random.rand() * 2 * np.pi - np.pi
                    t_r = INIT_DISTANCE
                    t_init = np.array([
                        t_r * np.cos(rand_ang), t_r * np.sin(rand_ang)
                    ]) + a_init
                    if (np.sqrt(np.sum((t_init - a_init)**2)) < MARGIN):
                        isvalid = False
                    else:
                        isvalid = not (map_utils.is_collision(
                            self.MAP, t_init, MARGIN2WALL))
            a_init = IGL.SE3Pose(
                np.concatenate(
                    (a_init, [np.random.random() * 2 * np.pi - np.pi])),
                np.array([0, 0, 0, 1]))
        else:
            a_init = IGL.SE3Pose(self.agent_init_pos, np.array([0, 0, 0, 1]))
            t_init = self.target_init_pos

        # Build a target
        target = self.cfg.setup_se2_targets(
            n_targets=self.num_targets,
            init_odom=[
                np.concatenate(
                    (t_init, [np.random.rand() * 2 * np.pi - np.pi]))
            ],
            q=self.q,
            policy=Policy.random_policy(
                0.5, 60
            )  #, collision_func=lambda x: map_utils.is_collision(self.MAP, x, MARGIN2WALL))
        )  # Integrator Ground truth Model
        belief_target = self.cfg.setup_integrator_belief(
            n_targets=self.num_targets,
            q=self.q,
            init_pos=[t_init + 10. * (np.random.rand(2) - 0.5)],
            cov_pos=self.target_init_cov,
            cov_vel=self.target_init_cov,
            init_vel=(0.0, 0.0))
        # Build a robot
        self.agent.reset(a_init, belief_target)
        r, alpha, _ = util.relative_measure(self.agent.get_belief_state()[:2],
                                            self.agent.state)
        self.state = np.array([
            r, alpha, 0.0, 0.0,
            np.log(LA.det(self.agent.get_belief_cov())), 0.0, self.sensor_r,
            np.pi
        ])
        self.target.reset(target)
        self.belief_target.update(self.agent.get_belief_state(),
                                  self.agent.get_belief_cov())

        return np.array(self.state)
Example #10
    def reset(self, init_random=True):
        self.state = []
        t_init_sets = []
        t_init_b_sets = []
        if init_random:
            if self.MAP.map is None:
                a_init = self.agent_init_pos
                a_init_igl = IGL.SE3Pose(self.agent_init_pos,
                                         np.array([0, 0, 0, 1]))
            else:
                isvalid = False
                while (not isvalid):
                    a_init = np.random.random((2, )) * (
                        (self.MAP.mapmax - MARGIN) -
                        self.MAP.mapmin) + self.MAP.mapmin - .5 * MARGIN
                    isvalid = not (map_utils.is_collision(
                        self.MAP, a_init, MARGIN2WALL))
                a_init = np.concatenate(
                    (a_init, [np.random.random() * 2 * np.pi - np.pi]))
                a_init_igl = IGL.SE3Pose(a_init, np.array([0, 0, 0, 1]))

            for i in range(self.num_targets):
                isvalid = False
                while (not isvalid):
                    rand_ang = np.random.rand() * 2 * np.pi - np.pi
                    t_r = INIT_DISTANCE
                    t_init = np.array([
                        t_r * np.cos(rand_ang), t_r * np.sin(rand_ang)
                    ]) + a_init[:2]
                    if (np.sqrt(np.sum((t_init - a_init[:2])**2)) < MARGIN):
                        isvalid = False
                    else:
                        isvalid = not (map_utils.is_collision(
                            self.MAP, t_init, MARGIN2WALL))
                t_init_b_sets.append(t_init + 10 * (np.random.rand(2) - 0.5))
                t_init_sets.append(t_init)
                r, alpha, _ = util.relative_measure(t_init_b_sets[-1][:2],
                                                    a_init)
                logdetcov = np.log(
                    LA.det(self.target_init_cov * np.eye(self.target_dim)))
                self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])

        self.state.extend([self.sensor_r, np.pi])
        self.state = np.array(self.state)
        # Build a target
        target = self.cfg.setup_integrator_targets(
            n_targets=self.num_targets,
            init_pos=t_init_sets,
            init_vel=self.target_init_vel,
            q=self.q,
            max_vel=self.vel_limit[0])  # Integrator Ground truth Model
        belief_target = self.cfg.setup_integrator_belief(
            n_targets=self.num_targets,
            q=self.q,
            init_pos=t_init_b_sets,
            cov_pos=self.target_init_cov,
            cov_vel=self.target_init_cov,
            init_vel=(0.0, 0.0))
        # Build a robot
        self.agent.reset(a_init_igl, belief_target)
        self.targets.reset(target)
        self.belief_targets.update(self.agent.get_belief_state(),
                                   self.agent.get_belief_cov())
        return np.array(self.state)
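All of the step() and reset() methods above follow the classic four-value Gym interface: an observation vector, a scalar reward, a done flag, and an info dict carrying 'test_reward'. A minimal rollout sketch against such an environment, assuming env is an instance of one of these classes and exposes a Gym-style discrete action_space (not shown in the snippets):

    obs = env.reset()
    done = False
    episode_reward = 0.0
    while not done:
        action = env.action_space.sample()          # random policy, for illustration only
        obs, reward, done, info = env.step(action)
        episode_reward += reward
    print(episode_reward, info.get('test_reward'))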