Example #1
    def reward(self, state):
        """
        Reward of 200 when the robot is within the spatial precision of the
        current goal pose, a small penalty of -1 per step otherwise, and an
        extra penalty of -100 on collision.

        :param state State: full state of the environment
        :return float: the reward
        """

        self._state.update_goal(state.pose)
        robot_pose = state.pose
        spat_dist, ang_dist = pose_distances(self._state.current_goal_pose(),
                                             robot_pose)

        spat_near = spat_dist < self._params.spatial_precision

        reward = 0.0
        if spat_near:
            # large bonus for reaching the goal pose
            reward = 200.0
        else:
            # small per-step penalty while the goal is not yet reached
            reward -= 1.0

        if state.robot_collided:
            reward -= 100

        return reward
Example #2
import numpy as np  # pose_distances itself is imported from the module under test


def test_pose_distances():
    np.testing.assert_almost_equal(
        pose_distances(np.array([0., 0., 0.]), np.array([1., 1., 1])),
        (np.sqrt(2), 1))

    np.testing.assert_almost_equal(
        pose_distances(np.array([-1., -1., -1]), np.array([1., 1., 1])),
        (2 * np.sqrt(2), 2))

    np.testing.assert_almost_equal(
        pose_distances(np.array([-1., -1., -np.pi + 0.1]),
                       np.array([1., 1., np.pi - 0.1])), (2 * np.sqrt(2), 0.2))

    np.testing.assert_almost_equal(
        pose_distances(np.array([[0., 0., 0.], [-1., -1., -np.pi + 0.1]]),
                       np.array([[1., 1., 1], [1., 1., np.pi - 0.1]])),
        [[np.sqrt(2), 2 * np.sqrt(2)], [1, 0.2]])
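The implementation of pose_distances itself does not appear among these snippets. The sketch below is a minimal version consistent with the assertions above, assuming poses are (x, y, angle) arrays and that the angular term is the absolute angle difference wrapped to [0, pi]; the real implementation may differ.

import numpy as np


def pose_distances(pose_a, pose_b):
    """Spatial and angular distance between poses given as (x, y, angle).

    Accepts single poses of shape (3,) or batches of shape (N, 3) and
    returns a (spatial, angular) pair of scalars or arrays of N values.
    """
    pose_a = np.asarray(pose_a, dtype=float)
    pose_b = np.asarray(pose_b, dtype=float)

    # Euclidean distance over the (x, y) components
    spatial = np.linalg.norm(pose_a[..., :2] - pose_b[..., :2], axis=-1)

    # absolute angular difference wrapped to [0, pi]
    raw = pose_a[..., 2] - pose_b[..., 2]
    angular = np.abs(np.arctan2(np.sin(raw), np.cos(raw)))

    return spatial, angular

Wrapping the angle through arctan2(sin, cos) is what makes the third assertion above (poses just on either side of the +/-pi boundary) come out as 0.2 rather than 2*pi - 0.2.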
Example #3
    def reward(self, state):
        """
        If you have reached any new waypoint, then 1.
        Otherwise, if you are closer to the next waypoint than you used to be,
        get some small reward.

        :param state State: full state of the environment
        :return float: the reward
        """
        if self._state.done():
            return 0.0

        # See if you have reached any new point
        last_reached_idx = find_last_reached(state.pose, self._state.path,
                                             self._params.spatial_precision,
                                             self._params.angular_precision)

        if last_reached_idx is not None and last_reached_idx >= self._state.target_idx:
            # assign the reward for reaching a waypoint
            next_idx = last_reached_idx + 1

            self._state.target_idx = next_idx

            # set up the new waypoint
            if not self._state.done():
                dist_to_goal, _ = pose_distances(
                    self._state.current_goal_pose(), state.pose)
                self._state.min_spat_dist_so_far = dist_to_goal
            else:
                self._state.min_spat_dist_so_far = 0.0

            return 1.0
        else:
            # maybe we made some progress towards the current waypoint
            dist_to_goal, _ = pose_distances(self._state.current_goal_pose(),
                                             state.pose)

            if dist_to_goal < self._state.min_spat_dist_so_far:
                # We progressed toward the current waypoint
                reward = self._state.min_spat_dist_so_far - dist_to_goal
                self._state.min_spat_dist_so_far = dist_to_goal
                return reward * self._params.spatial_progress_multiplier
            else:
                # We made no progress towards the waypoint
                return 0.0
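find_last_reached is likewise not among these snippets. Judging only from how Examples 3 and 8 call it (a pose, the path, and both precision thresholds go in; the index of the last reached waypoint, or None when nothing is reached, comes out), a minimal sketch reusing pose_distances could look like this; the scan-the-whole-path strategy is an assumption, not the original implementation.

def find_last_reached(pose, path, spatial_precision, angular_precision):
    """Index of the last waypoint in path that pose has reached, or None.

    A waypoint counts as reached when both the spatial and the angular
    distance to it fall below the given precisions.
    """
    last_reached_idx = None
    for idx, waypoint in enumerate(path):
        spat_dist, ang_dist = pose_distances(waypoint, pose)
        if spat_dist < spatial_precision and ang_dist < angular_precision:
            last_reached_idx = idx
    return last_reached_idx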
Example #4
    def done(self, state):
        """ Are we done?
        :param state: current state of the environment
        :return bool: True if we are done with this environment. """
        robot_pose = state.pose
        spat_dist, _ = pose_distances(self.current_goal_pose(), robot_pose)
        # hardcoded spatial threshold of 1.0; the angular distance is ignored here
        goal_reached = spat_dist < 1.0

        return goal_reached
Example #5
    def done(self, state, spatial_precision, angular_precision):
        """ Are we done?
        :param state: current state of the environment
        :return bool: True if we are done with this environment. """
        robot_pose = state.pose
        spat_dist, angular_dist = pose_distances(self.current_goal_pose(), robot_pose)
        spat_near = spat_dist < spatial_precision
#         angular_near = angular_dist < angular_precision
#         goal_reached = spat_near and angular_near

        return spat_near
Example #6
    def generate_initial_state(path, params):  # pylint: disable=unused-argument
        """ Generate the initial state of the reward provider.
        :param path np.ndarray(N, 3): the static path
        :param params RewardParams: parametrization of the reward provider; not used here, kept for a consistent API
        :return ContinuousRewardProviderState: the initial state of the reward provider
        """
        initial_pose = path[0]

        target_idx = 1
        goal_pose = path[-1]
        dist_to_goal, _ = pose_distances(goal_pose, initial_pose)

        return ContinuousRewardPurePursuitProviderState(
            min_spat_dist_so_far=dist_to_goal,
            path=path,
            target_idx=target_idx)
Example #7
def _sample_mini_env_params(gen_params, rng):
    """
    Sample a mini env instance based on the passed params and check that the
    start and end poses are not colliding.
    If no valid sample can be drawn, raise an error.

    :param gen_params RandomMiniEnvParams: params of the random mini env
    :param rng np.random.RandomState: independent random state
    :return MiniEnvParams: sampled params of the environment
    """
    for _ in range(1000):
        # We need to check for collisions of the start and end poses
        try:
            drawn_params = _sample_mini_env_params_no_final_check(
                gen_params, rng)
        except SpaceSeemsEmptyError:
            continue

        costmap, static_path = prepare_map_and_path(drawn_params)
        robot = TricycleRobot(dimensions=get_dimensions_example(
            gen_params.env_params.robot_name))
        robot.set_front_wheel_angle(gen_params.env_params.initial_wheel_angle)

        x, y, angle = static_path[0]
        first_pose_collides = pose_collides(x, y, angle, robot, costmap)

        x, y, angle = static_path[1]
        second_pose_collides = pose_collides(x, y, angle, robot, costmap)

        collides = first_pose_collides or second_pose_collides

        # check that the start and goal poses are not already too close to each other
        cart_dist, ang_dist = pose_distances(static_path[0], static_path[1])
        cart_near = cart_dist < gen_params.env_params.goal_spat_dist
        ang_near = ang_dist < gen_params.env_params.goal_ang_dist
        too_close = cart_near and ang_near

        if not collides and not too_close:
            return drawn_params

    raise ValueError("Something went wrong, the sampling space looks empty.")
Example #8
    def generate_initial_state(path, params):
        """ Generate the initial state of the reward provider.
        :param path np.ndarray(N, 3): the static path
        :param params RewardParams: parametrization of the reward provider
        :return ContinuousRewardProviderState: the initial state of the reward provider
        """
        initial_pose = path[0]
        last_reached_idx = find_last_reached(initial_pose, path,
                                             params.spatial_precision,
                                             params.angular_precision)

        if last_reached_idx == len(path) - 1:
            # We are out of path to follow at the beginning!
            raise ValueError("Goal pose too close to initial pose")
        else:
            target_idx = last_reached_idx + 1

        goal_pose = path[target_idx]
        dist_to_goal, _ = pose_distances(goal_pose, initial_pose)

        return ContinuousRewardProviderState(min_spat_dist_so_far=dist_to_goal,
                                             path=path,
                                             target_idx=target_idx)
Example #9
    def reward(self, state):
        """
        If you have reached any point, then 1.
        Otherwise if you are closer to the next waypoint then you used to be,
        get some small reward.

        :param state State: full state of the environment
        :return float: the reward
        """

        self._state.update_goal(state.pose)

        # small constant per-step penalty
        reward = -0.05

        dist_to_goal, _ = pose_distances(self._state.current_goal_pose(),
                                         state.pose)
        reward += self._state.min_spat_dist_so_far - dist_to_goal
        self._state.min_spat_dist_so_far = dist_to_goal

        if state.robot_collided:
            reward -= 100

        return reward