コード例 #1
0
    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default)
        initial_object_pose = self.initializer.get_initial_state()
        goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )

        self.info = {"difficulty": self.initializer.difficulty}

        self.step_count = 0

        return self._create_observation(0)
コード例 #2
0
    def reset(self):
        # reset simulation
        del self.platform
        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=TriFingerPlatform.spaces.robot_position.
            default,
            initial_object_pose=self.initializer.get_initial_state(),
        )

        goal = self.initializer.get_goal()
        self.goal = {
            "position": goal.position,
            "orientation": goal.orientation,
        }

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal.position,
                orientation=goal.orientation,
            )

        self.info = {"difficulty": self.initializer.difficulty}

        self.step_count = 0

        return self._create_observation(0)
コード例 #3
0
    def reset(self):
        """Reset the environment."""
        # hard-reset simulation
        del self.platform

        # initialize simulation
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default)
        # initialize cube at the centre
        initial_object_pose = mct.move_cube.Pose(
            position=mct.INITIAL_CUBE_POSITION)

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        # get goal trajectory from the initializer
        trajectory = self.initializer.get_trajectory()

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=mct.move_cube._CUBE_WIDTH,
                position=trajectory[0][1],
                orientation=(0, 0, 0, 1),
                pybullet_client_id=self.platform.simfinger._pybullet_client_id,
            )

        self.info = {"time_index": -1, "trajectory": trajectory}

        self.step_count = 0

        return self._create_observation(0)
コード例 #4
0
    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            # if no initializer is given (which will be the case during training),
            # we can initialize in any way desired. here, we initialize the cube always
            # in the center of the arena, instead of randomly, as this appears to help
            # training
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default
            )
            default_object_position = (
                TriFingerPlatform.spaces.object_position.default
            )
            default_object_orientation = (
                TriFingerPlatform.spaces.object_orientation.default
            )
            initial_object_pose = move_cube.Pose(
                position=default_object_position,
                orientation=default_object_orientation,
            )
            goal_object_pose = move_cube.sample_goal(difficulty=1)
        else:
            # if an initializer is given, i.e. during evaluation, we need to initialize
            # according to it, to make sure we remain coherent with the standard CubeEnv.
            # otherwise the trajectories produced during evaluation will be invalid.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default
            )
            initial_object_pose = self.initializer.get_initial_state()
            goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }
        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                pybullet_client_id=self.platform.simfinger._pybullet_client_id,
            )

        self.info = dict()

        self.step_count = 0

        return self._create_observation(0)
コード例 #5
0
    def test_object_pose_observation(self):
        Pose = namedtuple("Pose", ["position", "orientation"])
        pose = Pose([0.1, -0.5, 0], [0, 0, 0.2084599, 0.97803091])

        platform = TriFingerPlatform(initial_object_pose=pose)
        t = platform.append_desired_action(platform.Action())
        obs = platform.get_camera_observation(t)

        np.testing.assert_array_equal(obs.object_pose.position, pose.position)
        np.testing.assert_array_almost_equal(obs.object_pose.orientation,
                                             pose.orientation)
コード例 #6
0
    def test_timestamps(self):
        platform = TriFingerPlatform(visualization=False, enable_cameras=True)
        action = platform.Action()

        # ensure the camera frame rate is set to 10 Hz
        self.assertEqual(platform.camera_rate_fps, 10)

        # compute camera update step interval based on configured rates
        camera_update_step_interval = (
            1 / platform.camera_rate_fps) / platform._time_step
        # robot time step in milliseconds
        time_step_ms = platform._time_step * 1000

        # First time step
        t = platform.append_desired_action(action)
        first_stamp_ms = platform.get_timestamp_ms(t)
        first_stamp_s = first_stamp_ms / 1000

        camera_obs = platform.get_camera_observation(t)
        self.assertEqual(first_stamp_ms, camera_obs.cameras[0].timestamp)
        self.assertEqual(first_stamp_ms, camera_obs.cameras[1].timestamp)
        self.assertEqual(first_stamp_ms, camera_obs.cameras[2].timestamp)

        # Test time stamps of observations t+1
        camera_obs_next = platform.get_camera_observation(t + 1)
        next_stamp_ms = first_stamp_ms + time_step_ms
        self.assertEqual(next_stamp_ms, camera_obs_next.cameras[0].timestamp)
        self.assertEqual(next_stamp_ms, camera_obs_next.cameras[1].timestamp)
        self.assertEqual(next_stamp_ms, camera_obs_next.cameras[2].timestamp)

        # Second time step
        t = platform.append_desired_action(action)
        second_stamp_ms = platform.get_timestamp_ms(t)
        self.assertEqual(second_stamp_ms, first_stamp_ms + time_step_ms)

        # there should not be a new camera observation yet
        camera_obs = platform.get_camera_observation(t)
        self.assertEqual(first_stamp_s, camera_obs.cameras[0].timestamp)
        self.assertEqual(first_stamp_s, camera_obs.cameras[1].timestamp)
        self.assertEqual(first_stamp_s, camera_obs.cameras[2].timestamp)

        # do several steps until a new camera/object update is expected
        # (-1 because there is already one action appended above for the
        # "second time step")
        for _ in range(int(camera_update_step_interval - 1)):
            t = platform.append_desired_action(action)

        nth_stamp_ms = platform.get_timestamp_ms(t)
        nth_stamp_s = nth_stamp_ms / 1000
        self.assertGreater(nth_stamp_ms, second_stamp_ms)

        camera_obs = platform.get_camera_observation(t)
        self.assertEqual(nth_stamp_s, camera_obs.cameras[0].timestamp)
        self.assertEqual(nth_stamp_s, camera_obs.cameras[1].timestamp)
        self.assertEqual(nth_stamp_s, camera_obs.cameras[2].timestamp)
コード例 #7
0
    def test_get_camera_observation_timeindex(self):
        platform = TriFingerPlatform(enable_cameras=True)

        # negative time index needs to be rejected
        with self.assertRaises(ValueError):
            platform.get_camera_observation(-1)

        t = platform.append_desired_action(platform.Action())
        try:
            platform.get_camera_observation(t)
            platform.get_camera_observation(t + 1)
        except Exception:
            self.fail()

        with self.assertRaises(ValueError):
            platform.get_camera_observation(t + 2)
コード例 #8
0
class ExamplePushingTrainingEnv(gym.Env):
    """Gym environment for moving cubes with simulated TriFingerPro."""
    def __init__(
        self,
        initializer=None,
        action_type=cube_env.ActionType.POSITION,
        frameskip=1,
        visualization=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose and
                goal pose. If no initializer is provided, we will initialize in a way
                which is be helpful for learning.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            frameskip (int):  Number of actual control steps to be performed in
                one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================

        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================

        spaces = TriFingerPlatform.spaces

        if self.action_type == cube_env.ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == cube_env.ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == cube_env.ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque":
                spaces.robot_torque.gym,
                "position":
                spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        self.observation_names = [
            "robot_position",
            "robot_velocity",
            "robot_tip_positions",
            "object_position",
            "object_orientation",
            "goal_object_position",
        ]

        self.observation_space = gym.spaces.Dict({
            "robot_position":
            spaces.robot_position.gym,
            "robot_velocity":
            spaces.robot_velocity.gym,
            "robot_tip_positions":
            gym.spaces.Box(
                low=np.array([spaces.object_position.low] * 3),
                high=np.array([spaces.object_position.high] * 3),
            ),
            "object_position":
            spaces.object_position.gym,
            "object_orientation":
            spaces.object_orientation.gym,
            "goal_object_position":
            spaces.object_position.gym,
        })

    def step(self, action):
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space.")

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            previous_observation = self._create_observation(t)
            observation = self._create_observation(t + 1)

            reward += self._compute_reward(
                previous_observation=previous_observation,
                observation=observation,
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            # if no initializer is given (which will be the case during training),
            # we can initialize in any way desired. here, we initialize the cube always
            # in the center of the arena, instead of randomly, as this appears to help
            # training
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default)
            default_object_position = (
                TriFingerPlatform.spaces.object_position.default)
            default_object_orientation = (
                TriFingerPlatform.spaces.object_orientation.default)
            initial_object_pose = move_cube.Pose(
                position=default_object_position,
                orientation=default_object_orientation,
            )
            goal_object_pose = move_cube.sample_goal(difficulty=1)
        else:
            # if an initializer is given, i.e. during evaluation, we need to initialize
            # according to it, to make sure we remain coherent with the standard CubeEnv.
            # otherwise the trajectories produced during evaluation will be invalid.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default)
            initial_object_pose = self.initializer.get_initial_state()
            goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }
        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )

        self.info = dict()

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)
        robot_tip_positions = self.platform.forward_kinematics(
            robot_observation.position)
        robot_tip_positions = np.array(robot_tip_positions)

        observation = {
            "robot_position": robot_observation.position,
            "robot_velocity": robot_observation.velocity,
            "robot_tip_positions": robot_tip_positions,
            "object_position": object_observation.position,
            "object_orientation": object_observation.orientation,
            "goal_object_position": self.goal["position"],
        }
        return observation

    @staticmethod
    def _compute_reward(previous_observation, observation):

        # calculate first reward term
        current_distance_from_block = np.linalg.norm(
            observation["robot_tip_positions"] -
            observation["object_position"])
        previous_distance_from_block = np.linalg.norm(
            previous_observation["robot_tip_positions"] -
            previous_observation["object_position"])

        reward_term_1 = (previous_distance_from_block -
                         current_distance_from_block)

        # calculate second reward term
        current_dist_to_goal = np.linalg.norm(
            observation["goal_object_position"] -
            observation["object_position"])
        previous_dist_to_goal = np.linalg.norm(
            previous_observation["goal_object_position"] -
            previous_observation["object_position"])
        reward_term_2 = previous_dist_to_goal - current_dist_to_goal

        reward = 750 * reward_term_1 + 250 * reward_term_2

        return reward

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == cube_env.ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == cube_env.ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == cube_env.ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"], position=gym_action["position"])
        else:
            raise ValueError("Invalid action_type")

        return robot_action
コード例 #9
0
class CubeEnv(gym.GoalEnv):
    """Gym environment for moving cubes with simulated TriFingerPro."""
    def __init__(
        self,
        initializer,
        action_type=ActionType.POSITION,
        frameskip=1,
        visualization=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose and
                goal pose.  See :class:`RandomInitializer` and
                :class:`FixedInitializer`.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            frameskip (int):  Number of actual control steps to be performed in
                one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================

        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        # TODO: The name "frameskip" makes sense for an atari environment but
        # not really for our scenario.  The name is also misleading as
        # "frameskip = 1" suggests that one frame is skipped while it actually
        # means "do one step per step" (i.e. no skip).
        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================

        spaces = TriFingerPlatform.spaces

        object_state_space = gym.spaces.Dict({
            "position":
            spaces.object_position.gym,
            "orientation":
            spaces.object_orientation.gym,
        })

        if self.action_type == ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque":
                spaces.robot_torque.gym,
                "position":
                spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        self.observation_space = gym.spaces.Dict({
            "observation":
            gym.spaces.Dict({
                "position": spaces.robot_position.gym,
                "velocity": spaces.robot_velocity.gym,
                "torque": spaces.robot_torque.gym,
            }),
            "desired_goal":
            object_state_space,
            "achieved_goal":
            object_state_space,
        })

    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the reward for the given achieved and desired goal.
        Args:
            achieved_goal (dict): Current pose of the object.
            desired_goal (dict): Goal pose of the object.
            info (dict): An info dictionary containing a field "difficulty"
                which specifies the difficulty level.
        Returns:
            float: The reward that corresponds to the provided achieved goal
            w.r.t. to the desired goal. Note that the following should always
            hold true::
                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """
        return -move_cube.evaluate_state(
            move_cube.Pose.from_dict(desired_goal),
            move_cube.Pose.from_dict(achieved_goal),
            info["difficulty"],
        )

    def step(self, action):
        """Run one timestep of the environment's dynamics.
        When end of episode is reached, you are responsible for calling
        ``reset()`` to reset this environment's state.
        Args:
            action: An action provided by the agent (depends on the selected
                :class:`ActionType`).
        Returns:
            tuple:
            - observation (dict): agent's observation of the current
              environment.
            - reward (float) : amount of reward returned after previous action.
            - done (bool): whether the episode has ended, in which case further
              step() calls will return undefined results.
            - info (dict): info dictionary containing the difficulty level of
              the goal.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space.")

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            # Use observations of step t + 1 to follow what would be expected
            # in a typical gym environment.  Note that on the real robot, this
            # will not be possible
            observation = self._create_observation(t + 1)

            reward += self.compute_reward(
                observation["achieved_goal"],
                observation["desired_goal"],
                self.info,
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        # reset simulation
        del self.platform
        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=TriFingerPlatform.spaces.robot_position.
            default,
            initial_object_pose=self.initializer.get_initial_state(),
        )

        goal = self.initializer.get_goal()
        self.goal = {
            "position": goal.position,
            "orientation": goal.orientation,
        }

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal.position,
                orientation=goal.orientation,
            )

        self.info = {"difficulty": self.initializer.difficulty}

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Sets the seed for this env’s random number generator.
        .. note::
           Spaces need to be seeded separately.  E.g. if you want to sample
           actions directly from the action space using
           ``env.action_space.sample()`` you can set a seed there using
           ``env.action_space.seed()``.
        Returns:
            List of seeds used by this environment.  This environment only uses
            a single seed, so the list contains only one element.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)

        observation = {
            "observation": {
                "position": robot_observation.position,
                "velocity": robot_observation.velocity,
                "torque": robot_observation.torque,
            },
            "desired_goal": self.goal,
            "achieved_goal": {
                "position": object_observation.position,
                "orientation": object_observation.orientation,
            },
        }
        return observation

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"], position=gym_action["position"])
        else:
            raise ValueError("Invalid action_type")

        return robot_action
コード例 #10
0
class CubeTrajectoryEnv(gym.Env):
    """Gym environment for moving cubes with simulated TriFingerPro."""
    def __init__(
        self,
        initializer: Initializer,
        action_type: ActionType = ActionType.POSITION,
        step_size: int = 1,
        visualization: bool = False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing goal trajectories.
                See :class:`RandomInitializer` and :class:`FixedInitializer`.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            step_size (int):  Number of actual control steps to be performed in
                one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================

        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        if step_size < 1:
            raise ValueError("step_size cannot be less than 1.")
        self.step_size = step_size

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================

        spaces = TriFingerPlatform.spaces

        if self.action_type == ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque":
                spaces.robot_torque.gym,
                "position":
                spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        self.observation_space = gym.spaces.Dict({
            "observation":
            gym.spaces.Dict({
                "position": spaces.robot_position.gym,
                "velocity": spaces.robot_velocity.gym,
                "torque": spaces.robot_torque.gym,
            }),
            "desired_goal":
            spaces.object_position.gym,
            "achieved_goal":
            spaces.object_position.gym,
        })

    def compute_reward(
        self,
        achieved_goal: mct.Position,
        desired_goal: mct.Position,
        info: dict,
    ) -> float:
        """Compute the reward for the given achieved and desired goal.

        Args:
            achieved_goal: Current position of the object.
            desired_goal: Goal trajectory of the object.
            info: An info dictionary containing a field "time_index" which
                contains the time index of the achieved_goal.

        Returns:
            The reward that corresponds to the provided achieved goal w.r.t. to
            the desired goal. Note that the following should always hold true::

                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """
        # This is just some sanity check to verify that the given desired_goal
        # actually matches with the active goal in the trajectory.
        active_goal = np.asarray(
            mct.get_active_goal(self.info["trajectory"],
                                self.info["time_index"]))
        assert np.all(active_goal == desired_goal), "{}: {} != {}".format(
            info["time_index"], active_goal, desired_goal)

        return -mct.evaluate_state(info["trajectory"], info["time_index"],
                                   achieved_goal)

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        When end of episode is reached, you are responsible for calling
        ``reset()`` to reset this environment's state.

        Args:
            action: An action provided by the agent (depends on the selected
                :class:`ActionType`).

        Returns:
            tuple:

            - observation (dict): agent's observation of the current
              environment.
            - reward (float): amount of reward returned after previous action.
            - done (bool): whether the episode has ended, in which case further
              step() calls will return undefined results.
            - info (dict): info dictionary containing the current time index.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space.")

        num_steps = self.step_size

        # ensure episode length is not exceeded due to step_size
        step_count_after = self.step_count + num_steps
        if step_count_after > mct.EPISODE_LENGTH:
            excess = step_count_after - mct.EPISODE_LENGTH
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > mct.EPISODE_LENGTH:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            # update goal visualization
            if self.visualization:
                goal_position = mct.get_active_goal(self.info["trajectory"], t)
                self.goal_marker.set_state(goal_position, (0, 0, 0, 1))

            # Use observations of step t + 1 to follow what would be expected
            # in a typical gym environment.  Note that on the real robot, this
            # will not be possible
            self.info["time_index"] = t + 1

            # Alternatively use the observation of step t.  This is the
            # observation from the moment before action_t is applied, i.e. the
            # result of that action is not yet visible in this observation.
            #
            # When using this observation, the resulting cumulative reward
            # should match exactly the one computed during replay (with the
            # above it will differ slightly).
            #
            # self.info["time_index"] = t

            observation = self._create_observation(self.info["time_index"])

            reward += self.compute_reward(
                observation["achieved_goal"],
                observation["desired_goal"],
                self.info,
            )

        is_done = self.step_count >= mct.EPISODE_LENGTH

        return observation, reward, is_done, self.info

    def reset(self):
        """Reset the environment."""
        # hard-reset simulation
        del self.platform

        # initialize simulation
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default)
        # initialize cube at the centre
        initial_object_pose = mct.move_cube.Pose(
            position=mct.INITIAL_CUBE_POSITION)

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        # get goal trajectory from the initializer
        trajectory = self.initializer.get_trajectory()

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=mct.move_cube._CUBE_WIDTH,
                position=trajectory[0][1],
                orientation=(0, 0, 0, 1),
                pybullet_client_id=self.platform.simfinger._pybullet_client_id,
            )

        self.info = {"time_index": -1, "trajectory": trajectory}

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Sets the seed for this env’s random number generator.

        .. note::

           Spaces need to be seeded separately.  E.g. if you want to sample
           actions directly from the action space using
           ``env.action_space.sample()`` you can set a seed there using
           ``env.action_space.seed()``.

        Returns:
            List of seeds used by this environment.  This environment only uses
            a single seed, so the list contains only one element.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        mct.move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        robot_observation = self.platform.get_robot_observation(t)
        camera_observation = self.platform.get_camera_observation(t)
        object_observation = camera_observation.filtered_object_pose

        active_goal = np.asarray(
            mct.get_active_goal(self.info["trajectory"], t))

        observation = {
            "observation": {
                "position": robot_observation.position,
                "velocity": robot_observation.velocity,
                "torque": robot_observation.torque,
            },
            "desired_goal": active_goal,
            "achieved_goal": object_observation.position,
        }

        return observation

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"], position=gym_action["position"])
        else:
            raise ValueError("Invalid action_type")

        return robot_action
コード例 #11
0
def test_timestamps_no_camera_delay():
    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=0
    )
    action = platform.Action()

    # ensure the camera frame rate is set to 10 Hz
    assert platform.camera_rate_fps == 10

    # compute camera update step interval based on configured rates
    camera_update_step_interval = (
        1 / platform.camera_rate_fps
    ) / platform._time_step
    # robot time step in milliseconds
    time_step_ms = platform._time_step * 1000

    # First time step
    t = platform.append_desired_action(action)
    first_stamp_ms = platform.get_timestamp_ms(t)
    first_stamp_s = first_stamp_ms / 1000

    camera_obs = platform.get_camera_observation(t)
    assert first_stamp_s == camera_obs.cameras[0].timestamp
    assert first_stamp_s == camera_obs.cameras[1].timestamp
    assert first_stamp_s == camera_obs.cameras[2].timestamp

    # Test time stamps of observations t+1 (with the current implementation,
    # the observation should be exactly the same as for t).
    camera_obs_next = platform.get_camera_observation(t + 1)
    assert first_stamp_s == camera_obs_next.cameras[0].timestamp
    assert first_stamp_s == camera_obs_next.cameras[1].timestamp
    assert first_stamp_s == camera_obs_next.cameras[2].timestamp

    # Second time step
    t = platform.append_desired_action(action)
    second_stamp_ms = platform.get_timestamp_ms(t)
    assert second_stamp_ms == first_stamp_ms + time_step_ms

    # there should not be a new camera observation yet
    camera_obs = platform.get_camera_observation(t)
    assert first_stamp_s == camera_obs.cameras[0].timestamp
    assert first_stamp_s == camera_obs.cameras[1].timestamp
    assert first_stamp_s == camera_obs.cameras[2].timestamp

    # do several steps until a new camera/object update is expected
    # (-1 because there is already one action appended above for the
    # "second time step")
    for _ in range(int(camera_update_step_interval - 1)):
        t = platform.append_desired_action(action)

    nth_stamp_ms = platform.get_timestamp_ms(t)
    nth_stamp_s = nth_stamp_ms / 1000
    assert nth_stamp_ms > second_stamp_ms

    camera_obs = platform.get_camera_observation(t)
    assert nth_stamp_s == camera_obs.cameras[0].timestamp
    assert nth_stamp_s == camera_obs.cameras[1].timestamp
    assert nth_stamp_s == camera_obs.cameras[2].timestamp
コード例 #12
0
def test_camera_timestamps_with_camera_delay_simple():
    # Basic test with simple values: Camera update every 2 steps, delay of 1
    # step.
    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=1
    )
    action = platform.Action()

    # set camera rate to 100 fps so we need less stepping in this test
    platform.camera_rate_fps = 500
    assert platform._compute_camera_update_step_interval() == 2

    initial_stamp_s = platform.get_timestamp_ms(0) / 1000

    # first step triggers camera (we get the initial observation at this point)
    t = platform.append_desired_action(action)
    assert t == 0
    camera_obs = platform.get_camera_observation(t)
    assert initial_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert initial_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert initial_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    trigger1_stamp_s = platform.get_timestamp_ms(t) / 1000

    # in second step observation is ready but still has stamp zero
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger1_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    # in third step new observation is triggered but due to delay we still get
    # the old one
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger1_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    trigger2_stamp_s = platform.get_timestamp_ms(t) / 1000
    assert trigger2_stamp_s > trigger1_stamp_s

    # in forth step the new observation is ready
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger2_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger2_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger2_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    # again trigger but we get previous observation due to delay
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger2_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger2_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger2_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    trigger3_stamp_s = platform.get_timestamp_ms(t) / 1000
    assert trigger3_stamp_s > trigger2_stamp_s

    # and there should be an update again
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger3_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger3_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger3_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"
コード例 #13
0
def test_camera_timestamps_with_camera_delay_more_than_rate():
    # In this test the delay is higher than the camera rate, so this results in
    # the effective rate to be reduced.
    # Use small numbers (camera update interval 2 and delay 3) to keep the test
    # manageable.

    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=3
    )
    action = platform.Action()

    # set high camera rate so we need less stepping in this test
    platform.camera_rate_fps = 500
    assert platform._compute_camera_update_step_interval() == 2

    initial_stamp_s = platform.get_timestamp_ms(0) / 1000

    # first step triggers camera (we get the initial observation at this point)
    t = platform.append_desired_action(action)
    assert t == 0
    camera_obs = platform.get_camera_observation(t)
    assert initial_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert initial_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert initial_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    trigger1_stamp_s = platform.get_timestamp_ms(t) / 1000

    # now it takes 3 steps until we actually see the new observation
    t = platform.append_desired_action(action)
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert initial_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert initial_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert initial_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger1_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    # Only now, one step later, the next update is triggered
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger1_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"

    trigger2_stamp_s = platform.get_timestamp_ms(t) / 1000
    assert trigger2_stamp_s > trigger1_stamp_s

    # And again it takes 3 steps until we actually see the new observation
    t = platform.append_desired_action(action)
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger1_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger1_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert trigger2_stamp_s == camera_obs.cameras[0].timestamp, f"t={t}"
    assert trigger2_stamp_s == camera_obs.cameras[1].timestamp, f"t={t}"
    assert trigger2_stamp_s == camera_obs.cameras[2].timestamp, f"t={t}"
コード例 #14
0
def test_camera_timestamps_with_camera_delay_less_than_rate():
    # A bit more complex example (probably redundant with the simple one above
    # but since I already implemented it, let's keep it).

    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=10
    )
    action = platform.Action()

    # ensure the camera frame rate is set to 10 Hz
    assert platform.camera_rate_fps == 10
    assert platform._compute_camera_update_step_interval() == 100

    first_stamp_s = platform.get_timestamp_ms(0) / 1000

    # The start is a bit tricky because of the initial observation which has
    # timestamp 0 but the next observation is triggered in the first step,
    # resulting in the same timestamp.  So first step 100 times, until the next
    # camera update is triggered.
    for i in range(100):
        t = platform.append_desired_action(action)

        # In each step, we still should see the camera observation from step 0
        cameras = platform.get_camera_observation(t).cameras
        assert first_stamp_s == cameras[0].timestamp, f"i={i}, t={t}"
        assert first_stamp_s == cameras[1].timestamp, f"i={i}, t={t}"
        assert first_stamp_s == cameras[2].timestamp, f"i={i}, t={t}"

    assert t == 99

    # one more step to trigger the next camera update
    t = platform.append_desired_action(action)
    second_stamp_s = platform.get_timestamp_ms(t) / 1000

    # The next observation should be triggered now but due to the delay, we
    # should still see the old observation for the next 9 steps
    for i in range(9):
        t = platform.append_desired_action(action)
        cameras = platform.get_camera_observation(t).cameras
        assert first_stamp_s == cameras[0].timestamp, f"i={i}, t={t}"
        assert first_stamp_s == cameras[1].timestamp, f"i={i}, t={t}"
        assert first_stamp_s == cameras[2].timestamp, f"i={i}, t={t}"

    # after the next step, we should see an updated camera observation which
    # again persists for 100 steps
    for i in range(100):
        t = platform.append_desired_action(action)
        cameras = platform.get_camera_observation(t).cameras
        assert second_stamp_s == cameras[0].timestamp, f"i={i}, t={t}"
        assert second_stamp_s == cameras[1].timestamp, f"i={i}, t={t}"
        assert second_stamp_s == cameras[2].timestamp, f"i={i}, t={t}"

    # and now the next update should be there
    t = platform.append_desired_action(action)
    camera_obs = platform.get_camera_observation(t)
    assert second_stamp_s < camera_obs.cameras[0].timestamp
コード例 #15
0
    def test_get_object_pose_timeindex(self):
        platform = TriFingerPlatform()

        # negative time index needs to be rejected
        with self.assertRaises(ValueError):
            platform.get_object_pose(-1)

        t = platform.append_desired_action(platform.Action())
        try:
            platform.get_object_pose(t)
            platform.get_object_pose(t + 1)
        except Exception:
            self.fail()

        with self.assertRaises(ValueError):
            platform.get_object_pose(t + 2)
コード例 #16
0
    def test_timestamps(self):
        platform = TriFingerPlatform(visualization=False, enable_cameras=True)
        action = platform.Action()

        # compute camera update step interval based on configured rates
        camera_update_step_interval = (
            1 / platform._camera_rate_fps) / platform._time_step
        # robot time step in milliseconds
        time_step_ms = platform._time_step * 1000

        # First time step
        t = platform.append_desired_action(action)
        first_stamp_ms = platform.get_timestamp_ms(t)
        first_stamp_s = first_stamp_ms / 1000

        object_pose = platform.get_object_pose(t)
        camera_obs = platform.get_camera_observation(t)
        self.assertEqual(first_stamp_ms, object_pose.timestamp)
        self.assertEqual(first_stamp_ms, camera_obs.cameras[0].timestamp)
        self.assertEqual(first_stamp_ms, camera_obs.cameras[1].timestamp)
        self.assertEqual(first_stamp_ms, camera_obs.cameras[2].timestamp)

        # Test time stamps of observations t+1
        object_pose_next = platform.get_object_pose(t + 1)
        camera_obs_next = platform.get_camera_observation(t + 1)
        next_stamp_ms = first_stamp_ms + time_step_ms
        self.assertEqual(next_stamp_ms, object_pose_next.timestamp)
        self.assertEqual(next_stamp_ms, camera_obs_next.cameras[0].timestamp)
        self.assertEqual(next_stamp_ms, camera_obs_next.cameras[1].timestamp)
        self.assertEqual(next_stamp_ms, camera_obs_next.cameras[2].timestamp)

        # XXX ===============================================================
        # The following part of the test is disabled as currently everything is
        # updated in each step (i.e. no lower update rate for camera and
        # object).
        return

        # Second time step
        t = platform.append_desired_action(action)
        second_stamp_ms = platform.get_timestamp_ms(t)
        self.assertEqual(second_stamp_ms, first_stamp_ms + time_step_ms)

        # there should not be a new camera observation yet
        object_pose = platform.get_object_pose(t)
        camera_obs = platform.get_camera_observation(t)
        self.assertEqual(first_stamp_s, object_pose.timestamp)
        self.assertEqual(first_stamp_s, camera_obs.cameras[0].timestamp)
        self.assertEqual(first_stamp_s, camera_obs.cameras[1].timestamp)
        self.assertEqual(first_stamp_s, camera_obs.cameras[2].timestamp)

        # do several steps until a new camera/object update is expected
        for _ in range(int(camera_update_step_interval)):
            t = platform.append_desired_action(action)

        nth_stamp_ms = platform.get_timestamp_ms(t)
        nth_stamp_s = nth_stamp_ms / 1000
        self.assertGreater(nth_stamp_ms, second_stamp_ms)

        object_pose = platform.get_object_pose(t)
        camera_obs = platform.get_camera_observation(t)
        self.assertEqual(nth_stamp_s, object_pose.timestamp)
        self.assertEqual(nth_stamp_s, camera_obs.cameras[0].timestamp)
        self.assertEqual(nth_stamp_s, camera_obs.cameras[1].timestamp)
        self.assertEqual(nth_stamp_s, camera_obs.cameras[2].timestamp)