예제 #1
0
class PandaLift(PandaEnv):
    """
    This class corresponds to the lifting task for the Panda robot arm.
    """
    def __init__(
        self,
        gripper_type="PandaGripper",
        table_full_size=(0.8, 0.8, 0.8),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=True,
            )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        cube = BoxObject(
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:

            # reaching reward
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            di["object-state"] = np.concatenate(
                [cube_pos, cube_quat, di["gripper_to_cube"]])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        return cube_height > table_height + 0.04

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #2
0
class PandaPush(change_dof(PandaEnv, 7, 8)):  # don't need to control a gripper
    """
    This class corresponds to the pushing task for the Panda robot arm.
    """

    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        #'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        'boxobject_size_0': [0.018, 0.022],
        'boxobject_size_1': [0.018, 0.022],
        'boxobject_size_2': [0.018, 0.022],
        'boxobject_friction_0': [0.05, 0.15],
        #'boxobject_friction_1': [0.0025, 0.0075],    # fixed this to zero
        'boxobject_friction_2': [0.00005, 0.00015],
        'boxobject_density_1000': [0.06, 0.14],
    }

    def reset_props(self,
                    table_size_0=0.8,
                    table_size_1=0.8,
                    table_size_2=0.8,
                    table_friction_0=0.,
                    table_friction_1=0.005,
                    table_friction_2=0.0001,
                    boxobject_size_0=0.020,
                    boxobject_size_1=0.020,
                    boxobject_size_2=0.020,
                    boxobject_friction_0=0.1,
                    boxobject_friction_1=0.0,
                    boxobject_friction_2=0.0001,
                    boxobject_density_1000=0.1,
                    **kwargs):

        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1,
                               table_friction_2)
        self.boxobject_size = (boxobject_size_0, boxobject_size_1,
                               boxobject_size_2)
        self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1,
                                   boxobject_friction_2)
        self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            object_obs_process (bool): if True, process the object observation to get a task_state.
                Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing.
        """

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # self.placement_initializer = UniformRandomSampler(
            #     x_range=[-0.1, 0.1],
            #     y_range=[-0.1, 0.1],
            #     ensure_object_boundary_in_range=False,
            #     z_rotation=None,
            # )
            self.placement_initializer = UniformRandomSamplerObjectSpecific(
                x_ranges=[[-0.03, -0.02], [0.09, 0.1]],
                y_ranges=[[-0.05, -0.04], [-0.05, -0.04]],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # for first initialization
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (0., 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (0.1, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_cube = cube

        goal = CylinderObject(
            size=[0.03, 0.001],
            rgba=[0, 1, 0, 1],
        )
        self.mujoco_goal = goal

        self.mujoco_objects = OrderedDict([("cube", cube), ("goal", goal)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
            visual_objects=['goal'],
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

        # gripper ids
        self.goal_body_id = self.sim.model.body_name2id('goal')
        self.goal_site_id = self.sim.model.site_name2id('goal')

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.sim.forward()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)
        # shut the gripper
        self.sim.data.qpos[
            self._ref_joint_gripper_actuator_indexes] = np.array([0., -0.])

        # set other reference attributes
        eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape(
            (3, 3))
        self.world_rot_in_eef = copy.deepcopy(
            eef_rot_in_world.T
        )  # TODO inspect on this: should we set a golden reference other than a initial position?

    # reward function from sawyer_push
    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [-inf, 0], to encourage the arm to reach the object
            Goal Distance: in [-inf, 0] the distance between the pushed object and the goal
            Safety reward in [-inf, 0], -1 for every joint that is at its limit.

        The sparse reward only receives a {0,1} upon reaching the goal

        Args:
            action (np array): The action taken in that timestep

        Returns:
            reward (float): the reward
            previously in robosuite-extra, when dense reward is used, the return value will be a dictionary. but we removed that feature.
        """
        reward = 0.

        # sparse completion reward
        if not self.reward_shaping and self._check_success():
            reward = 1.0

        # use a dense reward
        if self.reward_shaping:
            object_pos = self.sim.data.body_xpos[self.cube_body_id]

            # max joint angles reward
            joint_limits = self._joint_ranges
            current_joint_pos = self._joint_positions

            hitting_limits_reward = -int(
                any([(x < joint_limits[i, 0] + 0.05
                      or x > joint_limits[i, 1] - 0.05)
                     for i, x in enumerate(current_joint_pos)]))

            reward += hitting_limits_reward

            # reaching reward
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - object_pos)
            reaching_reward = -0.15 * dist
            reward += reaching_reward

            # print(gripper_site_pos, object_pos, reaching_reward)

            # Success Reward
            success = self._check_success()
            if (success):
                reward += 0.1

            # goal distance reward
            goal_pos = self.sim.data.site_xpos[self.goal_site_id]

            dist = np.linalg.norm(goal_pos - object_pos)
            goal_distance_reward = -dist
            reward += goal_distance_reward

            # punish when there is a line of object--gripper--goal
            angle_g_o_g = angle_between(gripper_site_pos - object_pos,
                                        goal_pos - object_pos)
            if not success and angle_g_o_g < np.pi / 2.:
                reward += -0.03 - 0.02 * (np.pi / 2. - angle_g_o_g)

            # print('grippersitepos', gripper_site_pos,
            #       'objpos', object_pos,
            #       'jointangles', hitting_limits_reward,
            #       'reaching', reaching_reward,
            #       'success', success,
            #       'goaldist', goal_distance_reward)

            unstable = reward < -2.5

            # # Return all three types of rewards
            # reward = {"reward": reward, "reaching_distance": -10 * reaching_reward,
            #           "goal_distance": - goal_distance_reward,
            #           "hitting_limits_reward": hitting_limits_reward,
            #           "unstable":unstable}

        return reward

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        object_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.site_xpos[self.goal_site_id]

        dist = np.linalg.norm(goal_pos - object_pos)
        goal_horizontal_radius = self.model.mujoco_objects[
            'goal'].get_horizontal_radius()

        # object centre is within the goal radius
        return dist < goal_horizontal_radius

    def step(self, action):
        """ explicitly shut the gripper """
        joined_action = np.append(action, [1.])
        return super().step(joined_action)

    def world2eef(self, world):
        return self.world_rot_in_eef.dot(world)

    def put_raw_object_obs(self, di):
        # Extract position and velocity of the eef
        eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
        eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")

        # print('eef_pos_in_world', eef_pos_in_world)

        # Get the position, velocity, rotation  and rotational velocity of the object in the world frame
        object_pos_in_world = self.sim.data.body_xpos[self.cube_body_id]
        object_xvelp_in_world = self.sim.data.get_body_xvelp('cube')
        object_rot_in_world = self.sim.data.get_body_xmat('cube')

        # Get the z-angle with respect to the reference position and do sin-cosine encoding
        # world_rotation_in_reference = np.array([[0., 1., 0., ], [-1., 0., 0., ], [0., 0., 1., ]])
        # object_rotation_in_ref = world_rotation_in_reference.dot(object_rot_in_world)
        # object_euler_in_ref = T.mat2euler(object_rotation_in_ref)
        # z_angle = object_euler_in_ref[2]

        object_quat = convert_quat(self.sim.data.body_xquat[self.cube_body_id],
                                   to='xyzw')

        # Get the goal position in the world
        goal_site_pos_in_world = np.array(
            self.sim.data.site_xpos[self.goal_site_id])

        # Record observations into a dictionary
        di['goal_pos_in_world'] = goal_site_pos_in_world
        di['eef_pos_in_world'] = eef_pos_in_world
        di['eef_vel_in_world'] = eef_xvelp_in_world
        di['object_pos_in_world'] = object_pos_in_world
        di['object_vel_in_world'] = object_xvelp_in_world
        # di["z_angle"] = np.array([z_angle])
        di['object_quat'] = object_quat

    def process_object_obs(self, di):
        # z_angle = di['z_angle']
        # sine_cosine = np.array([np.sin(8*z_angle), np.cos(8*z_angle)]).reshape((2,))

        eef_to_object_in_world = di['object_pos_in_world'] - di[
            'eef_pos_in_world']
        # eef_to_object_in_eef = self.world2eef(eef_to_object_in_world)

        object_to_goal_in_world = di['goal_pos_in_world'] - di[
            'object_pos_in_world']
        # object_to_goal_in_eef = self.world2eef(object_to_goal_in_world)

        # object_xvelp_in_eef = self.world2eef(di['object_vel_in_world'])
        # eef_xvelp_in_eef = self.world2eef(di['eef_vel_in_world'])

        task_state = np.concatenate([
            eef_to_object_in_world, object_to_goal_in_world,
            di['eef_vel_in_world'], di['object_vel_in_world'],
            di['object_quat']
        ])

        di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            gripper_to_object : The x-y component of the gripper to object distance
            object_to_goal : The x-y component of the object-to-goal distance
            object_z_rot : the roation of the object around an axis sticking out the table

            object_xvelp: x-y linear velocity of the object
            gripper_xvelp: x-y linear velocity of the gripper


            task-state : a concatenation of all the above.
        """
        # di = super()._get_observation()  # joint angles & vel, which we don't need.
        di = OrderedDict()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            self.put_raw_object_obs(di)
            if self.object_obs_process:
                self.process_object_obs(di)

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_contact_with(self, object):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if ((self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                 and contact.geom2 == self.sim.model.geom_name2id(object)) or
                (self.sim.model.geom_id2name(
                    contact.geom2) in self.gripper.contact_geoms()
                 and contact.geom1 == self.sim.model.geom_name2id(object))):
                collision = True
                break
        return collision

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #3
0
class PandaStack(PandaEnv):
    """
    This class corresponds to the stacking task for the Panda robot arm.
    """
    def __init__(
        self,
        gripper_type="PandaGripper",
        table_full_size=(0.8, 0.8, 0.8),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to show visual aid about where is the gripper
        self.gripper_visualization = gripper_visualization

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.08, 0.08],
                y_range=[-0.08, 0.08],
                ensure_object_boundary_in_range=False,
                z_rotation=True,
            )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

        # reward configuration
        self.reward_shaping = reward_shaping

        # information of objects
        # self.object_names = [o['object_name'] for o in self.object_metadata]
        self.object_names = list(self.mujoco_objects.keys())
        self.object_site_ids = [
            self.sim.model.site_name2id(ob_name)
            for ob_name in self.object_names
        ]

        # id of grippers for contact checking
        self.finger_names = self.gripper.contact_geoms()

        # self.sim.data.contact # list, geom1, geom2
        self.collision_check_geom_names = self.sim.model._geom_name2id.keys()
        self.collision_check_geom_ids = [
            self.sim.model._geom_name2id[k]
            for k in self.collision_check_geom_names
        ]

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        cubeA = BoxObject(size_min=[0.02, 0.02, 0.02],
                          size_max=[0.02, 0.02, 0.02],
                          rgba=[1, 0, 0, 1])
        cubeB = BoxObject(
            size_min=[0.025, 0.025, 0.025],
            size_max=[0.025, 0.025, 0.025],
            rgba=[0, 1, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cubeA_body_id = self.sim.model.body_name2id("cubeA")
        self.cubeB_body_id = self.sim.model.body_name2id("cubeB")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cubeA_geom_id = self.sim.model.geom_name2id("cubeA")
        self.cubeB_geom_id = self.sim.model.geom_name2id("cubeB")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action):
        """
        Reward function for the task.

        The dense reward has five components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube
            Aligning: in [0, 0.5], encourages aligning one cube over the other
            Stacking: in {0, 2}, non-zero if cube is stacked on other cube

        The sparse reward only consists of the stacking component.
        However, the sparse reward is either 0 or 1.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        r_reach, r_lift, r_stack = self.staged_rewards()
        if self.reward_shaping:
            reward = max(r_reach, r_lift, r_stack)
        else:
            reward = 1.0 if r_stack > 0 else 0.0

        return reward

    def staged_rewards(self):
        """
        Helper function to return staged rewards based on current physical states.

        Returns:
            r_reach (float): reward for reaching and grasping
            r_lift (float): reward for lifting and aligning
            r_stack (float): reward for stacking
        """
        # reaching is successful when the gripper site is close to
        # the center of the cube
        cubeA_pos = self.sim.data.body_xpos[self.cubeA_body_id]
        cubeB_pos = self.sim.data.body_xpos[self.cubeB_body_id]
        gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
        dist = np.linalg.norm(gripper_site_pos - cubeA_pos)
        r_reach = (1 - np.tanh(10.0 * dist)) * 0.25

        # collision checking
        touch_left_finger = False
        touch_right_finger = False
        touch_cubeA_cubeB = False

        for i in range(self.sim.data.ncon):
            c = self.sim.data.contact[i]
            if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cubeA_geom_id:
                touch_left_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 in self.l_finger_geom_ids:
                touch_left_finger = True
            if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cubeA_geom_id:
                touch_right_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 in self.r_finger_geom_ids:
                touch_right_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 == self.cubeB_geom_id:
                touch_cubeA_cubeB = True
            if c.geom1 == self.cubeB_geom_id and c.geom2 == self.cubeA_geom_id:
                touch_cubeA_cubeB = True

        # additional grasping reward
        if touch_left_finger and touch_right_finger:
            r_reach += 0.25

        # lifting is successful when the cube is above the table top
        # by a margin
        cubeA_height = cubeA_pos[2]
        table_height = self.table_full_size[2]
        cubeA_lifted = cubeA_height > table_height + 0.04
        r_lift = 1.0 if cubeA_lifted else 0.0

        # Aligning is successful when cubeA is right above cubeB
        if cubeA_lifted:
            horiz_dist = np.linalg.norm(
                np.array(cubeA_pos[:2]) - np.array(cubeB_pos[:2]))
            r_lift += 0.5 * (1 - np.tanh(horiz_dist))

        # stacking is successful when the block is lifted and
        # the gripper is not holding the object
        r_stack = 0
        not_touching = not touch_left_finger and not touch_right_finger
        if not_touching and r_lift > 0 and touch_cubeA_cubeB:
            r_stack = 2.0

        return (r_reach, r_lift, r_stack)

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of the first cube
            cubeA_pos = np.array(self.sim.data.body_xpos[self.cubeA_body_id])
            cubeA_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cubeA_body_id]),
                                      to="xyzw")
            di["cubeA_pos"] = cubeA_pos
            di["cubeA_quat"] = cubeA_quat

            # position and rotation of the second cube
            cubeB_pos = np.array(self.sim.data.body_xpos[self.cubeB_body_id])
            cubeB_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cubeB_body_id]),
                                      to="xyzw")
            di["cubeB_pos"] = cubeB_pos
            di["cubeB_quat"] = cubeB_quat

            # relative positions between gripper and cubes
            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cubeA"] = gripper_site_pos - cubeA_pos
            di["gripper_to_cubeB"] = gripper_site_pos - cubeB_pos
            di["cubeA_to_cubeB"] = cubeA_pos - cubeB_pos

            di["object-state"] = np.concatenate([
                cubeA_pos,
                cubeA_quat,
                cubeB_pos,
                cubeB_quat,
                di["gripper_to_cubeA"],
                di["gripper_to_cubeB"],
                di["cubeA_to_cubeB"],
            ])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in self.finger_names
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.finger_names):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        _, _, r_stack = self.staged_rewards()
        return r_stack > 0

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to nearest object
        if self.gripper_visualization:
            # find closest object
            square_dist = lambda x: np.sum(
                np.square(x - self.sim.data.get_site_xpos("grip_site")))
            dists = np.array(list(map(square_dist, self.sim.data.site_xpos)))
            dists[
                self.
                eef_site_id] = np.inf  # make sure we don't pick the same site
            dists[self.eef_cylinder_id] = np.inf
            ob_dists = dists[
                self.object_site_ids]  # filter out object sites we care about
            min_dist = np.min(ob_dists)

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(min_dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
class SawyerReach(SawyerEnv):
    """
    This class corresponds to the a primitive policy for the reach task on the Sawyer robot arm.

    Uniform random sample on x, y, or z axis in range of 20 to 60 cm for x and y, 0 to 40 cm for z (center of table)
    """
    def __init__(
        self,
        gripper_type="TwoFingerGripper",
        lower_goal_range=[-0.1, -0.1, -0.1],
        upper_goal_range=[0.1, 0.1, 0.2],
        seed=False,
        random_arm_init=None,  # please pass in [low, high]
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=False,
        use_object_obs=True,
        reward_shaping=True,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:
            prim_axis (str): which axis is being explored

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.upper_goal_range = upper_goal_range
        self.lower_goal_range = lower_goal_range
        self.random_arm_init = random_arm_init
        self.seed = seed

        self.distance_threshold = 0.015
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        self.placement_initializer = placement_initializer

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )
        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        self.mujoco_objects = OrderedDict([])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        # self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        # self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        if self.random_arm_init is not None:
            if self.seed:
                constant_quat = np.array(
                    [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
                target_position = np.array([
                    0.5 + np.random.uniform(0.0, 0.0),
                    np.random.uniform(0.0, 0.0),
                    # self.table_full_size[2] + 0.15211762])
                    0.8 + 0.20211762
                ])
            else:
                # random initialization of arm
                constant_quat = np.array(
                    [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
                target_position = np.array([
                    0.5 + np.random.uniform(self.random_arm_init[0],
                                            self.random_arm_init[1]),
                    np.random.uniform(self.random_arm_init[0],
                                      self.random_arm_init[1]),
                    self.table_full_size[2] + 0.20211762
                ])
            self.controller.sync_ik_robot(self._robot_jpos_getter(),
                                          simulate=True)
            joint_list = self.controller.inverse_kinematics(
                target_position, constant_quat)
            init_pos = np.array(joint_list)

        else:
            # default robosuite init
            init_pos = np.array(
                [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
            init_pos += np.random.randn(init_pos.shape[0]) * 0.02

        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reset(self):
        self._destroy_viewer()
        self._reset_internal()
        self.sim.forward()

        # reset goal (marker)
        gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id])
        distance = np.random.uniform(self.lower_goal_range,
                                     self.upper_goal_range)
        while np.linalg.norm(distance) <= (self.distance_threshold * 1.75):
            distance = np.random.uniform(self.lower_goal_range,
                                         self.upper_goal_range)
        if self.seed:
            distance = np.array((0.05, -0.1, 0.1))

        self.goal = gripper_site_pos + distance

        return self._get_observation()

    def _robot_jpos_getter(self):
        return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has one component.

            Reaching: in [0, 1], to encourage the arm to reach the marker

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        #   velocity_pen = np.linalg(np.array(
        #       [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes]
        #  ))
        marker_pos = self.goal
        gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]

        return self.compute_reward(gripper_site_pos, marker_pos, None)

    # for goalenv wrapper
    def compute_reward(self, achieved_goal, desired_goal, info=None):
        d = np.linalg.norm(achieved_goal - desired_goal)
        if self.reward_shaping:  # dense
            reward = 1 - np.tanh(10 * d)
            if d <= self.distance_threshold:
                reward = 10.0
        else:  # sparse (-1 or 0)
            #reward = -np.float32(d > distance_threshold)
            if d > self.distance_threshold:
                reward = -1.0
            else:
                reward = 60.0

        return reward

    # for goalenv wrapper
    # TODO check the desired_goal/achieved_goal for indexing
    def get_goalenv_dict(self, obs_dict):
        # using only object-state and robot-state
        ob_lst = []
        di = {}
        for key in obs_dict:
            if key in ["robot-state", "object-state"]:
                ob_lst.append(obs_dict[key])

        di['observation'] = np.concatenate(ob_lst)
        di['desired_goal'] = obs_dict['object-state'][0:3]
        di['achieved_goal'] = obs_dict['robot-state'][23:26]

        return di

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_marker"] = gripper_site_pos - self.goal

            di["object-state"] = np.concatenate(
                [di["gripper_to_marker"], self.goal])

        return di

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #5
0
class BaxterLift(BaxterEnv):
    """
    This class corresponds to the bimanual lifting task for the Baxter robot.
    """
    def __init__(self,
                 gripper_type_right="TwoFingerGripper",
                 gripper_type_left="LeftTwoFingerGripper",
                 table_full_size=(0.8, 0.8, 0.8),
                 table_friction=(1., 5e-3, 1e-4),
                 use_object_obs=True,
                 reward_shaping=True,
                 **kwargs):
        """
        Args:

            gripper_type_right (str): type of gripper used on the right hand.

            gripper_type_lefft (str): type of gripper used on the right hand.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_object_obs (bool): if True, include object (pot) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

        Inherits the Baxter environment; refer to other parameters described there.
        """

        # initialize the pot
        self.pot = PotWithHandlesObject()
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        self.object_initializer = UniformRandomSampler(
            x_range=(-0.15, -0.04),
            y_range=(-0.015, 0.015),
            z_rotation=(-0.15 * np.pi, 0.15 * np.pi),
            ensure_object_boundary_in_range=False,
        )

        super().__init__(gripper_left=gripper_type_left,
                         gripper_right=gripper_type_right,
                         **kwargs)

    def _load_model(self):
        """
        Loads the arena and pot object.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.45 + self.table_full_size[0] / 2, 0, 0])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            self.object_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("pot")
        self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1")
        self.handle_2_site_id = self.sim.model.site_name2id("pot_handle_2")
        self.table_top_id = self.sim.model.site_name2id("table_top")
        self.pot_center_id = self.sim.model.site_name2id("pot_center")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        self.model.place_objects()

    def reward(self, action):
        """
        Reward function for the task.

          1. the agent only gets the lifting reward when flipping no more than 30 degrees.
          2. the lifting reward is smoothed and ranged from 0 to 2, capped at 2.0.
             the initial lifting reward is 0 when the pot is on the table;
             the agent gets the maximum 2.0 reward when the pot’s height is above a threshold.
          3. the reaching reward is 0.5 when the left gripper touches the left handle,
             or when the right gripper touches the right handle before the gripper geom
             touches the handle geom, and once it touches we use 0.5
        """
        reward = 0

        cube_height = self.sim.data.site_xpos[
            self.pot_center_id][2] - self.pot.get_top_offset()[2]
        table_height = self.sim.data.site_xpos[self.table_top_id][2]

        # check if the pot is tilted more than 30 degrees
        mat = T.quat2mat(self._pot_quat)
        z_unit = [0, 0, 1]
        z_rotated = np.matmul(mat, z_unit)
        cos_z = np.dot(z_unit, z_rotated)
        cos_30 = np.cos(np.pi / 6)
        direction_coef = 1 if cos_z >= cos_30 else 0

        # cube is higher than the table top above a margin
        if cube_height > table_height + 0.15:
            reward = 1.0 * direction_coef

        # use a shaping reward
        if self.reward_shaping:
            reward = 0

            # lifting reward
            elevation = cube_height - table_height
            r_lift = min(max(elevation - 0.05, 0), 0.2)
            reward += 10. * direction_coef * r_lift

            l_gripper_to_handle = self._l_gripper_to_handle
            r_gripper_to_handle = self._r_gripper_to_handle

            # gh stands for gripper-handle
            # When grippers are far away, tell them to be closer
            l_contacts = list(
                self.find_contacts(self.gripper_left.contact_geoms(),
                                   self.pot.handle_1_geoms()))
            r_contacts = list(
                self.find_contacts(self.gripper_right.contact_geoms(),
                                   self.pot.handle_2_geoms()))
            l_gh_dist = np.linalg.norm(l_gripper_to_handle)
            r_gh_dist = np.linalg.norm(r_gripper_to_handle)

            if len(l_contacts) > 0:
                reward += 0.5
            else:
                reward += 0.5 * (1 - np.tanh(l_gh_dist))

            if len(r_contacts) > 0:
                reward += 0.5
            else:
                reward += 0.5 * (1 - np.tanh(r_gh_dist))

        return reward

    @property
    def _handle_1_xpos(self):
        """Returns the position of the first handle."""
        return self.sim.data.site_xpos[self.handle_1_site_id]

    @property
    def _handle_2_xpos(self):
        """Returns the position of the second handle."""
        return self.sim.data.site_xpos[self.handle_2_site_id]

    @property
    def _pot_quat(self):
        """Returns the orientation of the pot."""
        return T.convert_quat(self.sim.data.body_xquat[self.cube_body_id],
                              to="xyzw")

    @property
    def _world_quat(self):
        """World quaternion."""
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    @property
    def _l_gripper_to_handle(self):
        """Returns vector from the left gripper to the handle."""
        return self._handle_1_xpos - self._l_eef_xpos

    @property
    def _r_gripper_to_handle(self):
        """Returns vector from the right gripper to the handle."""
        return self._handle_2_xpos - self._r_eef_xpos

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            cube_quat = T.convert_quat(
                self.sim.data.body_xquat[self.cube_body_id], to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            di["l_eef_xpos"] = self._l_eef_xpos
            di["r_eef_xpos"] = self._r_eef_xpos
            di["handle_1_xpos"] = self._handle_1_xpos
            di["handle_2_xpos"] = self._handle_2_xpos
            di["l_gripper_to_handle"] = self._l_gripper_to_handle
            di["r_gripper_to_handle"] = self._r_gripper_to_handle

            di["object-state"] = np.concatenate([
                di["cube_pos"],
                di["cube_quat"],
                di["l_eef_xpos"],
                di["r_eef_xpos"],
                di["handle_1_xpos"],
                di["handle_2_xpos"],
                di["l_gripper_to_handle"],
                di["r_gripper_to_handle"],
            ])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        contact_geoms = (self.gripper_right.contact_geoms() +
                         self.gripper_left.contact_geoms())
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in contact_geoms
                    or self.sim.model.geom_id2name(
                        contact.geom2) in contact_geoms):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task is successfully completed
        """
        # cube is higher than the table top above a margin
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]
        return cube_height > table_height + 0.10
예제 #6
0
class SawyerPrimitivePick(SawyerEnv):
    """
    This class corresponds to the lifting task for the Sawyer robot arm.
    """
    def __init__(
        self,
        instructive=0.0,
        decay=0.0,
        random_arm_init=None,
        gripper_type="TwoFingerGripper",
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.random_arm_init = random_arm_init
        self.instructive = instructive
        self.instructive_counter = 0
        self.eval_mode = False
        self.decay = decay
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.00, 0.00],
                y_range=[-0.00, 0.00],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )
        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        cube = BoxObject(
            # size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            # size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            size_min=[0.015, 0.015, 0.015],
            size_max=[0.015, 0.015, 0.015],
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super(SawyerEnv, self)._reset_internal()

        self.model.place_objects()

        if self.random_arm_init:
            # random initialization of arm
            constant_quat = np.array(
                [-0.01704371, -0.99972409, 0.00199679, -0.01603944])

            target_position = np.array([
                0.5 + np.random.uniform(self.random_arm_init[0],
                                        self.random_arm_init[1]),
                np.random.uniform(self.random_arm_init[0],
                                  self.random_arm_init[1]),
                self.table_full_size[2] + 0.15211762
            ])
            self.controller.sync_ik_robot(self._robot_jpos_getter(),
                                          simulate=True)
            joint_list = self.controller.inverse_kinematics(
                target_position, constant_quat)
            init_pos = np.array(joint_list)
        else:
            # default robosuite init
            init_pos = np.array(
                [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
            init_pos += np.random.randn(init_pos.shape[0]) * 0.02

        self.sim.data.qpos[self._ref_joint_pos_indexes] = init_pos

        self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array(
            [0.0115, -0.0115])  # Open

        self.sim.forward()
        self.sim.data.qpos[10:12] = self.sim.data.site_xpos[
            self.eef_site_id][:2]

        # decay rate (1 / (1 + decay_param * #resets))
        chance = self.instructive * (
            1 / (1 + self.decay * self.instructive_counter))
        if np.random.uniform() < chance:  # and not self.eval_mode:
            self.sim.data.qpos[13] = self.sim.data.site_xpos[
                self.eef_site_id][2]
            self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array(
                [-0.0, -0.0])  #np.array([-0.21021952, -0.00024167])  # gripped

        self.instructive_counter = self.instructive_counter + 1

        cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
        self.goal = cube_pos + np.array((0, 0, 0.075))

    def _robot_jpos_getter(self):
        return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """

        cube_height = self.sim.data.body_xpos[self.cube_body_id]

        return self.compute_reward(cube_height, self.goal)

    # for goalenv wrapper
    def compute_reward(self, achieved_goal, desired_goal, info=None):
        # -1 if cube is below, 0 if cube is above
        reward = -1.0
        if achieved_goal[2] > desired_goal[2]:
            reward = 100.0

        return reward

    # for goalenv wrapper
    def get_goalenv_dict(self, obs_dict):
        # using only object-state and robot-state
        ob_lst = []
        di = {}
        for key in obs_dict:
            if key in ["robot-state", "object-state"]:
                ob_lst.append(obs_dict[key])

        di['observation'] = np.concatenate(ob_lst)
        di['desired_goal'] = self.goal
        di['achieved_goal'] = obs_dict['object-state'][0:3]

        return di

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            # di["object-state"] = np.concatenate(
            #     [cube_pos, cube_quat, di["gripper_to_cube"]]
            # )
            di["object-state"] = np.concatenate(
                [di["gripper_to_cube"], cube_pos])

        return di

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #7
0
class PandaOpenDoor(change_dof(PandaEnv, 8, 8)): # keep the dimension to control the gripper; better not remove change_dof
    """
    This class corresponds to the pushing task for the Panda robot arm.
    """
    
    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        #'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        # 'boxobject_size_0': [0.018, 0.022],
        # 'boxobject_size_1': [0.018, 0.022],
        # 'boxobject_size_2': [0.018, 0.022],
        # 'boxobject_friction_0': [0.04, 1.6],
        #'boxobject_friction_1': [0.0025, 0.0075],    # fixed this to zero
        # 'boxobject_friction_2': [0.00005, 0.00015],
        # 'boxobject_density_1000': [0.6, 1.4],
    }
    
    def reset_props(self,
                    table_size_0=0.8, table_size_1=0.8, table_size_2=0.9,
                    table_friction_0=0., table_friction_1=0.005, table_friction_2=0.0001,
                    # boxobject_size_0=0.020, boxobject_size_1=0.020, boxobject_size_2=0.020,
                    # boxobject_friction_0=0.1, boxobject_friction_1=0.0, boxobject_friction_2=0.0001,
                    # boxobject_density_1000=0.1,
                    **kwargs):
        
        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1, table_friction_2)
        # self.boxobject_size = (boxobject_size_0, boxobject_size_1, boxobject_size_2)
        # self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1, boxobject_friction_2)
        # self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            object_obs_process (bool): if True, process the object observation to get a task_state.
                Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing.
        """
        
        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # self.placement_initializer = UniformRandomSampler(
            #     x_range=[-0.1, 0.1],
            #     y_range=[-0.1, 0.1],
            #     ensure_object_boundary_in_range=False,
            #     z_rotation=None,
            # )
            self.placement_initializer = UniformRandomSamplerObjectSpecific(
                x_ranges=[[-0.03, -0.02], [0.09, 0.1]],
                y_ranges=[[-0.05, -0.04], [-0.05, -0.04]],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )
            

        # for first initialization
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (0., 0.005, 0.0001)
        # self.boxobject_size = (0.02, 0.02, 0.02)
        # self.boxobject_friction = (0.1, 0.005, 0.0001)
        # self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableCabinetArena(
            table_full_size=self.table_full_size, table_friction=self.table_friction
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])
        
        # initialize objects of interest

        # cube = FullyFrictionalBoxObject(
        #     size=self.boxobject_size,
        #     friction=self.boxobject_friction,
        #     density=self.boxobject_density,
        #     rgba=[1, 0, 0, 1],
        # )
        # self.mujoco_cube = cube
        # goal = CylinderObject(
        #     size=[0.03, 0.001],
        #     rgba=[0, 1, 0, 1],
        # )
        # self.mujoco_goal = goal
        # self.mujoco_objects = OrderedDict([("cube", cube), ("goal", goal)])
        
        self.mujoco_objects = None

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
            visual_objects=[],
        )
        if self.mujoco_objects is not None:
            self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        # self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms
        ]
        # self.cube_geom_id = self.sim.model.geom_name2id("cube")

        # gripper ids
        # self.goal_body_id = self.sim.model.body_name2id('goal')
        # self.goal_site_id = self.sim.model.site_name2id('goal')

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.sim.forward()

        # reset positions of objects
        if self.mujoco_objects is not None:
            self.model.place_objects()

        # reset joint positions
        # init_pos = self.mujoco_robot.init_qpos
        # print(init_pos)
        # init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        # self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

        # self.sim.data.qpos[self._ref_joint_pos_indexes] = [0.02085236,  0.20386552,  0.00569112, -2.60645364,  2.8973697, 3.53509316, 2.89737955]  # a initial gesture: facing downwards
        # self.sim.data.qpos[self._ref_joint_pos_indexes] = [ 0.10234113, -1.32314358,  0.18383693, -2.88485115,  1.64673455,  3.22235274, 2.3644487 ] # a good initial gesture: facing horizontally
        self.sim.data.qpos[self._ref_joint_pos_indexes] = [ 0.10259647, -0.77839656,  0.27246156, -2.35741103,  1.647504,  3.43102572, -0.85707793]
        # open the gripper
        # self.sim.data.qpos[self._ref_joint_gripper_actuator_indexes] = np.array([0.2, -0.2])
        self.sim.data.ctrl[-2:] = np.array([0.04, -0.04])  # panda gripper finger joint range is -0.04~0.04

        # set other reference attributes
        eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape((3, 3))
        self.world_rot_in_eef = copy.deepcopy(eef_rot_in_world.T)  # TODO inspect on this: should we set a golden reference other than a initial position?

    # reward function from sawyer_push
    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [-inf, 0], to encourage the arm to reach the object
            Goal Distance: in [-inf, 0] the distance between the pushed object and the goal
            Safety reward in [-inf, 0], -1 for every joint that is at its limit.

        The sparse reward only receives a {0,1} upon reaching the goal

        Args:
            action (np array): The action taken in that timestep

        Returns:
            reward (float): the reward
            previously in robosuite-extra, when dense reward is used, the return value will be a dictionary. but we removed that feature.
        """
        reward = 0.
        self.door_open_angle = abs(self.sim.data.get_joint_qpos("hinge0"))

        reward_door_open = self.door_open_angle

        reward_dist = 0.
        reward_ori = 0.
        if self.door_open_angle < 0.03:
            # A distance reward: minimize the distance between the gripper and door konb when the door is almost closed 
            reward_dist = -np.linalg.norm(self.get_finger2knob_dist_vec())

            # An orientation reward: make the orientation of gripper horizontal (better for knob grasping) when the door is almost closed 
            fingerEulerDesired =  [np.pi, 0, np.pi/2]  # horizontal gesture for gripper
            finger_ori = self.get_finger_ori()

            ori_diff = sin_cos_encoding(fingerEulerDesired) - sin_cos_encoding(finger_ori)  # use sin_cos_encoding to avoid value jump in 2PI measure
            reward_ori = -np.linalg.norm(ori_diff) * 0.04

            # worldHknob = self.sim.data.get_body_xquat("knob_link")
            # knobHee_desired = euler2quat(quat2euler([0.5, 0.5, 0.5, -0.5]))
            # worldHee_desired = quat_mul(worldHknob, knobHee_desired)

            # print(quat2euler(knobHee_desired), self.get_finger_ori())

        reward = reward_door_open + reward_dist + reward_ori  # A summary of reward values


        # Success Reward
        success = self._check_success()
        if (success):
            reward += 0.1
            self.done = True



        # worldHknob = self.sim.data.get_body_xquat("knob_link")
        # knobHee_desired = euler2quat(quat2euler([0.5, 0.5, -0.5, 0.5]))
        # worldHee_desired = quat_mul(worldHknob, knobHee_desired)
        # print(quat2euler(worldHee_desired), quat2euler(worldHknob), self.get_finger_ori() )


        # # sparse completion reward
        # if not self.reward_shaping and self._check_success():
        #     reward = 1.0

        # # use a dense reward
        # if self.reward_shaping:
        #     object_pos = self.sim.data.body_xpos[self.cube_body_id]

        #     # max joint angles reward
        #     joint_limits = self._joint_ranges
        #     current_joint_pos = self._joint_positions

        #     hitting_limits_reward = - int(any([(x < joint_limits[i, 0] + 0.05 or x > joint_limits[i, 1] - 0.05) for i, x in
        #                                       enumerate(current_joint_pos)]))

        #     reward += hitting_limits_reward

        #     # reaching reward
        #     gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
        #     dist = np.linalg.norm(gripper_site_pos - object_pos)
        #     reaching_reward = -0.4 * dist
        #     reward += reaching_reward

        #     # print(gripper_site_pos, object_pos, reaching_reward)

        #     # Success Reward
        #     success = self._check_success()
        #     if (success):
        #         reward += 0.1

        #     # goal distance reward
        #     goal_pos = self.sim.data.site_xpos5707963267948966bject--gripper--goal
        #     angle_g_o_g = angle_between(gripper_site_pos - object_pos,
        #                                 goal_pos - object_pos)
        #     if not success and angle_g_o_g < np.pi / 2.:
        #         reward += -0.03 - 0.02 * (np.pi / 2. - angle_g_o_g)

        #     # print('grippersitepos', gripper_site_pos,
        #     #       'objpos', object_pos,
        #     #       'jointangles', hitting_limits_reward,
        #     #       'reaching', reaching_reward,
        #     #       'success', success,
        #     #       'goaldist', goal_distance_reward)

        #     unstable = reward < -2.5

        #     # Return all three types of rewards
        #     reward = {"reward": reward, "reaching_distance": -10 * reaching_reward,
        #               "goal_distance": - goal_distance_reward,
        #               "hitting_limits_reward": hitting_limits_reward,
        #               "unstable":unstable}

        return reward
    
    def _check_success(self):
        """
        Returns True if task has been completed.
        """

        if self.door_open_angle >= 1.55: # 1.57 ~ PI/2
            return True
        else:
            return False

        # object_pos = self.sim.data.body_xpos[self.cube_body_id]
        # goal_pos = self.sim.data.site_xpos[self.goal_site_id]

        # dist = np.linalg.norm(goal_pos - object_pos)
        # goal_horizontal_radius = self.model.mujoco_objects['goal'].get_horizontal_radius()

        # # object centre is within the goal radius
        # return dist < goal_horizontal_radius
        # return False

    def step(self, action):
        # """ explicitly shut the gripper """
        # joined_action = np.append(action, [1.])
        return super().step(action)

    def world2eef(self, world):
        return self.world_rot_in_eef.dot(world)

    # def put_raw_object_obs(self, di):
        # Extract position and velocity of the eef
        # eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
        # eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")

        # print('eef_pos_in_world', eef_pos_in_world)

        # Get the position, velocity, rotation  and rotational velocity of the object in the world frame
        # object_pos_in_world = self.sim.data.body_xpos[self.cube_body_id]
        # object_xvelp_in_world = self.sim.data.get_body_xvelp('cube')
        # object_rot_in_world = self.sim.data.get_body_xmat('cube')
        
        # Get the z-angle with respect to the reference position and do sin-cosine encoding
        # world_rotation_in_reference = np.array([[0., 1., 0., ], [-1., 0., 0., ], [0., 0., 1., ]])
        # object_rotation_in_ref = world_rotation_in_reference.dot(object_rot_in_world)
        # object_euler_in_ref = T.mat2euler(object_rotation_in_ref)
        # z_angle = object_euler_in_ref[2]
        
        # object_quat = convert_quat(self.sim.data.body_xquat[self.cube_body_id], to='xyzw')
        
        # Get the goal position in the world
        # goal_site_pos_in_world = np.array(self.sim.data.site_xpos[self.goal_site_id])

        # Record observations into a dictionary
        # di['goal_pos_in_world'] = goal_site_pos_in_world
        # di['eef_pos_in_world'] = eef_pos_in_world
        # di['eef_vel_in_world'] = eef_xvelp_in_world
        # di['object_pos_in_world'] = object_pos_in_world
        # di['object_vel_in_world'] = object_xvelp_in_world
        # di["z_angle"] = np.array([z_angle])
        # di['object_quat'] = object_quat

    # def process_object_obs(self, di):
    #     # z_angle = di['z_angle']
    #     # sine_cosine = np.array([np.sin(8*z_angle), np.cos(8*z_angle)]).reshape((2,))

    #     eef_to_object_in_world = di['object_pos_in_world'] - di['eef_pos_in_world']
    #     # eef_to_object_in_eef = self.world2eef(eef_to_object_in_world)

    #     object_to_goal_in_world = di['goal_pos_in_world'] - di['object_pos_in_world']
    #     # object_to_goal_in_eef = self.world2eef(object_to_goal_in_world)

    #     # object_xvelp_in_eef = self.world2eef(di['object_vel_in_world'])
    #     # eef_xvelp_in_eef = self.world2eef(di['eef_vel_in_world'])

        # task_state = np.concatenate([
                                    #  eef_to_object_in_world,
                                    #  object_to_goal_in_world,
                                    #  di['eef_pos_in_world'],
                                    #  di['eef_vel_in_world'],
                                    #  di['object_vel_in_world'],
                                    #  di['object_quat']
                                    # ])
        # di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            gripper_to_object : The x-y component of the gripper to object distance
            object_to_goal : The x-y component of the object-to-goal distance
            object_z_rot : the roation of the object around an axis sticking out the table

            object_xvelp: x-y linear velocity of the object
            gripper_xvelp: x-y linear velocity of the gripper


            task-state : a concatenation of all the above.
        """
        # di = super()._get_observation()  # joint angles & vel, which we don't need.
        di = OrderedDict()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
            eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")
            di['eef_pos_in_world'] = eef_pos_in_world  # dim=3
            di['eef_vel_in_world'] = eef_xvelp_in_world  # dim=3
            di['finger_knob_dist'] = self.get_finger2knob_dist_vec()  # dim=3
            di['door_hinge_angle'] = [self.sim.data.get_joint_qpos("hinge0")]  # dim=1

            task_state = np.concatenate([
                                    di['eef_pos_in_world'], 
                                    di['eef_vel_in_world'], 
                                    di['finger_knob_dist'],
                                    di['door_hinge_angle'],
                                ])
            di['task_state'] = task_state
        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            if (
                self.sim.model.geom_id2name(contact.geom1)
                in self.gripper.contact_geoms()
                or self.sim.model.geom_id2name(contact.geom2)
                in self.gripper.contact_geoms()
            ):
                collision = True
                break
        return collision

    def _check_contact_with(self, object):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            if (
                    (self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms()
                     and contact.geom2 == self.sim.model.geom_name2id(object))

                    or (self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms()
                        and contact.geom1 == self.sim.model.geom_name2id(object))
            ):
                collision = True
                break
        return collision

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        if self.gripper_visualization:
            rgba = np.zeros(4)

            self.sim.model.site_rgba[self.eef_site_id] = rgba


    def get_finger_ori(self):
        finger_rel_quat = self.sim.data.get_body_xquat("rightfinger")
        hand_quat = self.sim.data.get_body_xquat("right_hand")
        finger_world_quat = quat_mul(finger_rel_quat, hand_quat)  # TODO: which one at first?
        return quat2euler(finger_world_quat)

    def get_hand_pos(self):
        return self.sim.data.get_geom_xpos("hand_visual")

    def get_knob_pos(self):
        knob_pos = self.sim.data.get_geom_xpos("center_cabinet_knob")
        return knob_pos

    def get_finger2knob_dist_vec(self):
        return self.get_hand_pos() - self.get_knob_pos()
예제 #8
0
class PandaLift(PandaEnv):
    dof = 8
    """
    This class corresponds to the lifting task for the Panda robot arm.
    """

    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        'boxobject_friction_0': [0.4, 1.6],
        # 'boxobject_friction_1': [0.0025, 0.0075],    # fixed this to zero
        'boxobject_friction_2': [0.00005, 0.00015],
        'boxobject_density_1000': [0.6, 1.4],
    }

    def reset_props(self,
                    table_size_0=0.8,
                    table_size_1=0.8,
                    table_size_2=0.8,
                    table_friction_0=1.0,
                    table_friction_1=0.005,
                    table_friction_2=0.0001,
                    boxobject_size_0=0.020,
                    boxobject_size_1=0.020,
                    boxobject_size_2=0.020,
                    boxobject_friction_0=1.0,
                    boxobject_friction_1=0.0,
                    boxobject_friction_2=0.0001,
                    boxobject_density_1000=0.1,
                    **kwargs):

        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1,
                               table_friction_2)
        self.boxobject_size = (boxobject_size_0, boxobject_size_1,
                               boxobject_size_2)
        self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1,
                                   boxobject_friction_2)
        self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            object_obs_process (bool): if True, process the object observation to get a task_state.
                Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing.
        """

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # for first initialization
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (1.0, 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (1.0, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:

            # reaching reward
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def put_raw_object_obs(self, di):
        di['cube_pos'] = np.array(self.sim.data.body_xpos[self.cube_body_id])
        di['cube_quat'] = convert_quat(np.array(
            self.sim.data.body_xquat[self.cube_body_id]),
                                       to="xyzw")
        di['gripper_site_pos'] = np.array(
            self.sim.data.site_xpos[self.eef_site_id])

    def process_object_obs(self, di):
        gripper_to_cube = di['gripper_site_pos'] - di['cube_pos']
        task_state = np.concatenate(
            [di['cube_pos'], di['cube_quat'], gripper_to_cube])
        di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            self.put_raw_object_obs(di)
            if self.object_obs_process:
                self.process_object_obs(di)

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        return cube_height > table_height + 0.04

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #9
0
class SawyerGrasp(SawyerEnv):
    """
    This class corresponds to the lifting task for the Sawyer robot arm.
    """

    def __init__(
        self,
        gripper_type="TwoFingerGripper",
        table_full_size=(0.8, 0.8, 1.1),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
        target_object='cube',
    ):
        """
        Args:
            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.
            table_full_size (3-tuple): x, y, and z dimensions of the table.
            table_friction (3-tuple): the three mujoco friction parameters for
                the table.
            use_camera_obs (bool): if True, every observation includes a
                rendered image.
            use_object_obs (bool): if True, include object (cube) information in
                the observation.
            reward_shaping (bool): if True, use dense rewards.
            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.
            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.
            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.
            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.
            has_offscreen_renderer (bool): True if using off-screen rendering.
            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.
            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.
            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.
            horizon (int): Every episode lasts for exactly @horizon timesteps.
            ignore_done (bool): True if never terminating the environment (ignore @horizon).
            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.
            camera_height (int): height of camera frame.
            camera_width (int): width of camera frame.
            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.target_object = target_object

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # self.placement_initializer = UniformRandomSampler(
            #     x_range=[-0.03, 0.03],
            #     y_range=[-0.03, 0.03],
            #     ensure_object_boundary_in_range=False,
            #     z_rotation=True,
            # )
            if self.target_object == 'cube':
                self.placement_initializer = UniformRandomSampler(
                    x_range=[0.08, 0.12],
                    y_range=[0.14, 0.18],
                    ensure_object_boundary_in_range=False,
                    z_rotation=[-np.pi/4, 0], #None for random
                )
            else:
                self.placement_initializer = UniformRandomSampler(
                    x_range=BOX_TEST_OBJECTS[self.target_object]['x_range'],
                    y_range=BOX_TEST_OBJECTS[self.target_object]['y_range'],
                    ensure_object_boundary_in_range=False,
                    z_rotation=BOX_TEST_OBJECTS[self.target_object]['angle'], #None for random
                )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size, table_friction=self.table_friction
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        
        if self.target_object == 'cube':
            cube = BoxObject(
                size_min=[0.05, 0.010, 0.05],  # [0.015, 0.015, 0.015],
                size_max=[0.15, 0.015, 0.08],  # [0.018, 0.018, 0.018])
                rgba=[1, 0, 0, 1],
                friction=20,
                density=50,
            )
        else:
            cube = BoxObject(
                size=BOX_TEST_OBJECTS[self.target_object]['size'],
                rgba=[1, 0, 0, 1],
                friction=20,
                density=50,
            )
        # if self.target_object == 'cylinder':
        #     cube = CylinderObject(
        #         size_min=[0.015, 0.01], 
        #         size_max=[0.025, 0.05],  
        #         rgba=[1, 0, 0, 1],
        #     )
        # elif self.target_object == 'ball':
        #     cube = BallObject(
        #         size_min=[0.01], 
        #         size_max=[0.02],  
        #         rgba=[1, 0, 0, 1],
        #     )

        self.object = cube
        # if self.target_object == 'can':
        #     cube = CanObject()
        # elif self.target_object == 'bottle':
        #     cube = BottleObject()
        # elif self.target_object == 'milk':
        #     cube = MilkObject()
        # elif self.target_object == 'cereal':
        #     cube = CerealObject()
        # elif self.target_object == 'lemon':
        #     cube = LemonObject()

        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _reset_internal(self):
        """
        Sets initial pose of arm and grippers.
        """
        super()._reset_internal()
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.zeros(self.dof)

        if self.has_gripper:
            self.sim.data.qpos[
                self._ref_gripper_joint_pos_indexes
            ] = self.gripper.init_qpos

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.table_body_id = self.sim.model.body_name2id("table")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _get_target_grasp(self):
        pose = self.pose_in_base_from_name('cube')
        wpos = self._get_observation()['cube_pos']
        height = wpos[2] - self.table_full_size[2]
        pos = pose[:3, 3] + np.array([-0.02, 0.01, GRIPPER_LENGTH - GRIP_CONTACT]) 
        return pos, 0

    # relative to table top and center
    def _get_grasp_gt(self):
        table_top_center, _ = self._get_table_top_center()
        pose = self.pose_in_base_from_name('cube')
        pos = pose[:3, 3]

        pos_gt = pos - table_top_center
        grasp_height = self.object.get_top_offset()[-1]
        if len(self.object.size) == 1: #sphere
            grasp_height = grasp_height / 2

        pos_gt[2] = grasp_height

        rot_gt = pose[:3, :3]
        return pos_gt, rot_gt

    def _gen_grasp_gt(self):
        GRIPPER_LENGTH = 0.15
        GRIPPER_Y_OFFSET = 0.025

        table_top_center, _ = self._get_table_top_center()
        pose = self.pose_in_base_from_name('cube')
        pos = pose[:3, 3]

        pos_gt = pos - table_top_center
        grasp_height = self.object.get_top_offset()[-1]
        if len(self.object.size) == 1: #sphere
            grasp_height = grasp_height / 2

        pos_gt[2] = grasp_height #+ GRIPPER_LENGTH
        # pos_gt[1] += GRIPPER_Y_OFFSET

        cube_length = self.object.size[0]
        # pos_gt[0] += np.random.uniform(low=-cube_length/2, high = cube_length/2)

        rot_gt = pose[:3, :3]
        return pos_gt, rot_gt

    def _get_table_top_center(self):
        pose = self.pose_in_base_from_name('table')
        pos = pose[:3, 3]
        rot = pose[:3, :3]
        pos[2] += self.table_full_size[2]/2
        return pos, rot

    def _get_body_pos(self, name, base=True):
        if base:
            pose = self.pose_in_base_from_name(name)
            body_pos = pose[:3, 3]
            body_quat = pose[:3,:3]
        else:
            body_id = self.sim.model.body_name2id(name)
            body_pos = np.array(self.sim.data.body_xpos[body_id])
            body_quat = convert_quat(
                    np.array(self.sim.data.body_xquat[body_id]), to="xyzw")

        return body_pos, body_quat

    def _get_cam_pos(self, cam_id):
        cam_pos = np.array(self.sim.model.cam_pos[cam_id])
        cam_quat = convert_quat(np.array(self.sim.model.cam_quat[cam_id]), to="xyzw")

        return cam_pos, cam_quat
        
    def _get_cam_K(self, cam_id):
        W = self.camera_width
        H = self.camera_height
        fovy = self.sim.model.cam_fovy[cam_id] 
        f = 0.5 * W / math.tan(fovy * math.pi / 360)
        return f, W/2, H/2 #np.array([[f, 0, W / 2], [0, f, H / 2], [0, 0, 1]])   

    def _set_cam_pos(self, cam_id, cam_pos, cam_quat):
        self.sim.model.cam_pos[cam_id] = cam_pos
        self.sim.model.cam_quat[cam_id] = convert_quat(cam_quat, to="wxyz")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = np.array([-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.
        The dense reward has three components.
            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube
        The sparse reward only consists of the lifting component.
        Args:
            action (np array): unused for this task
        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:

            # reaching reward
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].
        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(
                np.array(self.sim.data.body_xquat[self.cube_body_id]), to="xyzw"
            )
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            di["object-state"] = np.concatenate(
                [cube_pos, cube_quat, di["gripper_to_cube"]]
            )

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            if (
                self.sim.model.geom_id2name(contact.geom1)
                in self.gripper.contact_geoms()
                or self.sim.model.geom_id2name(contact.geom2)
                in self.gripper.contact_geoms()
            ):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.object.get_top_offset()[-1]#self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        # return cube_height > table_height + 0.04
        return self.sim.data.body_xpos[self.cube_body_id][2] > (cube_height + table_height + 0.05)

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(
                    self.sim.data.site_xpos[cube_site_id]
                    - self.sim.data.get_site_xpos("grip_site")
                )
            )

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.)) ** 15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba