예제 #1
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        cube = BoxObject(
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        self.mujoco_objects = OrderedDict([])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()
예제 #3
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        self.cube = BoxObject(
            name="cube",
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            rgba=[1, 0, 0, 1],
        )
        self.goal = BoxObject(
            name="goal",
            size_min=[0.005, 0.005, 0.005],
            size_max=[0.005, 0.005, 0.005],
            rgba=[0, 0, 1, 1],
            density=11342,  # Density of lead such that the goal is hard to move
        )

        # Create placement initializer
        if self.placement_initializer is not None:
            self.placement_initializer.reset()
            self.placement_initializer.add_objects([self.cube, self.goal])
        else:
            self.placement_initializer = UniformRandomSampler(
                name="ObjectSampler",
                mujoco_objects=[self.cube, self.goal],
                x_range=[-0.20, 0.20],
                y_range=[-0.20, 0.20],
                rotation=None,
                ensure_object_boundary_in_range=False,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
                z_offset=0.01,
            )

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=[self.cube, self.goal],
        )
예제 #5
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation in zip(self.robots,
                                           (np.pi / 2, -np.pi / 2)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        self.pot = PotWithHandlesObject(name="pot")
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()
예제 #6
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_offset=self.table_offset,
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        door = DoorObject(
            name="Door",
            friction=0.0,
            damping=0.1,
            lock=self.use_latch,
            joints=[],  # ensures that door object does not have a free joint
        )
        self.mujoco_objects = OrderedDict([("Door", door)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()
예제 #7
0
파일: cube_exp.py 프로젝트: itaicaspi/coach
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        SingleArmEnv._load_model(self)

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        cube_material = self._get_cube_material()
        self.cube = BoxObject(
            name="cube",
            size_min=(0.025, 0.025, 0.025),
            size_max=(0.025, 0.025, 0.025),
            rgba=[1, 0, 0, 1],
            material=cube_material,
        )

        self.placement_initializer.reset()
        self.placement_initializer.add_objects(self.cube)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.cube,
        )
예제 #8
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # Modify default agentview camera
        mujoco_arena.set_camera(
            camera_name="agentview",
            pos=[0.5986131746834771, -4.392035683362857e-09, 1.5903500240372423],
            quat=[0.6380177736282349, 0.3048497438430786, 0.30484986305236816, 0.6380177736282349]
        )

        # initialize objects of interest
        self.door = DoorObject(
            name="Door",
            friction=0.0,
            damping=0.1,
            lock=self.use_latch,
        )

        # Create placement initializer
        if self.placement_initializer is not None:
            self.placement_initializer.reset()
            self.placement_initializer.add_objects(self.door)
        else:
            self.placement_initializer = UniformRandomSampler(
                    name="ObjectSampler",
                    mujoco_objects=self.door,
                    x_range=[0.07, 0.09],
                    y_range=[-0.01, 0.01],
                    rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
                    rotation_axis='z',
                    ensure_object_boundary_in_range=False,
                    ensure_valid_placement=True,
                    reference_pos=self.table_offset,
                )

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots], 
            mujoco_objects=self.door,
        )
예제 #9
0
    def _load_model(self):
        """
        Loads the arena and pot object.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.45 + self.table_full_size[0] / 2, 0, 0])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            self.object_initializer,
        )
예제 #10
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # Load hole object
        # register object with the corresponding option (objectClass, name, xrange, yrange)
        if self.task_configs['board'] == 'GMC_assembly':
            self.register_object(my_object.GMC_Assembly_Object,
                                 'plate',
                                 xrange=[0, 0],
                                 yrange=[0, 0])
        if self.task_configs['board'] == 'GMC_plate':
            self.register_object(my_object.GMC_Plate_Object,
                                 'plate',
                                 xrange=[0, 0],
                                 yrange=[0, 0])
        if self.task_configs['board'] == 'Square_hole_16mm':
            self.register_object(my_object.square_hole_16mm,
                                 'plate',
                                 xrange=[0, 0],
                                 yrange=[0, 0])

        # Load peg object
        if self.task_configs['peg'] == '16mm':
            self.register_object(my_object.Round_peg_16mm_Object,
                                 'peg',
                                 xrange=[-0.1, -0.13],
                                 yrange=[0.3, 0.33])
        elif self.task_configs['peg'] == '12mm':
            self.register_object(my_object.Round_peg_12mm_Object,
                                 'peg',
                                 xrange=[-0.1, -0.13],
                                 yrange=[0.3, 0.33])
        elif self.task_configs['peg'] == '9mm':
            raise NotImplementedError
        elif self.task_configs['peg'] == 'cylinder_16mm':
            from robosuite.models.objects.primitive import CylinderObject
            self.peg = CylinderObject('peg', size=(0.007, 0.025))
            self.objects_of_interest.append(self.peg)
            self.objectsName_of_interest.append('peg')
            self.objectsXrange_of_interest.append([-0.1, -0.13])
            self.objectsYrange_of_interest.append([0.3, 0.33])

        # Create Sequential Sampler. The order is same as the order of register.
        # Create individual samplers per object
        self.placement_initializer = SequentialCompositeSampler(
            name="ObjectSampler")
        for obj_name, default_xy_range in zip(
                self.objectsName_of_interest,
                zip(self.objectsXrange_of_interest,
                    self.objectsYrange_of_interest)):
            self.placement_initializer.append_sampler(
                sampler=UniformRandomSampler(
                    name=f"{obj_name}Sampler",
                    x_range=default_xy_range[0],
                    y_range=default_xy_range[1],
                    rotation=None,
                    rotation_axis='z',
                    ensure_object_boundary_in_range=True,
                    ensure_valid_placement=True,
                    reference_pos=self.table_offset,
                    z_offset=0.01,
                ))
        # Add objects to the sampler
        for obj_to_put, obj_name in zip(self.objects_of_interest,
                                        self.objectsName_of_interest):
            self.placement_initializer.add_objects_to_sampler(
                sampler_name=f"{obj_name}Sampler", mujoco_objects=obj_to_put)

        if self.task_configs['board'] == 'hole':
            self.plate = PlateWithHoleObject(name='plate')
            plate_obj = self.plate.get_obj()
            plate_obj.set("quat", "0 0 0 1")
            plate_obj.set("pos", "0 0 {}".format(self.table_offset[2]))
            self.objects_of_interest.append(self.plate)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.objects_of_interest,
        )
예제 #11
0
class TwoArmLift(RobotEnv):
    """
    This class corresponds to the lifting task for two robot arms.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be either 2 single single-arm robots or 1 bimanual robot!

        env_configuration (str): Specifies how to position the robots within the environment. Can be either:

            :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x
                side of the table
            :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots next to each other on the -x side of the table
            :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots opposed from each others on the opposite +/-y sides of the table (Default option)

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default grippers(s) associated
            with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for
            the table.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        ValueError: [Invalid number of robots specified]
        ValueError: [Invalid env configuration]
        ValueError: [Invalid robots for specified env configuration]
    """
    def __init__(
        self,
        robots,
        env_configuration="single-arm-opposed",
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that correct number of robots are being inputted
        self.env_configuration = env_configuration
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 3, np.pi / 3),
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 3.0 is provided if the pot is lifted and is parallel within 30 deg to the table

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 0.5], per-arm component that is proportional to the distance between each arm and its
              respective pot handle, and exactly 0.5 when grasping the handle
              - Note that the agent only gets the lifting reward when flipping no more than 30 degrees.
            - Grasping: in {0, 0.25}, binary per-arm component awarded if the gripper is grasping its correct handle
            - Lifting: in [0, 1.5], proportional to the pot's height above the table, and capped at a certain threshold

        Note that the final reward is normalized and scaled by reward_scale / 3.0 as
        well so that the max score is equal to reward_scale

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0

        # check if the pot is tilted more than 30 degrees
        mat = T.quat2mat(self._pot_quat)
        z_unit = [0, 0, 1]
        z_rotated = np.matmul(mat, z_unit)
        cos_z = np.dot(z_unit, z_rotated)
        cos_30 = np.cos(np.pi / 6)
        direction_coef = 1 if cos_z >= cos_30 else 0

        # check for goal completion: cube is higher than the table top above a margin
        if self._check_success():
            reward = 3.0 * direction_coef

        # use a shaping reward
        elif self.reward_shaping:
            # lifting reward
            pot_bottom_height = self.sim.data.site_xpos[
                self.pot_center_id][2] - self.pot.get_top_offset()[2]
            table_height = self.sim.data.site_xpos[self.table_top_id][2]
            elevation = pot_bottom_height - table_height
            r_lift = min(max(elevation - 0.05, 0), 0.15)
            reward += 10. * direction_coef * r_lift

            _gripper_0_to_handle = self._gripper_0_to_handle
            _gripper_1_to_handle = self._gripper_1_to_handle

            # gh stands for gripper-handle
            # When grippers are far away, tell them to be closer

            # Single bimanual robot setting
            if self.env_configuration == "bimanual":
                _contacts_0_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["left"].
                            important_geoms["left_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_0_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["left"].
                            important_geoms["right_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_1_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["right"].
                            important_geoms["left_finger"],
                            self.pot.handle_1_geoms()))) > 0
                _contacts_1_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["right"].
                            important_geoms["right_finger"],
                            self.pot.handle_1_geoms()))) > 0
            # Multi single arm setting
            else:
                _contacts_0_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper.
                            important_geoms["left_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_0_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper.
                            important_geoms["right_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_1_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[1].gripper.
                            important_geoms["left_finger"],
                            self.pot.handle_1_geoms()))) > 0
                _contacts_1_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[1].gripper.
                            important_geoms["right_finger"],
                            self.pot.handle_1_geoms()))) > 0
            _g0h_dist = np.linalg.norm(_gripper_0_to_handle)
            _g1h_dist = np.linalg.norm(_gripper_1_to_handle)

            # Grasping reward
            if _contacts_0_lf and _contacts_0_rf:
                reward += 0.25
            # Reaching reward
            reward += 0.5 * (1 - np.tanh(10.0 * _g0h_dist))

            # Grasping reward
            if _contacts_1_lf and _contacts_1_rf:
                reward += 0.25
            # Reaching reward
            reward += 0.5 * (1 - np.tanh(10.0 * _g1h_dist))

        if self.reward_scale is not None:
            reward *= self.reward_scale / 3.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation in zip(self.robots,
                                           (np.pi / 2, -np.pi / 2)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        self.pot = PotWithHandlesObject(name="pot")
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.pot_body_id = self.sim.model.body_name2id("pot")
        self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1")
        self.handle_0_site_id = self.sim.model.site_name2id("pot_handle_2")
        self.table_top_id = self.sim.model.site_name2id("table_top")
        self.pot_center_id = self.sim.model.site_name2id("pot_center")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate(
                        [np.array(obj_pos[i]),
                         np.array(obj_quat[i])]))

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            if self.env_configuration == "bimanual":
                pr0 = self.robots[0].robot_model.naming_prefix + "left_"
                pr1 = self.robots[0].robot_model.naming_prefix + "right_"
            else:
                pr0 = self.robots[0].robot_model.naming_prefix
                pr1 = self.robots[1].robot_model.naming_prefix

            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.pot_body_id])
            cube_quat = T.convert_quat(
                self.sim.data.body_xquat[self.pot_body_id], to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            di[pr0 + "eef_xpos"] = self._eef0_xpos
            di[pr1 + "eef_xpos"] = self._eef1_xpos
            di["handle_0_xpos"] = np.array(self._handle_0_xpos)
            di["handle_1_xpos"] = np.array(self._handle_1_xpos)
            di[pr0 + "gripper_to_handle"] = np.array(self._gripper_0_to_handle)
            di[pr1 + "gripper_to_handle"] = np.array(self._gripper_1_to_handle)

            di["object-state"] = np.concatenate([
                di["cube_pos"],
                di["cube_quat"],
                di[pr0 + "eef_xpos"],
                di[pr1 + "eef_xpos"],
                di["handle_0_xpos"],
                di["handle_1_xpos"],
                di[pr0 + "gripper_to_handle"],
                di[pr1 + "gripper_to_handle"],
            ])

        return di

    def _check_success(self):
        """
        Check if pot is successfully lifted

        Returns:
            bool: True if pot is lifted
        """
        pot_bottom_height = self.sim.data.site_xpos[
            self.pot_center_id][2] - self.pot.get_top_offset()[2]
        table_height = self.sim.data.site_xpos[self.table_top_id][2]

        # cube is higher than the table top above a margin
        return pot_bottom_height > table_height + 0.10

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        robots = robots if type(robots) == list or type(robots) == tuple else [
            robots
        ]
        if self.env_configuration == "single-arm-opposed" or self.env_configuration == "single-arm-parallel":
            # Specifically two robots should be inputted!
            is_bimanual = False
            if type(robots) is not list or len(robots) != 2:
                raise ValueError(
                    "Error: Exactly two single-armed robots should be inputted "
                    "for this task configuration!")
        elif self.env_configuration == "bimanual":
            is_bimanual = True
            # Specifically one robot should be inputted!
            if type(robots) is list and len(robots) != 1:
                raise ValueError(
                    "Error: Exactly one bimanual robot should be inputted "
                    "for this task configuration!")
        else:
            # This is an unknown env configuration, print error
            raise ValueError(
                "Error: Unknown environment configuration received. Only 'bimanual',"
                "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}"
                .format(self.env_configuration))

        # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual)
        for robot in robots:
            if check_type(robot, RobotType.bimanual) != is_bimanual:
                raise ValueError(
                    "Error: For {} configuration, expected bimanual check to return {}; "
                    "instead, got {}.".format(
                        self.env_configuration, is_bimanual,
                        check_type(robot, RobotType.bimanual)))

    @property
    def _handle_0_xpos(self):
        """
        Grab the position of the left (blue) hammer handle.

        Returns:
            np.array: (x,y,z) position of handle
        """
        return self.sim.data.site_xpos[self.handle_0_site_id]

    @property
    def _handle_1_xpos(self):
        """
        Grab the position of the right (green) hammer handle.

        Returns:
            np.array: (x,y,z) position of handle
        """
        return self.sim.data.site_xpos[self.handle_1_site_id]

    @property
    def _pot_quat(self):
        """
        Grab the orientation of the pot body.

        Returns:
            np.array: (x,y,z,w) quaternion of the pot body
        """
        return T.convert_quat(self.sim.data.body_xquat[self.pot_body_id],
                              to="xyzw")

    @property
    def _world_quat(self):
        """
        Grab the world orientation

        Returns:
            np.array: (x,y,z,w) world quaternion
        """
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    @property
    def _eef0_xpos(self):
        """
        Grab the position of Robot 0's end effector.

        Returns:
            np.array: (x,y,z) position of EEF0
        """
        if self.env_configuration == "bimanual":
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id["left"]])
        else:
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])

    @property
    def _eef1_xpos(self):
        """
        Grab the position of Robot 1's end effector.

        Returns:
            np.array: (x,y,z) position of EEF1
        """
        if self.env_configuration == "bimanual":
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id["right"]])
        else:
            return np.array(
                self.sim.data.site_xpos[self.robots[1].eef_site_id])

    @property
    def _gripper_0_to_handle(self):
        """
        Calculate vector from the left gripper to the left pot handle.

        Returns:
            np.array: (dx,dy,dz) distance vector between handle and EEF0
        """
        return self._handle_0_xpos - self._eef0_xpos

    @property
    def _gripper_1_to_handle(self):
        """
        Calculate vector from the right gripper to the right pot handle.

        Returns:
            np.array: (dx,dy,dz) distance vector between handle and EEF0
        """
        return self._handle_1_xpos - self._eef1_xpos
예제 #12
0
class PandaLift(PandaEnv):
    dof = 8
    """
    This class corresponds to the lifting task for the Panda robot arm.
    """

    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        'boxobject_friction_0': [0.4, 1.6],
        # 'boxobject_friction_1': [0.0025, 0.0075],    # fixed this to zero
        'boxobject_friction_2': [0.00005, 0.00015],
        'boxobject_density_1000': [0.6, 1.4],
    }

    def reset_props(self,
                    table_size_0=0.8,
                    table_size_1=0.8,
                    table_size_2=0.8,
                    table_friction_0=1.0,
                    table_friction_1=0.005,
                    table_friction_2=0.0001,
                    boxobject_size_0=0.020,
                    boxobject_size_1=0.020,
                    boxobject_size_2=0.020,
                    boxobject_friction_0=1.0,
                    boxobject_friction_1=0.0,
                    boxobject_friction_2=0.0001,
                    boxobject_density_1000=0.1,
                    **kwargs):

        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1,
                               table_friction_2)
        self.boxobject_size = (boxobject_size_0, boxobject_size_1,
                               boxobject_size_2)
        self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1,
                                   boxobject_friction_2)
        self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            object_obs_process (bool): if True, process the object observation to get a task_state.
                Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing.
        """

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # for first initialization
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (1.0, 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (1.0, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:

            # reaching reward
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def put_raw_object_obs(self, di):
        di['cube_pos'] = np.array(self.sim.data.body_xpos[self.cube_body_id])
        di['cube_quat'] = convert_quat(np.array(
            self.sim.data.body_xquat[self.cube_body_id]),
                                       to="xyzw")
        di['gripper_site_pos'] = np.array(
            self.sim.data.site_xpos[self.eef_site_id])

    def process_object_obs(self, di):
        gripper_to_cube = di['gripper_site_pos'] - di['cube_pos']
        task_state = np.concatenate(
            [di['cube_pos'], di['cube_quat'], gripper_to_cube])
        di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            self.put_raw_object_obs(di)
            if self.object_obs_process:
                self.process_object_obs(di)

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        return cube_height > table_height + 0.04

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #13
0
파일: stack.py 프로젝트: NagisaZj/robosuite
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        greenwood = CustomMaterial(
            texture="WoodGreen",
            tex_name="greenwood",
            mat_name="greenwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        self.cubeA = BoxObject(
            name="cubeA",
            size_min=[0.02, 0.02, 0.02],
            size_max=[0.02, 0.02, 0.02],
            rgba=[1, 0, 0, 1],
            material=redwood,
        )
        self.cubeB = BoxObject(
            name="cubeB",
            size_min=[0.025, 0.025, 0.025],
            size_max=[0.025, 0.025, 0.025],
            rgba=[0, 1, 0, 1],
            material=greenwood,
        )
        cubes = [self.cubeA, self.cubeB]
        # Create placement initializer
        if self.placement_initializer is not None:
            self.placement_initializer.reset()
            self.placement_initializer.add_objects(cubes)
        else:
            self.placement_initializer = UniformRandomSampler(
                name="ObjectSampler",
                mujoco_objects=cubes,
                x_range=[-0.08, 0.08],
                y_range=[-0.08, 0.08],
                rotation=None,
                ensure_object_boundary_in_range=False,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
                z_offset=0.01,
            )

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=cubes,
        )
예제 #14
0
class BaxterLift(BaxterEnv):
    """
    This class corresponds to the bimanual lifting task for the Baxter robot.
    """
    def __init__(self,
                 gripper_type_right="TwoFingerGripper",
                 gripper_type_left="LeftTwoFingerGripper",
                 table_full_size=(0.8, 0.8, 0.8),
                 table_friction=(1., 5e-3, 1e-4),
                 use_object_obs=True,
                 reward_shaping=True,
                 **kwargs):
        """
        Args:

            gripper_type_right (str): type of gripper used on the right hand.

            gripper_type_lefft (str): type of gripper used on the right hand.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_object_obs (bool): if True, include object (pot) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

        Inherits the Baxter environment; refer to other parameters described there.
        """

        # initialize the pot
        self.pot = PotWithHandlesObject()
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        self.object_initializer = UniformRandomSampler(
            x_range=(-0.15, -0.04),
            y_range=(-0.015, 0.015),
            z_rotation=(-0.15 * np.pi, 0.15 * np.pi),
            ensure_object_boundary_in_range=False,
        )

        super().__init__(gripper_left=gripper_type_left,
                         gripper_right=gripper_type_right,
                         **kwargs)

    def _load_model(self):
        """
        Loads the arena and pot object.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.45 + self.table_full_size[0] / 2, 0, 0])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            self.object_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("pot")
        self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1")
        self.handle_2_site_id = self.sim.model.site_name2id("pot_handle_2")
        self.table_top_id = self.sim.model.site_name2id("table_top")
        self.pot_center_id = self.sim.model.site_name2id("pot_center")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        self.model.place_objects()

    def reward(self, action):
        """
        Reward function for the task.

          1. the agent only gets the lifting reward when flipping no more than 30 degrees.
          2. the lifting reward is smoothed and ranged from 0 to 2, capped at 2.0.
             the initial lifting reward is 0 when the pot is on the table;
             the agent gets the maximum 2.0 reward when the pot’s height is above a threshold.
          3. the reaching reward is 0.5 when the left gripper touches the left handle,
             or when the right gripper touches the right handle before the gripper geom
             touches the handle geom, and once it touches we use 0.5
        """
        reward = 0

        cube_height = self.sim.data.site_xpos[
            self.pot_center_id][2] - self.pot.get_top_offset()[2]
        table_height = self.sim.data.site_xpos[self.table_top_id][2]

        # check if the pot is tilted more than 30 degrees
        mat = T.quat2mat(self._pot_quat)
        z_unit = [0, 0, 1]
        z_rotated = np.matmul(mat, z_unit)
        cos_z = np.dot(z_unit, z_rotated)
        cos_30 = np.cos(np.pi / 6)
        direction_coef = 1 if cos_z >= cos_30 else 0

        # cube is higher than the table top above a margin
        if cube_height > table_height + 0.15:
            reward = 1.0 * direction_coef

        # use a shaping reward
        if self.reward_shaping:
            reward = 0

            # lifting reward
            elevation = cube_height - table_height
            r_lift = min(max(elevation - 0.05, 0), 0.2)
            reward += 10. * direction_coef * r_lift

            l_gripper_to_handle = self._l_gripper_to_handle
            r_gripper_to_handle = self._r_gripper_to_handle

            # gh stands for gripper-handle
            # When grippers are far away, tell them to be closer
            l_contacts = list(
                self.find_contacts(self.gripper_left.contact_geoms(),
                                   self.pot.handle_1_geoms()))
            r_contacts = list(
                self.find_contacts(self.gripper_right.contact_geoms(),
                                   self.pot.handle_2_geoms()))
            l_gh_dist = np.linalg.norm(l_gripper_to_handle)
            r_gh_dist = np.linalg.norm(r_gripper_to_handle)

            if len(l_contacts) > 0:
                reward += 0.5
            else:
                reward += 0.5 * (1 - np.tanh(l_gh_dist))

            if len(r_contacts) > 0:
                reward += 0.5
            else:
                reward += 0.5 * (1 - np.tanh(r_gh_dist))

        return reward

    @property
    def _handle_1_xpos(self):
        """Returns the position of the first handle."""
        return self.sim.data.site_xpos[self.handle_1_site_id]

    @property
    def _handle_2_xpos(self):
        """Returns the position of the second handle."""
        return self.sim.data.site_xpos[self.handle_2_site_id]

    @property
    def _pot_quat(self):
        """Returns the orientation of the pot."""
        return T.convert_quat(self.sim.data.body_xquat[self.cube_body_id],
                              to="xyzw")

    @property
    def _world_quat(self):
        """World quaternion."""
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    @property
    def _l_gripper_to_handle(self):
        """Returns vector from the left gripper to the handle."""
        return self._handle_1_xpos - self._l_eef_xpos

    @property
    def _r_gripper_to_handle(self):
        """Returns vector from the right gripper to the handle."""
        return self._handle_2_xpos - self._r_eef_xpos

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            cube_quat = T.convert_quat(
                self.sim.data.body_xquat[self.cube_body_id], to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            di["l_eef_xpos"] = self._l_eef_xpos
            di["r_eef_xpos"] = self._r_eef_xpos
            di["handle_1_xpos"] = self._handle_1_xpos
            di["handle_2_xpos"] = self._handle_2_xpos
            di["l_gripper_to_handle"] = self._l_gripper_to_handle
            di["r_gripper_to_handle"] = self._r_gripper_to_handle

            di["object-state"] = np.concatenate([
                di["cube_pos"],
                di["cube_quat"],
                di["l_eef_xpos"],
                di["r_eef_xpos"],
                di["handle_1_xpos"],
                di["handle_2_xpos"],
                di["l_gripper_to_handle"],
                di["r_gripper_to_handle"],
            ])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        contact_geoms = (self.gripper_right.contact_geoms() +
                         self.gripper_left.contact_geoms())
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in contact_geoms
                    or self.sim.model.geom_id2name(
                        contact.geom2) in contact_geoms):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task is successfully completed
        """
        # cube is higher than the table top above a margin
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]
        return cube_height > table_height + 0.10
예제 #15
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        greenwood = CustomMaterial(
            texture="WoodGreen",
            tex_name="greenwood",
            mat_name="greenwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        cubeA = BoxObject(
            name="cubeA",
            size_min=[0.02, 0.02, 0.02],
            size_max=[0.02, 0.02, 0.02],
            rgba=[1, 0, 0, 1],
            material=redwood,
        )
        cubeB = BoxObject(
            name="cubeB",
            size_min=[0.025, 0.025, 0.025],
            size_max=[0.025, 0.025, 0.025],
            rgba=[0, 1, 0, 1],
            material=greenwood,
        )
        self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            self.mujoco_arena,
            [robot.robot_model for robot in self.robots],
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()
예제 #16
0
class Door(RobotEnv):
    """
    This class corresponds to the door opening task for a single robot arm.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default grippers(s) associated
            with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        use_latch (bool): if True, uses a spring-loaded handle and latch to "lock" the door closed initially
            Otherwise, door is instantiated with a fixed handle

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """
    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        use_latch=True,
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top (hardcoded since it's not an essential part of the environment)
        self.table_full_size = (0.8, 0.3, 0.05)
        self.table_offset = (-0.2, -0.35, 0.8)

        # reward configuration
        self.use_latch = use_latch
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[0.07, 0.09],
                y_range=[-0.01, 0.01],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
                rotation_axis='z',
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 1.0 is provided if the door is opened

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 0.25], proportional to the distance between door handle and robot arm
            - Rotating: in [0, 0.25], proportional to angle rotated by door handled
              - Note that this component is only relevant if the environment is using the locked door version

        Note that a successfully completed task (door opened) will return 1.0 irregardless of whether the environment
        is using sparse or shaped rewards

        Note that the final reward is normalized and scaled by reward_scale / 1.0 as
        well so that the max score is equal to reward_scale

        Args:
            action (np.array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # else, we consider only the case if we're using shaped rewards
        elif self.reward_shaping:
            # Add reaching component
            dist = np.linalg.norm(self._gripper_to_handle)
            reaching_reward = 0.25 * (1 - np.tanh(10.0 * dist))
            reward += reaching_reward
            # Add rotating component if we're using a locked door
            if self.use_latch:
                handle_qpos = self.sim.data.qpos[self.handle_qpos_addr]
                reward += np.clip(0.25 * np.abs(handle_qpos / (0.5 * np.pi)),
                                  -0.25, 0.25)

        # Scale reward if requested
        if self.reward_scale is not None:
            reward *= self.reward_scale / 1.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_offset=self.table_offset,
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        door = DoorObject(
            name="Door",
            friction=0.0,
            damping=0.1,
            lock=self.use_latch,
            joints=[],  # ensures that door object does not have a free joint
        )
        self.mujoco_objects = OrderedDict([("Door", door)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.object_body_ids = dict()
        self.object_body_ids["door"] = self.sim.model.body_name2id("door")
        self.object_body_ids["frame"] = self.sim.model.body_name2id("frame")
        self.object_body_ids["latch"] = self.sim.model.body_name2id("latch")
        self.door_handle_site_id = self.sim.model.site_name2id("door_handle")
        self.hinge_qpos_addr = self.sim.model.get_joint_qpos_addr("door_hinge")
        if self.use_latch:
            self.handle_qpos_addr = self.sim.model.get_joint_qpos_addr(
                "latch_joint")

        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["left_finger"]
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["right_finger"]
        ]

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for the Door object
            door_pos, door_quat = self.model.place_objects()
            door_body_id = self.sim.model.body_name2id("Door")
            self.sim.model.body_pos[door_body_id] = door_pos[0]
            self.sim.model.body_quat[door_body_id] = door_quat[0]

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            pr = self.robots[0].robot_model.naming_prefix

            eef_pos = self._eef_xpos
            door_pos = np.array(
                self.sim.data.body_xpos[self.object_body_ids["door"]])
            handle_pos = self._handle_xpos
            hinge_qpos = np.array([self.sim.data.qpos[self.hinge_qpos_addr]])

            di["door_pos"] = door_pos
            di["handle_pos"] = handle_pos
            di[pr + "door_to_eef_pos"] = door_pos - eef_pos
            di[pr + "handle_to_eef_pos"] = handle_pos - eef_pos
            di["hinge_qpos"] = hinge_qpos

            di['object-state'] = np.concatenate([
                di["door_pos"], di["handle_pos"], di[pr + "door_to_eef_pos"],
                di[pr + "handle_to_eef_pos"], di["hinge_qpos"]
            ])

            # Also append handle qpos if we're using a locked door version with rotatable handle
            if self.use_latch:
                handle_qpos = np.array(
                    [self.sim.data.qpos[self.handle_qpos_addr]])
                di["handle_qpos"] = handle_qpos
                di['object-state'] = np.concatenate(
                    [di["object-state"], di["handle_qpos"]])

        return di

    def _check_success(self):
        """
        Check if door has been opened.

        Returns:
            bool: True if door has been opened
        """
        hinge_qpos = self.sim.data.qpos[self.hinge_qpos_addr]
        return hinge_qpos > 0.3

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to door handle
        if self.robots[0].gripper_visualization:
            # get distance to door handle
            dist = np.sum(
                np.square(self._handle_xpos - self.sim.data.get_site_xpos(
                    self.robots[0].gripper.visualization_sites["grip_site"])))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(
                robots
            ) == 1, "Error: Only one robot should be inputted for this task!"

    @property
    def _eef_xpos(self):
        """
        Grabs End Effector position

        Returns:
            np.array: End effector(x,y,z)
        """
        return np.array(self.sim.data.site_xpos[self.robots[0].eef_site_id])

    @property
    def _handle_xpos(self):
        """
        Grabs the position of the door handle handle.

        Returns:
            np.array: Door handle (x,y,z)
        """
        return self.sim.data.site_xpos[self.door_handle_site_id]

    @property
    def _gripper_to_handle(self):
        """
        Calculates distance from the gripper to the door handle.

        Returns:
            np.array: (x,y,z) distance between handle and eef
        """
        return self._handle_xpos - self._eef_xpos
예제 #17
0
# 2. Add the Robot
from robosuite.models.robots import Panda
mujoco_robot = Panda()

mujoco_robot.set_base_xpos([0, 0, 0])  # xyz
mujoco_robot.set_base_ori([0, 0, 0])  # rpy
world.merge(mujoco_robot)  # takes xml/list of xmls
#------------------------------------------------------------------------
# 3. Create Arena: table

# Initialize the TableArena instance taht creates a table and floorplane
from robosuite.models.arenas import TableArena
#mujoco_arena = TableArena()
#mujoco_arena = TableArena(table_full_size=(0.4, 0.8, 0.05))
#mujoco_arena = TableArena(table_full_size=(0.4, 0.8, 0.05),has_legs=False)
mujoco_arena = TableArena(table_full_size=(0.4, 0.8, 0.05),
                          table_offset=(0.4, 0, 0.1))
mujoco_arena.set_origin([0.4, 0, 0])
world.merge(mujoco_arena)

#------------------------------------------------------------------------

# 4. Run simulation
model = world.get_model(mode="mujoco_py")

from mujoco_py import MjSim, MjViewer
sim = MjSim(model)

# view it
viewer = MjViewer(sim)  # creates viewing window
viewer.vopt.geomgroup[0] = 0  # disable visualization of collision mesh
예제 #18
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation in zip(self.robots, (np.pi/2, -np.pi/2)):
                    xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:   # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        self.pot = PotWithHandlesObject(name="pot")

        # Create placement initializer
        if self.placement_initializer is not None:
            self.placement_initializer.reset()
            self.placement_initializer.add_objects(self.pot)
        else:
            self.placement_initializer = UniformRandomSampler(
                name="ObjectSampler",
                mujoco_objects=self.pot,
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
                rotation=(np.pi + -np.pi / 3, np.pi + np.pi / 3),
            )

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots], 
            mujoco_objects=self.pot,
        )
예제 #19
0
from robosuite.models.robots import Panda

mujoco_robot = Panda()

from robosuite.models.grippers import gripper_factory

gripper = gripper_factory('PandaGripper')
# gripper.hide_visualization()          # this doesnt work
mujoco_robot.add_gripper(gripper)

mujoco_robot.set_base_xpos([0, 0, 0])
world.merge(mujoco_robot)

from robosuite.models.arenas import TableArena

mujoco_arena = TableArena()
mujoco_arena.set_origin([0.8, 0, 0])
world.merge(mujoco_arena)

from robosuite.models.objects import BallObject
from robosuite.utils.mjcf_utils import new_joint

sphere = BallObject(name="sphere", size=[0.04], rgba=[0, 0.5, 0.5,
                                                      1]).get_obj()
sphere.set('pos', '1.0 0 1.0')
world.worldbody.append(sphere)

model = world.get_model(mode="mujoco_py")

from mujoco_py import MjSim, MjViewer
예제 #20
0
class FetchPush(RobotEnv):
    """
    This class corresponds to the fetch/push task for a single robot arm. Many similarities with lifting task.
    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!
        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param
        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default grippers(s) associated
            with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param
        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param
        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:
            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"
            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param
            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.
        table_full_size (3-tuple): x, y, and z dimensions of the table.
        table_friction (3-tuple): the three mujoco friction parameters for
            the table.
        use_camera_obs (bool): if True, every observation includes rendered image(s)
        use_object_obs (bool): if True, include object (cube) information in
            the observation.
        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized
        reward_shaping (bool): if True, use dense rewards.
        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.
        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.
        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.
        has_offscreen_renderer (bool): True if using off-screen rendering
        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse
        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.
        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.
        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.
        horizon (int): Every episode lasts for exactly @horizon timesteps.
        ignore_done (bool): True if never terminating the environment (ignore @horizon).
        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables
        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.
            :Note: At least one camera must be specified if @use_camera_obs is True.
            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).
        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.
        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.
        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.
    Raises:
        AssertionError: [Invalid number of robots specified]
    """
    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(5e-3, 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        distance_threshold=0.06,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.distance_threshold = distance_threshold

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.22, 0.22],
                y_range=[-0.22, 0.22],
                ensure_object_boundary_in_range=False,
                rotation=None,
                z_offset=0.002,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def _distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        return np.linalg.norm(goal_a - goal_b)

    def reward(self, action=None):
        """
        Reward function for the task.

        Args:
            action (np array): [NOT USED]
        Returns:
            float: reward value
        """
        reward = .0

        cube_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.body_xpos[self.goal_cube_body_id]
        gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]
        cube_to_goal_dist = self._distance(cube_pos, goal_pos)
        gripper_to_cube_dist = self._distance(gripper_site_pos, cube_pos)

        if self.reward_shaping:
            # Return large reward if cube has been moved to goal
            if self._check_success():
                reward += 2.25

            # Reaching reward
            reward += 1 - np.tanh(10.0 * gripper_to_cube_dist)

            if self._is_gripper_touching_cube():
                reward += 0.5  # Reward for touching cube
                reward += 1.5 - 1.5 * np.tanh(
                    10.0 * cube_to_goal_dist)  # Reward for pushing cube

            # Return large penalty if robot has moved significantly away from cube
            #if self._has_moved_significantly_away_from_cube():
            #    reward += -2

            # Reward for touching cube and pushing toward goal
            #if self._is_gripper_touching_cube():
            #    reward += 0.1 + (1 - np.tanh(10.0 * cube_to_goal_dist))

            # Give penalty for touching table
            #if self._is_gripper_touching_table():
            #    reward -= 0.1

        else:
            reward += -float(not self._check_success())

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        cube = BoxObject(
            name="cube",
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            rgba=[1, 0, 0, 1],
            material=redwood,
            friction=[0.5, 5e-3, 1e-4])

        goal_cube = BoxObject(
            name="goal_cube",
            size_min=[0.005, 0.005, 0.005],
            size_max=[0.005, 0.005, 0.005],
            rgba=[0, 0, 1, 1],
            density=11342,  # Density of lead such that the goal won't be moved
        )

        self.mujoco_objects = OrderedDict([("cube", cube),
                                           ("goal_cube", goal_cube)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.goal_cube_body_id = self.sim.model.body_name2id("goal_cube")

        if self.robots[0].gripper_type == 'UltrasoundProbeGripper':
            self.probe_geom_id = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["probe"]
            ]
        elif self.robots[0].has_gripper:
            self.l_finger_geom_ids = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["left_finger"]
            ]
            self.r_finger_geom_ids = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["right_finger"]
            ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")
        self.goal_cube_geom_id = self.sim.model.geom_name2id("goal_cube")
        self.table_geom_id = self.sim.model.geom_name2id("table_collision")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate(
                        [np.array(obj_pos[i]),
                         np.array(obj_quat[i])]))

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].
        Important keys:
            `'robot-state'`: contains robot-centric information.
            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.
            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.
            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation
        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # Get robot prefix
        pr = self.robots[0].robot_model.naming_prefix

        if self.robots[0].has_gripper:
            # Checking if the UltrasoundProbeGripper is used
            if self.robots[0].gripper.dof == 0:
                # Remove unused keys (no joints in gripper)
                di.pop('robot0_gripper_qpos', None)
                di.pop('robot0_gripper_qvel', None)

        # low-level object information
        if self.use_object_obs:

            # position and rotation of object
            goal_pos = np.array(
                self.sim.data.body_xpos[self.goal_cube_body_id])
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            di["cube_pos"] = cube_pos
            di["goal_pos"] = goal_pos
            di[pr + "cube_to_goal"] = cube_pos - goal_pos

            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])
            di[pr + "gripper_to_cube"] = gripper_site_pos - cube_pos

            # Used for GymWrapper observations (Robot state will also default be added e.g. eef position)
            di["object-state"] = np.concatenate([
                cube_pos,
                cube_quat,
                goal_pos,
                di[pr + "gripper_to_cube"],
                di[pr + "gripper_to_cube"],
            ])
        return di

    def _check_success(self):
        """
        Check if cube has been pushed to goal.
        Returns:
            bool: True if cube has been pushed to goal
        """
        cube_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.body_xpos[self.goal_cube_body_id]
        cube_to_goal_dist = self._distance(cube_pos, goal_pos)

        return cube_to_goal_dist < self.distance_threshold

    '''
    def _has_moved_significantly_away_from_cube(self):
        """
        Check if the robot has moved away from cube between steps.
        Returns:
            bool: True if episode is the robot's end-effector has moved far away from cube
        """
        return self.gripper_to_cube_dist > self.initial_gripper_to_cube_dist + 0.12


    def _check_terminated(self):
        """
        Check if the task has completed one way or another. The following conditions lead to termination:
            - Task completion (cube pushed to goal) Should this be added?
            - Robot moving away from cube
        Returns:
            bool: True if episode is terminated
        """
        return self._has_moved_significantly_away_from_cube()
    '''

    def _is_gripper_touching_cube(self):
        """
        Check if the gripper is in contact with the cube    
        Returns:
            bool: True if contact
        """
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            geom_name1 = self.sim.model.geom_id2name(contact.geom1)
            geom_name2 = self.sim.model.geom_id2name(contact.geom2)

            if (geom_name1 in self.robots[0].gripper.contact_geoms
                    and geom_name2 == self.sim.model.geom_id2name(
                        self.cube_geom_id)
                    or geom_name2 in self.robots[0].gripper.contact_geoms
                    and geom_name1 == self.sim.model.geom_id2name(
                        self.cube_geom_id)):
                return True

        return False

    def _is_gripper_touching_table(self):
        """
        Check if the gripper is in contact with the tabletop
        Returns:
            bool: True if contact
        """
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            geom_name1 = self.sim.model.geom_id2name(contact.geom1)
            geom_name2 = self.sim.model.geom_id2name(contact.geom2)

            if (geom_name1 in self.robots[0].gripper.contact_geoms
                    and geom_name2 == self.sim.model.geom_id2name(
                        self.table_geom_id)
                    or geom_name2 in self.robots[0].gripper.contact_geoms
                    and geom_name1 == self.sim.model.geom_id2name(
                        self.table_geom_id)):
                return True

        return False

    def _post_action(self, action):
        """
        In addition to super method, add additional info if requested
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            3-tuple:
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) info about current env step
        """
        reward, done, info = super()._post_action(action)
        #done = done or self._check_terminated()
        return reward, done, info

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.robots[0].gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos(
                              self.robots[0].gripper.
                              visualization_sites["grip_site"])))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable
        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(
                robots
            ) == 1, "Error: Only one robot should be inputted for this task!"
예제 #21
0
class SawyerPrimitivePick(SawyerEnv):
    """
    This class corresponds to the lifting task for the Sawyer robot arm.
    """
    def __init__(
        self,
        instructive=0.0,
        decay=0.0,
        random_arm_init=None,
        gripper_type="TwoFingerGripper",
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.random_arm_init = random_arm_init
        self.instructive = instructive
        self.instructive_counter = 0
        self.eval_mode = False
        self.decay = decay
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.00, 0.00],
                y_range=[-0.00, 0.00],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )
        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        cube = BoxObject(
            # size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            # size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            size_min=[0.015, 0.015, 0.015],
            size_max=[0.015, 0.015, 0.015],
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super(SawyerEnv, self)._reset_internal()

        self.model.place_objects()

        if self.random_arm_init:
            # random initialization of arm
            constant_quat = np.array(
                [-0.01704371, -0.99972409, 0.00199679, -0.01603944])

            target_position = np.array([
                0.5 + np.random.uniform(self.random_arm_init[0],
                                        self.random_arm_init[1]),
                np.random.uniform(self.random_arm_init[0],
                                  self.random_arm_init[1]),
                self.table_full_size[2] + 0.15211762
            ])
            self.controller.sync_ik_robot(self._robot_jpos_getter(),
                                          simulate=True)
            joint_list = self.controller.inverse_kinematics(
                target_position, constant_quat)
            init_pos = np.array(joint_list)
        else:
            # default robosuite init
            init_pos = np.array(
                [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
            init_pos += np.random.randn(init_pos.shape[0]) * 0.02

        self.sim.data.qpos[self._ref_joint_pos_indexes] = init_pos

        self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array(
            [0.0115, -0.0115])  # Open

        self.sim.forward()
        self.sim.data.qpos[10:12] = self.sim.data.site_xpos[
            self.eef_site_id][:2]

        # decay rate (1 / (1 + decay_param * #resets))
        chance = self.instructive * (
            1 / (1 + self.decay * self.instructive_counter))
        if np.random.uniform() < chance:  # and not self.eval_mode:
            self.sim.data.qpos[13] = self.sim.data.site_xpos[
                self.eef_site_id][2]
            self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array(
                [-0.0, -0.0])  #np.array([-0.21021952, -0.00024167])  # gripped

        self.instructive_counter = self.instructive_counter + 1

        cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
        self.goal = cube_pos + np.array((0, 0, 0.075))

    def _robot_jpos_getter(self):
        return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """

        cube_height = self.sim.data.body_xpos[self.cube_body_id]

        return self.compute_reward(cube_height, self.goal)

    # for goalenv wrapper
    def compute_reward(self, achieved_goal, desired_goal, info=None):
        # -1 if cube is below, 0 if cube is above
        reward = -1.0
        if achieved_goal[2] > desired_goal[2]:
            reward = 100.0

        return reward

    # for goalenv wrapper
    def get_goalenv_dict(self, obs_dict):
        # using only object-state and robot-state
        ob_lst = []
        di = {}
        for key in obs_dict:
            if key in ["robot-state", "object-state"]:
                ob_lst.append(obs_dict[key])

        di['observation'] = np.concatenate(ob_lst)
        di['desired_goal'] = self.goal
        di['achieved_goal'] = obs_dict['object-state'][0:3]

        return di

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            # di["object-state"] = np.concatenate(
            #     [cube_pos, cube_quat, di["gripper_to_cube"]]
            # )
            di["object-state"] = np.concatenate(
                [di["gripper_to_cube"], cube_pos])

        return di

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #22
0
class BaxterReach(BaxterEnv):
    def __init__(self,
                 table_full_size=(0.8, 0.8, 0.8),
                 table_friction=(1., 5e-3, 1e-4),
                 use_object_obs=True,
                 **kwargs):
        """
        Args:

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_object_obs (bool): if True, include object (pot) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

        Inherits the Baxter environment; refer to other parameters described there.
        """

        self.mujoco_objects = OrderedDict()

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = False

        self.object_initializer = None

        super().__init__(use_indicator_object=True,
                         gripper_left="LeftTwoFingerGripper",
                         gripper_right="TwoFingerGripper",
                         **kwargs)

    def _load_model(self):
        """
        Loads the arena and pot object.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.45 + self.table_full_size[0] / 2, 0, 0])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            self.object_initializer,
        )

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.table_top_id = self.sim.model.site_name2id("table_top")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        goal = np.random.uniform(-0.3, 0.3, size=3)
        goal[2] = 0
        goal += self.model.table_top_offset
        self.goal = goal

    @property
    def goal(self):
        return self.sim.data.qpos[self._ref_indicator_pos_low:self.
                                  _ref_indicator_pos_low + 3]

    @goal.setter
    def goal(self, new_goal):
        self.move_indicator(new_goal)

    def get_dist(self):
        goal = self.goal
        return np.linalg.norm(self._l_eef_xpos -
                              goal) + np.linalg.norm(self._r_eef_xpos - goal)

    def reward(self, action):
        """
        Reward function for the task.
        """
        dist = self.get_dist()
        return -dist

    @property
    def _world_quat(self):
        """World quaternion."""
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        di['object-state'] = self.goal.copy()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        contact_geoms = (self.gripper_right.contact_geoms() +
                         self.gripper_left.contact_geoms())
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in contact_geoms
                    or self.sim.model.geom_id2name(
                        contact.geom2) in contact_geoms):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task is successfully completed
        """
        dist = self.get_dist()
        # if dist < 1:
        #    print('dist:', dist)
        if dist < 0.2:
            return True
        return False
예제 #23
0
class SawyerReach(SawyerEnv):
    """
    This class corresponds to the a primitive policy for the reach task on the Sawyer robot arm.

    Uniform random sample on x, y, or z axis in range of 20 to 60 cm for x and y, 0 to 40 cm for z (center of table)
    """
    def __init__(
        self,
        gripper_type="TwoFingerGripper",
        lower_goal_range=[-0.1, -0.1, -0.1],
        upper_goal_range=[0.1, 0.1, 0.2],
        seed=False,
        random_arm_init=None,  # please pass in [low, high]
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=False,
        use_object_obs=True,
        reward_shaping=True,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:
            prim_axis (str): which axis is being explored

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.upper_goal_range = upper_goal_range
        self.lower_goal_range = lower_goal_range
        self.random_arm_init = random_arm_init
        self.seed = seed

        self.distance_threshold = 0.015
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        self.placement_initializer = placement_initializer

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )
        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        self.mujoco_objects = OrderedDict([])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        # self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        # self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        if self.random_arm_init is not None:
            if self.seed:
                constant_quat = np.array(
                    [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
                target_position = np.array([
                    0.5 + np.random.uniform(0.0, 0.0),
                    np.random.uniform(0.0, 0.0),
                    # self.table_full_size[2] + 0.15211762])
                    0.8 + 0.20211762
                ])
            else:
                # random initialization of arm
                constant_quat = np.array(
                    [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
                target_position = np.array([
                    0.5 + np.random.uniform(self.random_arm_init[0],
                                            self.random_arm_init[1]),
                    np.random.uniform(self.random_arm_init[0],
                                      self.random_arm_init[1]),
                    self.table_full_size[2] + 0.20211762
                ])
            self.controller.sync_ik_robot(self._robot_jpos_getter(),
                                          simulate=True)
            joint_list = self.controller.inverse_kinematics(
                target_position, constant_quat)
            init_pos = np.array(joint_list)

        else:
            # default robosuite init
            init_pos = np.array(
                [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
            init_pos += np.random.randn(init_pos.shape[0]) * 0.02

        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reset(self):
        self._destroy_viewer()
        self._reset_internal()
        self.sim.forward()

        # reset goal (marker)
        gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id])
        distance = np.random.uniform(self.lower_goal_range,
                                     self.upper_goal_range)
        while np.linalg.norm(distance) <= (self.distance_threshold * 1.75):
            distance = np.random.uniform(self.lower_goal_range,
                                         self.upper_goal_range)
        if self.seed:
            distance = np.array((0.05, -0.1, 0.1))

        self.goal = gripper_site_pos + distance

        return self._get_observation()

    def _robot_jpos_getter(self):
        return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has one component.

            Reaching: in [0, 1], to encourage the arm to reach the marker

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        #   velocity_pen = np.linalg(np.array(
        #       [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes]
        #  ))
        marker_pos = self.goal
        gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]

        return self.compute_reward(gripper_site_pos, marker_pos, None)

    # for goalenv wrapper
    def compute_reward(self, achieved_goal, desired_goal, info=None):
        d = np.linalg.norm(achieved_goal - desired_goal)
        if self.reward_shaping:  # dense
            reward = 1 - np.tanh(10 * d)
            if d <= self.distance_threshold:
                reward = 10.0
        else:  # sparse (-1 or 0)
            #reward = -np.float32(d > distance_threshold)
            if d > self.distance_threshold:
                reward = -1.0
            else:
                reward = 60.0

        return reward

    # for goalenv wrapper
    # TODO check the desired_goal/achieved_goal for indexing
    def get_goalenv_dict(self, obs_dict):
        # using only object-state and robot-state
        ob_lst = []
        di = {}
        for key in obs_dict:
            if key in ["robot-state", "object-state"]:
                ob_lst.append(obs_dict[key])

        di['observation'] = np.concatenate(ob_lst)
        di['desired_goal'] = obs_dict['object-state'][0:3]
        di['achieved_goal'] = obs_dict['robot-state'][23:26]

        return di

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_marker"] = gripper_site_pos - self.goal

            di["object-state"] = np.concatenate(
                [di["gripper_to_marker"], self.goal])

        return di

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
class SawyerReach(SawyerEnv):
    """
    This class corresponds to the reaching for the Sawyer robot arm. The gripper needs to arrive 2cm above
    a specified goal on the table.
    """

    def __init__(
            self,
            gripper_type="PushingGripper",
            parameters_to_randomise=None,
            randomise_initial_conditions=True,
            table_full_size=(0.8, 1.6, 0.719),
            use_camera_obs=False,
            use_object_obs=True,
            reward_shaping=True,
            use_indicator_object=False,
            has_renderer=False,
            has_offscreen_renderer=True,
            render_collision_mesh=False,
            render_visual_mesh=True,
            control_freq=10,
            horizon=50,
            ignore_done=False,
            camera_name="frontview",
            camera_height=256,
            camera_width=256,
            camera_depth=False,
            pid=True,
            success_radius=0.01
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            parameters_to_randomise [string,] : List of keys for parameters to randomise, None means all the available parameters are randomised

            randomise_initial_conditions [bool,]: Whether or not to randomise the starting configuration of the task.

            table_full_size (3-tuple): x, y, and z dimensions of the table.


            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.

            pid (bool) : Whether to use a  velocity pid contoler or the mujoco proportional velocity contoler

            success_radius (bool) : how close to the goal si considered a success.
        """

        self.initialised = False

        # settings for table
        self.table_full_size = table_full_size

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping
        if (self.reward_shaping):
            self.reward_range = [-np.inf, horizon * (0.1)]
        else:
            self.reward_range = [0, 1]

        # Whether to use dynamics domain randomisation
        self.parameters_to_randomise = parameters_to_randomise
        self.randomise_initial_conditions = randomise_initial_conditions
        self.dynamics_parameters = OrderedDict()
        self.default_dynamics_parameters = OrderedDict()
        self.parameter_sampling_ranges = OrderedDict()
        self.factors_for_param_randomisation = OrderedDict()

        self.success_radius = success_radius


        #Param for storing a specific goal starting position
        self.specific_goal_position = None

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=False,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
            pid=pid,
        )

        self._set_default_dynamics_parameters(pid)
        self._set_default_parameter_sampling_ranges()
        self._set_dynamics_parameters(self.default_dynamics_parameters)
        self._set_factors_for_param_randomisation(self.default_dynamics_parameters)

        # Check that the parameters to randomise are within the allowed parameters
        if (self.parameters_to_randomise is not None):
            self._check_allowed_parameters(self.parameters_to_randomise)

        # IK solver for placing the arm at desired locations during reset
        self.IK_solver = SawyerEEFVelocityController()

        self.init_control_timestep = self.control_timestep
        self.init_qpos = self.mujoco_robot.init_qpos

        gripper_tip_xpos = self.sim.data.get_site_xpos("grip_site").copy()
        gripper_to_eef_in_world = self.sim.data.get_body_xpos("right_hand") - gripper_tip_xpos
        self.gripper_size = np.linalg.norm(gripper_to_eef_in_world)

        # Storing  parameters for temporary switching
        self.cached_parameters_to_randomise = None
        self.cached_dynamics_parameters = None

        self.initialised = True
        self.reset()

    def _set_dynamics_parameters(self, parameters):
        self.dynamics_parameters = copy.deepcopy(parameters)

    def _default_damping_params(self):
        # return np.array([0.01566, 1.171, 0.4906, 0.1573, 1.293, 0.08688, 0.1942]) # -real world calibration
        # return np.array([0.8824,2.3357,1.1729, 0.0 , 0.5894, 0.0  ,0.0082]) #- command calibration
        return np.array([8.19520686e-01, 1.25425414e+00, 1.04222253e+00,
       0.00000000e+00, 1.43146116e+00, 1.26807887e-01, 1.53680244e-01,]) #- command calibration 2

    def _default_armature_params(self):
        return np.array([0.00000000e+00, 0.00000000e+00, 2.70022664e-02, 5.35581203e-02,
       3.31204140e-01, 2.59623415e-01, 2.81964631e-01,])

    def _default_joint_friction_params(self):
        return np.array([4.14390483e-03,
       9.30938506e-02, 2.68656509e-02, 0.00000000e+00, 0.00000000e+00,
       4.24867204e-04, 8.62040317e-04])

    def _set_default_dynamics_parameters(self, use_pid):
        """
        Setting the the default environment parameters.
        """
        self.default_dynamics_parameters['joint_forces'] = np.zeros((7,))
        self.default_dynamics_parameters['acceleration_forces'] = np.zeros((7,))
        self.default_dynamics_parameters['eef_forces'] = np.zeros((6,))

        self.default_dynamics_parameters['eef_timedelay'] = np.asarray(0)
        self.default_dynamics_parameters['timestep_parameter'] = np.asarray(0.0)
        self.default_dynamics_parameters['pid_iteration_time'] = np.asarray(0.)
        self.default_dynamics_parameters['mujoco_timestep'] = np.asarray(0.002)

        self.default_dynamics_parameters['action_additive_noise'] = np.asarray(0.0)
        self.default_dynamics_parameters['action_multiplicative_noise'] = np.asarray(0.0)
        self.default_dynamics_parameters['action_systematic_noise'] = np.asarray(0.0)

        self.default_dynamics_parameters['eef_obs_position_noise'] = np.asarray(0.0)
        self.default_dynamics_parameters['eef_obs_velocity_noise'] = np.asarray(0.0)

        link_masses = np.zeros((7,))
        for link_name, idx, body_node, mass_node, joint_node in self._robot_link_nodes_generator():
            if (mass_node is not None):
                dynamics_parameter_value = float(mass_node.get("mass"))
                link_masses[idx] = dynamics_parameter_value

        self.default_dynamics_parameters['link_masses'] = link_masses
        self.default_dynamics_parameters['joint_dampings'] = self._default_damping_params()
        self.default_dynamics_parameters['armatures'] = self._default_armature_params()
        self.default_dynamics_parameters['joint_frictions'] = self._default_joint_friction_params()

        if (use_pid):
            gains = self.mujoco_robot.velocity_pid_gains
            kps = np.array([gains['right_j{}'.format(actuator)]['p'] for actuator in range(7)])
            kis = np.array([gains['right_j{}'.format(actuator)]['i'] for actuator in range(7)])
            kds = np.array([gains['right_j{}'.format(actuator)]['d'] for actuator in range(7)])
            #
            self.default_dynamics_parameters['kps'] = kps
            self.default_dynamics_parameters['kis'] = kis
            self.default_dynamics_parameters['kds'] = kds
        else:
            kvs = np.zeros((7,))
            for target_joint, jnt_idx, node in self._velocity_actuator_nodes_generator():
                gains_value = float(node.get("kv"))
                kvs[jnt_idx] = gains_value

            self.default_dynamics_parameters['kvs'] = kvs

    def _set_default_parameter_sampling_ranges(self):
        """
        Returns the parameter ranges to draw samples from in the domain randomisation.
        """
        parameter_ranges = {
            'joint_forces': np.array([[0.,0.,0.,0.,0.,0.,0.],[1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5]]),# [2., 1.5, 1.5, 1.5 ,0.75 ,0.5,0.3]
            'acceleration_forces': np.array([[0.,0.,0.,0.,0.,0.,0.], [0.12,0.12,0.12,0.12,0.12,0.12,0.12]]),#
            'eef_forces': np.array([[0.,0.,0.,0.,0.,0.], [0.06 ,0.06,0.06,0.01,0.01,0.01,]]), #

            'eef_timedelay': np.array([0, 1]),
            'timestep_parameter': np.array([0.0, 0.01]),
            'pid_iteration_time': np.array([0., 0.04]), #-1 and 0 are not allowed values
            'mujoco_timestep': np.array([0.001,0.002]),

            'action_additive_noise': np.array([0.01, 0.1]),
            'action_multiplicative_noise': np.array([0.005,0.02]),
            'action_systematic_noise': np.array([-0.05, 0.05]),

            'eef_obs_position_noise': np.array([0.0005, 0.001]),
            'eef_obs_velocity_noise': np.array([0.0005, 0.001]),

            'link_masses': np.array([0.98, 1.02]),
            'joint_dampings': np.array([0.5, 2.]),
            'armatures': np.array([0.66, 1.5]),
            'joint_frictions': np.array([0.66, 1.5]),
        }

        if (self.pid):
            parameter_ranges['kps'] = np.array([0.66, 1.5])
            parameter_ranges['kis'] = np.array([0.66, 1.5])
            parameter_ranges['kds'] = np.array([0.66, 1.5])
        else:
            parameter_ranges['kvs'] = [0.5, 2]

        self.parameter_sampling_ranges = parameter_ranges


    def _set_factors_for_param_randomisation(self, parameters):
        factors = copy.deepcopy(parameters)

        factors['joint_forces'] = np.ones((7,))
        factors['acceleration_forces'] = np.ones((7,))
        factors['eef_forces'] = np.ones((1,))

        factors['eef_timedelay'] = 1.0
        factors['timestep_parameter'] = 1.0
        factors['pid_iteration_time'] = 1.0
        factors['mujoco_timestep'] = 1.0

        factors['action_additive_noise'] = 1.0
        factors['action_multiplicative_noise'] = 1.0
        factors['action_systematic_noise'] = 1.0

        factors['eef_obs_position_noise'] = 1.0
        factors['eef_obs_velocity_noise'] = 1.0


        self.factors_for_param_randomisation = factors

    def _velocity_actuator_nodes_generator(self):
        """
        Caching the xml nodes for the velocity actuators for use when setting the parameters
        """

        for node in self.model.root.findall(".//velocity[@kv]"):
            target_joint = node.get("joint")
            jnt_idx = int(target_joint[-1])
            yield target_joint, jnt_idx, node

    def _robot_link_nodes_generator(self):
        """
        Caching the xml nodes for the velocity actuators for use when setting the parameters
        """

        for link_idx, link_name in enumerate(self.mujoco_robot.links):
            body_node = self.mujoco_robot.root.find(".//body[@name='{}']".format(link_name))
            mass_node = body_node.find("./inertial[@mass]")
            joint_node = body_node.find("./joint")

            yield link_name, link_idx, body_node, mass_node, joint_node

    def _check_allowed_parameters(self, parameters):
        allowed_parameters = self.get_parameter_keys()

        for param in parameters:
            assert param in allowed_parameters, '{} not allowed. Only allowed parameters are {}'.format(param,
                                                                                                        allowed_parameters)

    def _select_appropriate_distribution(self, key):
        '''
        Which distribution to use to sample the different dynamics parameters.
        :param key: The parameter to consider.
        '''
        if (
                key == 'joint_forces'
                or key == 'acceleration_forces'
                or key == 'eef_forces'

                or key == 'timestep_parameter'
                or key == 'pid_iteration_time'
                or key == 'mujoco_timestep'

                or key == 'action_additive_noise'
                or key == 'action_multiplicative_noise'
                or key == 'action_systematic_noise'

                or key == 'eef_obs_position_noise'
                or key == 'eef_obs_velocity_noise'

                or key == 'link_masses'
                ):
            return self.np_random.uniform
        elif (
                key == 'eef_timedelay'
        ):
            return self._ranged_random_choice
        else:
            return self._loguniform

    def _loguniform(self, low=1e-10, high=1., size=None):
        return np.asarray(np.exp(self.np_random.uniform(np.log(low), np.log(high), size)))

    def _ranged_random_choice(self,low, high, size=1):
        vals = np.arange(low,high+1)
        return self.np_random.choice(vals, size)

    def _parameter_for_randomisation_generator(self, parameters=None):
        '''
        Generates (key,value) pairs of sampled dynamics parameters.
         :param parameters: The parameters to be sampled for randomisation, if None, all the allowed parameters are sampled.
        '''
        parameter_ranges = self.parameter_sampling_ranges

        if (parameters is None):
            parameters = self.get_parameter_keys()

        for key in parameters:

            parameter_range = parameter_ranges[key]

            if (parameter_range.shape[0] == 1):
                yield key, np.asarray(parameter_range[0])
            elif (parameter_range.shape[0] == 2):
                distribution = self._select_appropriate_distribution(key)
                size = self.default_dynamics_parameters[key].shape
                yield key, np.asarray(
                    self.factors_for_param_randomisation[key] * distribution(*parameter_ranges[key], size=size))
            else:
                raise RuntimeError('Parameter radomisation range needs to be of shape {1,2}xN')


    def _load_model(self):
        """
        Loads an xml model, puts it in self.model. This sets up the mujoco xml for the scene.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        ### Domain Randomisation ###
        if (self.initialised):
            for key, val in self._parameter_for_randomisation_generator(parameters=self.parameters_to_randomise):
                self.dynamics_parameters[key] = val

            ## Queues for adding time delays
            self.eef_pos_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1))
            self.eef_vel_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1))

            if (self.pid is not None):
                self.pid.sample_time = self.dynamics_parameters['pid_iteration_time']

        ### Create the Task ###
        ## Load the Arena ##
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size, table_friction=(0.1, 0.001, 0.001)
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])

        goal = BoxObject(size=[0.023 / 2, 0.023 / 2, 0.001 / 2],
                         rgba=[0, 1, 0, 1], )

        ## Put everything together into the task ##
        self.model = ReachTask(self.mujoco_arena, self.mujoco_robot, goal)

        ### Set the goal position ###
        # Gripper position at neutral
        gripper_pos_neutral = [0.44969246, 0.16029991, 1.00875409]

        if(self.specific_goal_position is not None):
            init_pos = np.array([gripper_pos_neutral[0] +self.specific_goal_position[0],
                                       gripper_pos_neutral[1] + self.specific_goal_position[1],
                                       self.model.table_top_offset[2]])
            init_pos = array_to_string(init_pos)


        elif (self.randomise_initial_conditions):
            # Place goal in a 20x20cm square directly below the eef neutral position.
            noise = self.np_random.uniform(-1, 1, 3) * np.array([0.20, 0.20, 0.0])
            offset = np.array([gripper_pos_neutral[0], gripper_pos_neutral[1], self.model.table_top_offset[2]])
            init_pos = array_to_string(noise + offset)
        else:
            init_pos = np.concatenate([gripper_pos_neutral[:2], [self.model.table_top_offset[2]]])
            init_pos = array_to_string(init_pos)

        self.model.xml_goal.set("pos", init_pos)

        ### Set the xml parameters to the values given by the dynamics_parameters attribute ###
        if (self.initialised):
            self._apply_xml_dynamics_parameters()

    def _apply_xml_dynamics_parameters(self):
        """
        Applying the values contained in dynamics_parameters to the xml elements of the model. If a pid is used this
        also applied the pid gains contained in the dynamics parameters.
        """

        opt_node = self.model.root.find('option')
        opt_node.set("timestep", str(self.dynamics_parameters['mujoco_timestep']))

        for link_name, idx, body_node, mass_node, joint_node in self._robot_link_nodes_generator():
            if (mass_node is not None):
                mass_node.set("mass", str(self.dynamics_parameters['link_masses'][idx]))

            if (joint_node is not None):
                joint_node.set("damping", str(self.dynamics_parameters['joint_dampings'][idx]))
                joint_node.set("armature", str(self.dynamics_parameters['armatures'][idx]))
                joint_node.set("frictionloss", str(self.dynamics_parameters['joint_frictions'][idx]))

        if (self.pid):
            self.pid.tunings = (self.dynamics_parameters['kps'],
                                self.dynamics_parameters['kis'],
                                self.dynamics_parameters['kds'],
                                )
        else:
            for target_joint, jnt_idx, node in self._velocity_actuator_nodes_generator():
                node.set("kv", str(self.dynamics_parameters['kvs'][jnt_idx]))


    def set_parameter_sampling_ranges(self, sampling_ranges):
        '''
        Set a new sampling range for the dynamics parameters.
        :param sampling_ranges: (Dict) Dictionary of the sampling ranges for the different parameters of the form
        (param_name, range) where param_name is a valid param name string and range is a numpy array of dimensionality
        {1,2}xN where N is the dimension of the given parameter
        '''
        for candidate_name, candidate_value in sampling_ranges.items():
            assert candidate_name in  self.parameter_sampling_ranges, 'Valid parameters are {}'.format(self.parameter_sampling_ranges.keys())
            assert candidate_value.shape[0] == 1 or candidate_value.shape[0]==2, 'First dimension of the sampling parameter needs to have value 1 or 2'
            assert len(candidate_value.shape) == len(self.parameter_sampling_ranges[candidate_name].shape), '{} has the wrong number of dimensions'.format(candidate_name)
            if(len(self.parameter_sampling_ranges[candidate_name].shape) >1):
                assert self.parameter_sampling_ranges[candidate_name].shape[1] == candidate_value.shape[1], '{} has the wrong shape'.format(candidate_name)

            self.parameter_sampling_ranges[candidate_name] = candidate_value

    def get_parameter_sampling_ranges(self):
        return copy.deepcopy(self.parameter_sampling_ranges)

    def get_parameter_keys(self):
        return self.default_dynamics_parameters.keys()

    def get_total_parameter_dimension(self):
        total_size = 0.
        for key, val in self.default_dynamics_parameters.items():
            total_size += val.size

        return total_size

    def get_internal_state(self):
        return np.concatenate([self._joint_positions, self._joint_velocities]).tolist()

    def get_internal_state_dimension(self):
        internal_state = self.get_internal_state()
        return len(internal_state)

    def change_parameters_to_randomise(self, parameters):
        self._check_allowed_parameters(parameters)
        self._set_dynamics_parameters(self.default_dynamics_parameters)
        self.parameters_to_randomise = parameters

    def get_randomised_parameters(self):
        if (self.parameters_to_randomise is not None):
            return self.parameters_to_randomise
        else:
            return self.get_parameter_keys()

    def get_randomised_parameter_dimensions(self):
        """ Return the number of dimensions of the ranomised parameters"""
        randomised_parameter_names = self.get_randomised_parameters()

        total_dimension = 0
        for param in randomised_parameter_names:
            param_shape = self.default_dynamics_parameters[param].shape
            if(param_shape ==()):
                total_dimension += 1
            else:
                total_dimension += param_shape[0]
        return total_dimension

    def get_dynamics_parameters(self):
        """
        Returns the values of the current dynamics parameters.
        """
        return copy.deepcopy(self.dynamics_parameters)

    def get_default_dynamics_parameters(self):
        """
        Returns the default values of the  dynamics parameters.
        """
        return copy.deepcopy(self.default_dynamics_parameters)

    def get_factors_for_randomisation(self):
        """
        Returns the factor used for domain randomisation.
        """
        return copy.deepcopy(self.factors_for_param_randomisation)

    def set_dynamics_parameters(self, dynamics_parameter_dict):
        """
        Setting the dynamics parameters of the environment to specific values. These are going to be used the next
        time the environment is reset, and will be overriden if domain randomisation is on.
        :param dynamics_parameter_dict: Dictionary with the values of the parameters to set.
        """
        for key, value in dynamics_parameter_dict.items():
            assert key in self.dynamics_parameters, 'Setting a parameter that does not exist'
            self.dynamics_parameters[key] = value

    def randomisation_off(self,):
        '''
        Disable the parameter randomisation temporarily and cache the current set of parameters and
        which parameters are being randomised.This can be useful for evaluation.
        '''
        current_params_to_randomise = self.get_randomised_parameters()
        current_params = self.get_dynamics_parameters()

        self.cached_parameters_to_randomise = current_params_to_randomise
        self.cached_dynamics_parameters = current_params

        self.parameters_to_randomise = []

        return current_params,  current_params_to_randomise

    def randomisation_on(self):
        '''
        Restoring the randomisation as they were before the call to switch_params
        '''
        if(self.cached_dynamics_parameters is None):
            print("Randomisation was not switched off before switching it back on.")
            return

        self.parameters_to_randomise = self.cached_parameters_to_randomise
        self.set_dynamics_parameters(self.cached_dynamics_parameters)
        self.cached_parameters_to_randomise = None
        self.cached_dynamics_parameters = None

    def sample_parameter_randomisation(self, parameters=None):
        ''' Samples a dictionary of dynamics parameters values using the randomisation process currently set in the environment
            parameters ([string,]) : List of parameters to sample a randomisation from. If None, all the allowed parameters are sampled.
        '''
        if (not self.initialised):
            print('Function has undefined behaviour if environment fully initialised, returning with no effect')
            return

        parameters_sample = {}

        for key, val in self._parameter_for_randomisation_generator(parameters):
            assert key in self.get_parameter_keys(), '{} not allowed. Choose from {}'.format(key,
                                                                                             self.get_parameter_keys())
            parameters_sample[key] = val

        return parameters_sample


    def _set_goal_neutral_offset(self, goal_x, goal_y):
        self.specific_goal_position =  np.array([goal_x, goal_y])

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # goal ids
        self.goal_body_id = self.sim.model.body_name2id("goal")
        self.goal_site_id = self.sim.model.site_name2id("goal")

        # Gripper ids
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms
        ]

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.sim.forward()
        self.init_right_hand_quat = self._right_hand_quat
        self.init_right_hand_orn = self._right_hand_orn
        self.init_right_hand_pos = self._right_hand_pos

        eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape((3, 3))
        self.world_rot_in_eef = copy.deepcopy(eef_rot_in_world.T)

        if (self.randomise_initial_conditions and self.initialised):
            # Start the gripper in a 10x10 cm area around the neutral position
            eef_pos = self.sim.data.get_body_xpos('right_hand')
            noise = self.np_random.uniform(-1, 1, 3) * np.array([0.10, 0.10, 0.0])
            init_pos = eef_pos + noise
            init_pose = T.make_pose(init_pos, np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, -1.0]]))

            # Start the IK search from the rest qpos
            ref_q = self.mujoco_robot.init_qpos

            # Express init_pose in the base frame of the robot
            init_pose_in_base = self.pose_in_base(init_pose)

            # Do the IK
            joint_angles = self.IK_solver.compute_joint_angles_for_endpoint_pose(init_pose_in_base, ref_q)

            # Set the robot joint angles
            self.set_robot_joint_positions(joint_angles)
            self.sim.forward()

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [-inf, 0], to encourage the arm to reach the object
            Goal Distance: in [-inf, 0] the distance between the pushed object and the goal

        The sparse reward only receives a {0,1} upon reaching the goal

        Args:
            action (np array): The action taken in that timestep

        Returns:
            reward (float or dict): the reward if sparce rewards are used otherwise a dictionary
            with the total reward, and the subcoponents of the dense reward.
        """
        reward = 0.

        # sparse completion reward
        if not self.reward_shaping and self._check_success():
            reward = 1.0

        # use a dense reward
        if self.reward_shaping:
            # max joint angles reward
            joint_limits = self._joint_ranges
            current_joint_pos = self._joint_positions

            hitting_limits_reward = - np.sum([(x < joint_limits[i, 0] + 0.1 or x > joint_limits[i, 1] - 0.1) for i, x in
                                              enumerate(current_joint_pos)])

            reward += hitting_limits_reward

            # reaching reward
            goal_pos = self.sim.data.site_xpos[self.goal_site_id]
            goal_pos_actual = goal_pos + np.array([0., 0., 0.025 + self.gripper_size])

            eef_pos = self.sim.data.get_body_xpos("right_hand")
            dist = np.linalg.norm(eef_pos - goal_pos_actual)
            reaching_reward = -dist
            reward += reaching_reward

            # Hitting the table reward
            hitting_the_table_reward = 0.0
            hitting_the_table = self._check_contact_with("table_collision")
            if (hitting_the_table):
                hitting_the_table_reward -= 1.0

            reward += hitting_the_table_reward

            # Success Reward
            success = self._check_success()
            if (success):
                reward += 0.1

            # Return all three types of rewards
            reward = {"reward": reward, "reaching_distance": reaching_reward,
                      "hitting_limits_reward": hitting_limits_reward,
                      "hitting_the_table_reward": hitting_the_table_reward,
                      "unstable":False}

        return reward

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        eef_pos = self.sim.data.get_body_xpos("right_hand")
        goal_pos = self.sim.data.site_xpos[self.goal_site_id]
        goal_pos_actual = goal_pos + np.array([0., 0., 0.025 + self.gripper_size])

        dist = np.linalg.norm(goal_pos_actual - eef_pos)
        success_radius = self.success_radius

        # object centre is within the goal radius
        return dist < success_radius

    def _pre_action(self, action):
        """ Takes the action, randomised the control timestep, and adds some additional random noise to the action."""
        # Change control timestep to simulate various random time delays
        timestep_parameter = self.dynamics_parameters['timestep_parameter']
        self.control_timestep = self.init_control_timestep + self.np_random.exponential(scale=timestep_parameter)

        super()._pre_action(action)


        # Adding forces
        self.sim.data.qfrc_applied[
            self._ref_joint_vel_indexes
        ] += self.dynamics_parameters['joint_forces'] * self.np_random.uniform(-1, 1, 7)

        self.sim.data.xfrc_applied[
            self._ref_gripper_body_indx
        ] = self.dynamics_parameters['eef_forces'] * self.np_random.uniform(-1, 1, 6)


        # Adding force proportional to acceleration
        self.sim.data.qfrc_applied[
            self._ref_joint_vel_indexes
        ] +=  self.dynamics_parameters['acceleration_forces']  * self.sim.data.qacc[
            self._ref_joint_vel_indexes
        ]


    def _post_action(self, action):
        """
        Add dense reward subcomponents to info
        """
        reward, done, info = super()._post_action(action)

        if self.reward_shaping:
            info = reward
            reward = reward["reward"]

        info["success"] = self._check_success()

        return reward, done, info



    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = OrderedDict()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # Extract position and velocity of the eef
            eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
            eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")

            # Apply time delays
            eef_pos_in_world = self._apply_time_delay(eef_pos_in_world, self.eef_pos_queue)
            eef_xvelp_in_world = self._apply_time_delay(eef_xvelp_in_world, self.eef_vel_queue)

            # Add random noise to the observations
            position_noise = self.dynamics_parameters['eef_obs_position_noise']
            velocity_noise = self.dynamics_parameters['eef_obs_velocity_noise']

            eef_pos_in_world = eef_pos_in_world + self.np_random.normal(loc=0., scale=position_noise)
            eef_xvelp_in_world = eef_xvelp_in_world + self.np_random.normal(loc=0., scale=velocity_noise)

            # Get the goal position in the world
            goal_pos_in_world = np.array(self.sim.data.site_xpos[self.goal_site_id])

            # Correct for the fact that in the real robot we record the eef position at the goal as the observation
            goal_pos_in_world = goal_pos_in_world + np.array([0., 0., self.gripper_size])

            # Get  object to goal vectors in EEF frame
            eef_to_goal_in_world = goal_pos_in_world - eef_pos_in_world
            eef_to_goal_in_eef = self.world_rot_in_eef.dot(eef_to_goal_in_world)

            eef_xvelp_in_eef = self.world_rot_in_eef.dot(eef_xvelp_in_world)

            # Record observations into a dictionary
            di["eef_pos_in_world"] = eef_pos_in_world
            di["eef_vel_in_world"] = eef_xvelp_in_world
            di["goal_pos_in_world"] = goal_pos_in_world
            di["task-state"] = np.concatenate([eef_to_goal_in_eef, eef_xvelp_in_eef])

        return di

    def _apply_time_delay(self, object, queue):
        queue.appendleft(copy.deepcopy(object))

        if (len(queue) == queue.maxlen):
            return queue.pop()
        else:
            return queue[-1]


    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            if (
                    self.sim.model.geom_id2name(contact.geom1)
                    in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(contact.geom2)
                    in self.gripper.contact_geoms()
            ):
                collision = True
                break
        return collision

    def _check_contact_with(self, object):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            if (
                    (self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms()
                     and contact.geom2 == self.sim.model.geom_name2id(object))

                    or (self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms()
                        and contact.geom1 == self.sim.model.geom_name2id(object))
            ):
                collision = True
                break
        return collision



    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to object
        if self.gripper_visualization:
            # get distance to object
            object_site_id = self.sim.model.site_name2id(self.model.object_names[self.model.push_object_idx])
            dist = np.sum(
                np.square(
                    self.sim.data.site_xpos[object_site_id]
                    - self.sim.data.get_site_xpos("grip_site")
                )
            )

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.)) ** 15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #25
0
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation, offset in zip(self.robots,
                                                   (np.pi / 2, -np.pi / 2),
                                                   (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    xpos += np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.6, 0.6)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(table_full_size=self.table_true_size,
                                  table_friction=self.table_friction,
                                  table_offset=self.table_offset)

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # Modify default agentview camera
        mujoco_arena.set_camera(camera_name="agentview",
                                pos=[
                                    0.8894354364730311, -3.481824231498976e-08,
                                    1.7383813133506494
                                ],
                                quat=[
                                    0.6530981063842773, 0.2710406184196472,
                                    0.27104079723358154, 0.6530979871749878
                                ])

        # initialize objects of interest
        self.hammer = HammerObject(name="hammer")

        # Create placement initializer
        if self.placement_initializer is not None:
            self.placement_initializer.reset()
            self.placement_initializer.add_objects(self.hammer)
        else:
            # Set rotation about y-axis if hammer starts on table else rotate about z if it starts in gripper
            rotation_axis = 'y' if self.prehensile else 'z'
            self.placement_initializer = UniformRandomSampler(
                name="ObjectSampler",
                mujoco_objects=self.hammer,
                x_range=[-0.1, 0.1],
                y_range=[-0.05, 0.05],
                rotation=None,
                rotation_axis=rotation_axis,
                ensure_object_boundary_in_range=False,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
            )

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.hammer,
        )
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model. This sets up the mujoco xml for the scene.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        ### Domain Randomisation ###
        if (self.initialised):
            for key, val in self._parameter_for_randomisation_generator(parameters=self.parameters_to_randomise):
                self.dynamics_parameters[key] = val

            ## Queues for adding time delays
            self.eef_pos_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1))
            self.eef_vel_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1))

            if (self.pid is not None):
                self.pid.sample_time = self.dynamics_parameters['pid_iteration_time']

        ### Create the Task ###
        ## Load the Arena ##
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size, table_friction=(0.1, 0.001, 0.001)
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])

        goal = BoxObject(size=[0.023 / 2, 0.023 / 2, 0.001 / 2],
                         rgba=[0, 1, 0, 1], )

        ## Put everything together into the task ##
        self.model = ReachTask(self.mujoco_arena, self.mujoco_robot, goal)

        ### Set the goal position ###
        # Gripper position at neutral
        gripper_pos_neutral = [0.44969246, 0.16029991, 1.00875409]

        if(self.specific_goal_position is not None):
            init_pos = np.array([gripper_pos_neutral[0] +self.specific_goal_position[0],
                                       gripper_pos_neutral[1] + self.specific_goal_position[1],
                                       self.model.table_top_offset[2]])
            init_pos = array_to_string(init_pos)


        elif (self.randomise_initial_conditions):
            # Place goal in a 20x20cm square directly below the eef neutral position.
            noise = self.np_random.uniform(-1, 1, 3) * np.array([0.20, 0.20, 0.0])
            offset = np.array([gripper_pos_neutral[0], gripper_pos_neutral[1], self.model.table_top_offset[2]])
            init_pos = array_to_string(noise + offset)
        else:
            init_pos = np.concatenate([gripper_pos_neutral[:2], [self.model.table_top_offset[2]]])
            init_pos = array_to_string(init_pos)

        self.model.xml_goal.set("pos", init_pos)

        ### Set the xml parameters to the values given by the dynamics_parameters attribute ###
        if (self.initialised):
            self._apply_xml_dynamics_parameters()
예제 #27
0
class PandaLift(PandaEnv):
    """
    This class corresponds to the lifting task for the Panda robot arm.
    """
    def __init__(
        self,
        gripper_type="PandaGripper",
        table_full_size=(0.8, 0.8, 0.8),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=True,
            )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        cube = BoxObject(
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:

            # reaching reward
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            di["object-state"] = np.concatenate(
                [cube_pos, cube_quat, di["gripper_to_cube"]])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        return cube_height > table_height + 0.04

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #28
0
class TwoArmHandover(RobotEnv):
    """
    This class corresponds to the handover task for two robot arms.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be either 2 single single-arm robots or 1 bimanual robot!

        env_configuration (str): Specifies how to position the robots within the environment. Can be either:

            :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x
                side of the table
            :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots next to each other on the -x side of the table
            :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots opposed from each others on the opposite +/-y sides of the table (Default option)

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default grippers(s) associated
            with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        prehensile (bool): If true, handover object starts on the table. Else, the object starts in Arm0's gripper

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for
            the table.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        ValueError: [Invalid number of robots specified]
        ValueError: [Invalid env configuration]
        ValueError: [Invalid robots for specified env configuration]
    """
    def __init__(
        self,
        robots,
        env_configuration="single-arm-opposed",
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        prehensile=True,
        table_full_size=(0.8, 1.2, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that correct number of robots are being inputted
        self.env_configuration = env_configuration
        self._check_robot_configuration(robots)

        # Task settings
        self.prehensile = prehensile

        # settings for table top
        self.table_full_size = table_full_size
        self.table_true_size = list(table_full_size)
        self.table_true_size[
            1] *= 0.25  # true size will only be partially wide
        self.table_friction = table_friction
        self.table_offset = [0, self.table_full_size[1] * (-3 / 8), 0.8]

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.height_threshold = 0.1  # threshold above the table surface which the hammer is considered lifted

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # Set rotation about y-axis if hammer starts on table else rotate about z if it starts in gripper
            rotation_axis = 'y' if self.prehensile else 'z'
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.1, 0.1],
                y_range=[-0.05, 0.05],
                ensure_object_boundary_in_range=False,
                rotation=None,
                rotation_axis=rotation_axis,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 2.0 is provided when only Arm 1 is gripping the handle and has the handle
              lifted above a certain threshold

        Un-normalized max-wise components if using reward shaping:

            - Arm0 Reaching: (1) in [0, 0.25] proportional to the distance between Arm 0 and the handle
            - Arm0 Grasping: (2) in {0, 0.5}, nonzero if Arm 0 is gripping the hammer (any part).
            - Arm0 Lifting: (3) in {0, 1.0}, nonzero if Arm 0 lifts the handle from the table past a certain threshold
            - Arm0 Hovering: (4) in {0, [1.0, 1.25]}, nonzero only if Arm0 is actively lifting the hammer, and is
              proportional to the distance between the handle and Arm 1
              conditioned on the handle being lifted from the table and being grasped by Arm 0
            - Mutual Grasping: (5) in {0, 1.5}, nonzero if both Arm 0 and Arm 1 are gripping the hammer (Arm 1 must be
              gripping the handle) while lifted above the table
            - Handover: (6) in {0, 2.0}, nonzero when only Arm 1 is gripping the handle and has the handle
              lifted above the table

        Note that the final reward is normalized and scaled by reward_scale / 2.0 as
        well so that the max score is equal to reward_scale

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        # Initialize reward
        reward = 0

        # use a shaping reward if specified
        if self.reward_shaping:
            # Grab relevant parameters
            arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height = self._get_task_info(
            )
            # First, we'll consider the cases if the hammer is lifted above the threshold (step 3 - 6)
            if hammer_height - table_height > self.height_threshold:
                # Split cases depending on whether arm1 is currently grasping the handle or not
                if arm1_grasp_handle:
                    # Check if arm0 is grasping
                    if arm0_grasp_any:
                        # This is step 5
                        reward = 1.5
                    else:
                        # This is step 6 (completed task!)
                        reward = 2.0
                # This is the case where only arm0 is grasping (step 2-3)
                else:
                    reward = 1.0
                    # Add in up to 0.25 based on distance between handle and arm1
                    dist = np.linalg.norm(self._gripper_1_to_handle)
                    reaching_reward = 0.25 * (1 - np.tanh(1.0 * dist))
                    reward += reaching_reward
            # Else, the hammer is still on the ground ):
            else:
                # Split cases depending on whether arm0 is currently grasping the handle or not
                if arm0_grasp_any:
                    # This is step 2
                    reward = 0.5
                else:
                    # This is step 1, we want to encourage arm0 to reach for the handle
                    dist = np.linalg.norm(self._gripper_0_to_handle)
                    reaching_reward = 0.25 * (1 - np.tanh(1.0 * dist))
                    reward = reaching_reward

        # Else this is the sparse reward setting
        else:
            # Provide reward if only Arm 1 is grasping the hammer and the handle lifted above the pre-defined threshold
            if self._check_success():
                reward = 2.0

        if self.reward_scale is not None:
            reward *= self.reward_scale / 2.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation, offset in zip(self.robots,
                                                   (np.pi / 2, -np.pi / 2),
                                                   (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    xpos += np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.6, 0.6)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_true_size,
                                       table_friction=self.table_friction,
                                       table_offset=self.table_offset)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        self.hammer = HammerObject(name="hammer")
        self.mujoco_objects = OrderedDict([("hammer", self.hammer)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Hammer object references from this env
        self.hammer_body_id = self.sim.model.body_name2id("hammer")
        self.hammer_handle_geom_id = self.sim.model.geom_name2id(
            "hammer_handle")
        self.hammer_head_geom_id = self.sim.model.geom_name2id("hammer_head")
        self.hammer_face_geom_id = self.sim.model.geom_name2id("hammer_face")
        self.hammer_claw_geom_id = self.sim.model.geom_name2id("hammer_claw")

        # General env references
        self.table_top_id = self.sim.model.site_name2id("table_top")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                # If prehensile, set the object normally
                if self.prehensile:
                    self.sim.data.set_joint_qpos(
                        obj_name + "_jnt0",
                        np.concatenate(
                            [np.array(obj_pos[i]),
                             np.array(obj_quat[i])]))
                # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping
                #   the object initially
                else:
                    eef_rot_quat = T.mat2quat(
                        T.euler2mat(
                            [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0]))
                    obj_quat[i] = T.quat_multiply(obj_quat[i], eef_rot_quat)
                    for j in range(100):
                        # Set object in hand
                        self.sim.data.set_joint_qpos(
                            obj_name + "_jnt0",
                            np.concatenate(
                                [self._eef0_xpos,
                                 np.array(obj_quat[i])]))
                        # Close gripper (action = 1) and prevent arm from moving
                        if self.env_configuration == 'bimanual':
                            # Execute no-op action with gravity compensation
                            torques = np.concatenate([
                                self.robots[0].controller["right"].
                                torque_compensation, self.robots[0].
                                controller["left"].torque_compensation
                            ])
                            self.sim.data.ctrl[self.robots[
                                0]._ref_joint_torq_actuator_indexes] = torques
                            # Execute gripper action
                            self.robots[0].grip_action([1], "right")
                        else:
                            # Execute no-op action with gravity compensation
                            self.sim.data.ctrl[self.robots[0]._ref_joint_torq_actuator_indexes] =\
                                self.robots[0].controller.torque_compensation
                            self.sim.data.ctrl[self.robots[1]._ref_joint_torq_actuator_indexes] = \
                                self.robots[1].controller.torque_compensation
                            # Execute gripper action
                            self.robots[0].grip_action([1])
                        # Take forward step
                        self.sim.step()

    def _get_task_info(self):
        """
        Helper function that grabs the current relevant locations of objects of interest within the environment

        Returns:
            4-tuple:

                - (bool) True if Arm0 is grasping any part of the hammer
                - (bool) True if Arm1 is grasping the hammer handle
                - (float) Height of the hammer body
                - (float) Height of the table surface
        """
        # Get height of hammer and table and define height threshold
        hammer_angle_offset = (self.hammer.handle_length / 2 +
                               2 * self.hammer.head_halfsize) * np.sin(
                                   self._hammer_angle)
        hammer_height = self.sim.data.geom_xpos[self.hammer_handle_geom_id][2]\
            - self.hammer.get_top_offset()[2]\
            - hammer_angle_offset
        table_height = self.sim.data.site_xpos[self.table_top_id][2]

        # Check if any Arm's gripper is grasping the hammer handle

        # Single bimanual robot setting
        if self.env_configuration == "bimanual":
            _contacts_0_lf = len(
                list(
                    self.find_contacts(
                        self.robots[0].gripper["left"].
                        important_geoms["left_finger"],
                        self.hammer.all_geoms))) > 0
            _contacts_0_rf = len(
                list(
                    self.find_contacts(
                        self.robots[0].gripper["left"].
                        important_geoms["right_finger"],
                        self.hammer.all_geoms))) > 0
            _contacts_1_lf = len(
                list(
                    self.find_contacts(
                        self.robots[0].gripper["right"].
                        important_geoms["left_finger"],
                        self.hammer.handle_geoms))) > 0
            _contacts_1_rf = len(
                list(
                    self.find_contacts(
                        self.robots[0].gripper["right"].
                        important_geoms["right_finger"],
                        self.hammer.handle_geoms))) > 0
        # Multi single arm setting
        else:
            _contacts_0_lf = len(
                list(
                    self.find_contacts(
                        self.robots[0].gripper.important_geoms["left_finger"],
                        self.hammer.all_geoms))) > 0
            _contacts_0_rf = len(
                list(
                    self.find_contacts(
                        self.robots[0].gripper.important_geoms["right_finger"],
                        self.hammer.all_geoms))) > 0
            _contacts_1_lf = len(
                list(
                    self.find_contacts(
                        self.robots[1].gripper.important_geoms["left_finger"],
                        self.hammer.handle_geoms))) > 0
            _contacts_1_rf = len(
                list(
                    self.find_contacts(
                        self.robots[1].gripper.important_geoms["right_finger"],
                        self.hammer.handle_geoms))) > 0

        arm0_grasp_any = True if _contacts_0_lf and _contacts_0_rf else False
        arm1_grasp_handle = True if _contacts_1_lf and _contacts_1_rf else False

        # Return all relevant values
        return arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            if self.env_configuration == "bimanual":
                pr0 = self.robots[0].robot_model.naming_prefix + "right_"
                pr1 = self.robots[0].robot_model.naming_prefix + "left_"
            else:
                pr0 = self.robots[0].robot_model.naming_prefix
                pr1 = self.robots[1].robot_model.naming_prefix

            # position and rotation of hammer
            di["hammer_pos"] = np.array(self._hammer_pos)
            di["hammer_quat"] = np.array(self._hammer_quat)
            di["handle_xpos"] = np.array(self._handle_xpos)

            di[pr0 + "eef_xpos"] = np.array(self._eef0_xpos)
            di[pr1 + "eef_xpos"] = np.array(self._eef1_xpos)
            di[pr0 + "gripper_to_handle"] = np.array(self._gripper_0_to_handle)
            di[pr1 + "gripper_to_handle"] = np.array(self._gripper_1_to_handle)

            di["object-state"] = np.concatenate([
                di["hammer_pos"],
                di["hammer_quat"],
                di["handle_xpos"],
                di[pr0 + "eef_xpos"],
                di[pr1 + "eef_xpos"],
                di[pr0 + "gripper_to_handle"],
                di[pr1 + "gripper_to_handle"],
            ])

        return di

    def _check_success(self):
        """
        Check if hammer is successfully handed off

        Returns:
            bool: True if handover has been completed
        """
        # Grab relevant params
        arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height = self._get_task_info(
        )
        return \
            True if \
            arm1_grasp_handle and \
            not arm0_grasp_any and \
            hammer_height - table_height > self.height_threshold \
            else False

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        robots = robots if type(robots) == list or type(robots) == tuple else [
            robots
        ]
        if self.env_configuration == "single-arm-opposed" or self.env_configuration == "single-arm-parallel":
            # Specifically two robots should be inputted!
            is_bimanual = False
            if type(robots) is not list or len(robots) != 2:
                raise ValueError(
                    "Error: Exactly two single-armed robots should be inputted "
                    "for this task configuration!")
        elif self.env_configuration == "bimanual":
            is_bimanual = True
            # Specifically one robot should be inputted!
            if type(robots) is list and len(robots) != 1:
                raise ValueError(
                    "Error: Exactly one bimanual robot should be inputted "
                    "for this task configuration!")
        else:
            # This is an unknown env configuration, print error
            raise ValueError(
                "Error: Unknown environment configuration received. Only 'bimanual',"
                "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}"
                .format(self.env_configuration))

        # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual)
        for robot in robots:
            if check_type(robot, RobotType.bimanual) != is_bimanual:
                raise ValueError(
                    "Error: For {} configuration, expected bimanual check to return {}; "
                    "instead, got {}.".format(
                        self.env_configuration, is_bimanual,
                        check_type(robot, RobotType.bimanual)))

    @property
    def _handle_xpos(self):
        """
        Grab the position of the hammer handle.

        Returns:
            np.array: (x,y,z) position of handle
        """
        return self.sim.data.geom_xpos[self.hammer_handle_geom_id]

    @property
    def _head_xpos(self):
        """
        Grab the position of the hammer head.

        Returns:
            np.array: (x,y,z) position of head
        """
        return self.sim.data.geom_xpos[self.hammer_head_geom_id]

    @property
    def _face_xpos(self):
        """
        Grab the position of the hammer face.

        Returns:
            np.array: (x,y,z) position of face
        """
        return self.sim.data.geom_xpos[self.hammer_face_geom_id]

    @property
    def _claw_xpos(self):
        """
        Grab the position of the hammer claw.

        Returns:
            np.array: (x,y,z) position of claw
        """
        return self.sim.data.geom_xpos[self.hammer_claw_geom_id]

    @property
    def _hammer_pos(self):
        """
        Grab the position of the hammer body.

        Returns:
            np.array: (x,y,z) position of body
        """
        return np.array(self.sim.data.body_xpos[self.hammer_body_id])

    @property
    def _hammer_quat(self):
        """
        Grab the orientation of the hammer body.

        Returns:
            np.array: (x,y,z,w) quaternion of the hammer body
        """
        return T.convert_quat(self.sim.data.body_xquat[self.hammer_body_id],
                              to="xyzw")

    @property
    def _hammer_angle(self):
        """
        Calculate the angle of hammer with the ground, relative to it resting horizontally

        Returns:
            float: angle in radians
        """
        mat = T.quat2mat(self._hammer_quat)
        z_unit = [0, 0, 1]
        z_rotated = np.matmul(mat, z_unit)
        return np.pi / 2 - np.arccos(np.dot(z_unit, z_rotated))

    @property
    def _world_quat(self):
        """
        Grab the world orientation

        Returns:
            np.array: (x,y,z,w) world quaternion
        """
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    @property
    def _eef0_xpos(self):
        """
        Grab the position of Robot 0's end effector.

        Returns:
            np.array: (x,y,z) position of EEF0
        """
        if self.env_configuration == "bimanual":
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id["right"]])
        else:
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])

    @property
    def _eef1_xpos(self):
        """
        Grab the position of Robot 1's end effector.

        Returns:
            np.array: (x,y,z) position of EEF1
        """
        if self.env_configuration == "bimanual":
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id["left"]])
        else:
            return np.array(
                self.sim.data.site_xpos[self.robots[1].eef_site_id])

    @property
    def _eef0_xmat(self):
        """
        End Effector 0 orientation as a rotation matrix
        Note that this draws the orientation from the "ee" site, NOT the gripper site, since the gripper
        orientations are inconsistent!

        Returns:
            np.array: (3,3) orientation matrix for EEF0
        """
        pf = self.robots[0].robot_model.naming_prefix
        if self.env_configuration == "bimanual":
            return np.array(self.sim.data.site_xmat[
                self.sim.model.site_name2id(pf + "right_ee")]).reshape(3, 3)
        else:
            return np.array(self.sim.data.site_xmat[
                self.sim.model.site_name2id(pf + "ee")]).reshape(3, 3)

    @property
    def _eef1_xmat(self):
        """
        End Effector 1 orientation as a rotation matrix
        Note that this draws the orientation from the "right_/left_hand" body, NOT the gripper site, since the gripper
        orientations are inconsistent!

        Returns:
            np.array: (3,3) orientation matrix for EEF1
        """
        if self.env_configuration == "bimanual":
            pf = self.robots[0].robot_model.naming_prefix
            return np.array(self.sim.data.site_xmat[
                self.sim.model.site_name2id(pf + "left_ee")]).reshape(3, 3)
        else:
            pf = self.robots[1].robot_model.naming_prefix
            return np.array(self.sim.data.site_xmat[
                self.sim.model.site_name2id(pf + "ee")]).reshape(3, 3)

    @property
    def _gripper_0_to_handle(self):
        """
        Calculate vector from the left gripper to the hammer handle.

        Returns:
            np.array: (dx,dy,dz) distance vector between handle and EEF0
        """
        return self._handle_xpos - self._eef0_xpos

    @property
    def _gripper_1_to_handle(self):
        """
        Calculate vector from the right gripper to the hammer handle.

        Returns:
            np.array: (dx,dy,dz) distance vector between handle and EEF1
        """
        return self._handle_xpos - self._eef1_xpos
예제 #29
0
class PandaPush(change_dof(PandaEnv, 7, 8)):  # don't need to control a gripper
    """
    This class corresponds to the pushing task for the Panda robot arm.
    """

    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        #'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        'boxobject_size_0': [0.018, 0.022],
        'boxobject_size_1': [0.018, 0.022],
        'boxobject_size_2': [0.018, 0.022],
        'boxobject_friction_0': [0.05, 0.15],
        #'boxobject_friction_1': [0.0025, 0.0075],    # fixed this to zero
        'boxobject_friction_2': [0.00005, 0.00015],
        'boxobject_density_1000': [0.06, 0.14],
    }

    def reset_props(self,
                    table_size_0=0.8,
                    table_size_1=0.8,
                    table_size_2=0.8,
                    table_friction_0=0.,
                    table_friction_1=0.005,
                    table_friction_2=0.0001,
                    boxobject_size_0=0.020,
                    boxobject_size_1=0.020,
                    boxobject_size_2=0.020,
                    boxobject_friction_0=0.1,
                    boxobject_friction_1=0.0,
                    boxobject_friction_2=0.0001,
                    boxobject_density_1000=0.1,
                    **kwargs):

        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1,
                               table_friction_2)
        self.boxobject_size = (boxobject_size_0, boxobject_size_1,
                               boxobject_size_2)
        self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1,
                                   boxobject_friction_2)
        self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            object_obs_process (bool): if True, process the object observation to get a task_state.
                Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing.
        """

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # self.placement_initializer = UniformRandomSampler(
            #     x_range=[-0.1, 0.1],
            #     y_range=[-0.1, 0.1],
            #     ensure_object_boundary_in_range=False,
            #     z_rotation=None,
            # )
            self.placement_initializer = UniformRandomSamplerObjectSpecific(
                x_ranges=[[-0.03, -0.02], [0.09, 0.1]],
                y_ranges=[[-0.05, -0.04], [-0.05, -0.04]],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # for first initialization
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (0., 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (0.1, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_cube = cube

        goal = CylinderObject(
            size=[0.03, 0.001],
            rgba=[0, 1, 0, 1],
        )
        self.mujoco_goal = goal

        self.mujoco_objects = OrderedDict([("cube", cube), ("goal", goal)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
            visual_objects=['goal'],
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

        # gripper ids
        self.goal_body_id = self.sim.model.body_name2id('goal')
        self.goal_site_id = self.sim.model.site_name2id('goal')

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.sim.forward()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions
        init_pos = self.mujoco_robot.init_qpos
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)
        # shut the gripper
        self.sim.data.qpos[
            self._ref_joint_gripper_actuator_indexes] = np.array([0., -0.])

        # set other reference attributes
        eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape(
            (3, 3))
        self.world_rot_in_eef = copy.deepcopy(
            eef_rot_in_world.T
        )  # TODO inspect on this: should we set a golden reference other than a initial position?

    # reward function from sawyer_push
    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [-inf, 0], to encourage the arm to reach the object
            Goal Distance: in [-inf, 0] the distance between the pushed object and the goal
            Safety reward in [-inf, 0], -1 for every joint that is at its limit.

        The sparse reward only receives a {0,1} upon reaching the goal

        Args:
            action (np array): The action taken in that timestep

        Returns:
            reward (float): the reward
            previously in robosuite-extra, when dense reward is used, the return value will be a dictionary. but we removed that feature.
        """
        reward = 0.

        # sparse completion reward
        if not self.reward_shaping and self._check_success():
            reward = 1.0

        # use a dense reward
        if self.reward_shaping:
            object_pos = self.sim.data.body_xpos[self.cube_body_id]

            # max joint angles reward
            joint_limits = self._joint_ranges
            current_joint_pos = self._joint_positions

            hitting_limits_reward = -int(
                any([(x < joint_limits[i, 0] + 0.05
                      or x > joint_limits[i, 1] - 0.05)
                     for i, x in enumerate(current_joint_pos)]))

            reward += hitting_limits_reward

            # reaching reward
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - object_pos)
            reaching_reward = -0.15 * dist
            reward += reaching_reward

            # print(gripper_site_pos, object_pos, reaching_reward)

            # Success Reward
            success = self._check_success()
            if (success):
                reward += 0.1

            # goal distance reward
            goal_pos = self.sim.data.site_xpos[self.goal_site_id]

            dist = np.linalg.norm(goal_pos - object_pos)
            goal_distance_reward = -dist
            reward += goal_distance_reward

            # punish when there is a line of object--gripper--goal
            angle_g_o_g = angle_between(gripper_site_pos - object_pos,
                                        goal_pos - object_pos)
            if not success and angle_g_o_g < np.pi / 2.:
                reward += -0.03 - 0.02 * (np.pi / 2. - angle_g_o_g)

            # print('grippersitepos', gripper_site_pos,
            #       'objpos', object_pos,
            #       'jointangles', hitting_limits_reward,
            #       'reaching', reaching_reward,
            #       'success', success,
            #       'goaldist', goal_distance_reward)

            unstable = reward < -2.5

            # # Return all three types of rewards
            # reward = {"reward": reward, "reaching_distance": -10 * reaching_reward,
            #           "goal_distance": - goal_distance_reward,
            #           "hitting_limits_reward": hitting_limits_reward,
            #           "unstable":unstable}

        return reward

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        object_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.site_xpos[self.goal_site_id]

        dist = np.linalg.norm(goal_pos - object_pos)
        goal_horizontal_radius = self.model.mujoco_objects[
            'goal'].get_horizontal_radius()

        # object centre is within the goal radius
        return dist < goal_horizontal_radius

    def step(self, action):
        """ explicitly shut the gripper """
        joined_action = np.append(action, [1.])
        return super().step(joined_action)

    def world2eef(self, world):
        return self.world_rot_in_eef.dot(world)

    def put_raw_object_obs(self, di):
        # Extract position and velocity of the eef
        eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
        eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")

        # print('eef_pos_in_world', eef_pos_in_world)

        # Get the position, velocity, rotation  and rotational velocity of the object in the world frame
        object_pos_in_world = self.sim.data.body_xpos[self.cube_body_id]
        object_xvelp_in_world = self.sim.data.get_body_xvelp('cube')
        object_rot_in_world = self.sim.data.get_body_xmat('cube')

        # Get the z-angle with respect to the reference position and do sin-cosine encoding
        # world_rotation_in_reference = np.array([[0., 1., 0., ], [-1., 0., 0., ], [0., 0., 1., ]])
        # object_rotation_in_ref = world_rotation_in_reference.dot(object_rot_in_world)
        # object_euler_in_ref = T.mat2euler(object_rotation_in_ref)
        # z_angle = object_euler_in_ref[2]

        object_quat = convert_quat(self.sim.data.body_xquat[self.cube_body_id],
                                   to='xyzw')

        # Get the goal position in the world
        goal_site_pos_in_world = np.array(
            self.sim.data.site_xpos[self.goal_site_id])

        # Record observations into a dictionary
        di['goal_pos_in_world'] = goal_site_pos_in_world
        di['eef_pos_in_world'] = eef_pos_in_world
        di['eef_vel_in_world'] = eef_xvelp_in_world
        di['object_pos_in_world'] = object_pos_in_world
        di['object_vel_in_world'] = object_xvelp_in_world
        # di["z_angle"] = np.array([z_angle])
        di['object_quat'] = object_quat

    def process_object_obs(self, di):
        # z_angle = di['z_angle']
        # sine_cosine = np.array([np.sin(8*z_angle), np.cos(8*z_angle)]).reshape((2,))

        eef_to_object_in_world = di['object_pos_in_world'] - di[
            'eef_pos_in_world']
        # eef_to_object_in_eef = self.world2eef(eef_to_object_in_world)

        object_to_goal_in_world = di['goal_pos_in_world'] - di[
            'object_pos_in_world']
        # object_to_goal_in_eef = self.world2eef(object_to_goal_in_world)

        # object_xvelp_in_eef = self.world2eef(di['object_vel_in_world'])
        # eef_xvelp_in_eef = self.world2eef(di['eef_vel_in_world'])

        task_state = np.concatenate([
            eef_to_object_in_world, object_to_goal_in_world,
            di['eef_vel_in_world'], di['object_vel_in_world'],
            di['object_quat']
        ])

        di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            gripper_to_object : The x-y component of the gripper to object distance
            object_to_goal : The x-y component of the object-to-goal distance
            object_z_rot : the roation of the object around an axis sticking out the table

            object_xvelp: x-y linear velocity of the object
            gripper_xvelp: x-y linear velocity of the gripper


            task-state : a concatenation of all the above.
        """
        # di = super()._get_observation()  # joint angles & vel, which we don't need.
        di = OrderedDict()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            self.put_raw_object_obs(di)
            if self.object_obs_process:
                self.process_object_obs(di)

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_contact_with(self, object):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if ((self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                 and contact.geom2 == self.sim.model.geom_name2id(object)) or
                (self.sim.model.geom_id2name(
                    contact.geom2) in self.gripper.contact_geoms()
                 and contact.geom1 == self.sim.model.geom_name2id(object))):
                collision = True
                break
        return collision

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
예제 #30
0
class Stack(RobotEnv):
    """
    This class corresponds to the stacking task for a single robot arm.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default grippers(s) associated
            with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for
            the table.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """
    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        """

        """
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.08, 0.08],
                y_range=[-0.08, 0.08],
                ensure_object_boundary_in_range=False,
                rotation=None,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 2.0 is provided if the red block is stacked on the green block

        Un-normalized components if using reward shaping:

            - Reaching: in [0, 0.25], to encourage the arm to reach the cube
            - Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            - Lifting: in {0, 1}, non-zero if arm has lifted the cube
            - Aligning: in [0, 0.5], encourages aligning one cube over the other
            - Stacking: in {0, 2}, non-zero if cube is stacked on other cube

        The reward is max over the following:

            - Reaching + Grasping
            - Lifting + Aligning
            - Stacking

        The sparse reward only consists of the stacking component.

        Note that the final reward is normalized and scaled by
        reward_scale / 2.0 as well so that the max score is equal to reward_scale

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        r_reach, r_lift, r_stack = self.staged_rewards()
        if self.reward_shaping:
            reward = max(r_reach, r_lift, r_stack)
        else:
            reward = 2.0 if r_stack > 0 else 0.0

        if self.reward_scale is not None:
            reward *= self.reward_scale / 2.0

        return reward

    def staged_rewards(self):
        """
        Helper function to calculate staged rewards based on current physical states.

        Returns:
            3-tuple:

                - (float): reward for reaching and grasping
                - (float): reward for lifting and aligning
                - (float): reward for stacking
        """
        # reaching is successful when the gripper site is close to
        # the center of the cube
        cubeA_pos = self.sim.data.body_xpos[self.cubeA_body_id]
        cubeB_pos = self.sim.data.body_xpos[self.cubeB_body_id]
        gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]
        dist = np.linalg.norm(gripper_site_pos - cubeA_pos)
        r_reach = (1 - np.tanh(10.0 * dist)) * 0.25

        # collision checking
        touch_left_finger = False
        touch_right_finger = False
        touch_cubeA_cubeB = False

        for i in range(self.sim.data.ncon):
            c = self.sim.data.contact[i]
            if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cubeA_geom_id:
                touch_left_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 in self.l_finger_geom_ids:
                touch_left_finger = True
            if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cubeA_geom_id:
                touch_right_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 in self.r_finger_geom_ids:
                touch_right_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 == self.cubeB_geom_id:
                touch_cubeA_cubeB = True
            if c.geom1 == self.cubeB_geom_id and c.geom2 == self.cubeA_geom_id:
                touch_cubeA_cubeB = True

        # additional grasping reward
        if touch_left_finger and touch_right_finger:
            r_reach += 0.25

        # lifting is successful when the cube is above the table top
        # by a margin
        cubeA_height = cubeA_pos[2]
        table_height = self.mujoco_arena.table_offset[2]
        cubeA_lifted = cubeA_height > table_height + 0.04
        r_lift = 1.0 if cubeA_lifted else 0.0

        # Aligning is successful when cubeA is right above cubeB
        if cubeA_lifted:
            horiz_dist = np.linalg.norm(
                np.array(cubeA_pos[:2]) - np.array(cubeB_pos[:2]))
            r_lift += 0.5 * (1 - np.tanh(horiz_dist))

        # stacking is successful when the block is lifted and
        # the gripper is not holding the object
        r_stack = 0
        not_touching = not touch_left_finger and not touch_right_finger
        if not_touching and r_lift > 0 and touch_cubeA_cubeB:
            r_stack = 2.0

        return r_reach, r_lift, r_stack

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        greenwood = CustomMaterial(
            texture="WoodGreen",
            tex_name="greenwood",
            mat_name="greenwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        cubeA = BoxObject(
            name="cubeA",
            size_min=[0.02, 0.02, 0.02],
            size_max=[0.02, 0.02, 0.02],
            rgba=[1, 0, 0, 1],
            material=redwood,
        )
        cubeB = BoxObject(
            name="cubeB",
            size_min=[0.025, 0.025, 0.025],
            size_max=[0.025, 0.025, 0.025],
            rgba=[0, 1, 0, 1],
            material=greenwood,
        )
        self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            self.mujoco_arena,
            [robot.robot_model for robot in self.robots],
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.cubeA_body_id = self.sim.model.body_name2id("cubeA")
        self.cubeB_body_id = self.sim.model.body_name2id("cubeB")

        # information of objects
        self.object_names = list(self.mujoco_objects.keys())
        self.object_site_ids = [
            self.sim.model.site_name2id(ob_name)
            for ob_name in self.object_names
        ]

        # id of grippers for contact checking
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["left_finger"]
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["right_finger"]
        ]
        self.cubeA_geom_id = self.sim.model.geom_name2id("cubeA")
        self.cubeB_geom_id = self.sim.model.geom_name2id("cubeB")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate(
                        [np.array(obj_pos[i]),
                         np.array(obj_quat[i])]))

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            pr = self.robots[0].robot_model.naming_prefix

            # position and rotation of the first cube
            cubeA_pos = np.array(self.sim.data.body_xpos[self.cubeA_body_id])
            cubeA_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cubeA_body_id]),
                                      to="xyzw")
            di["cubeA_pos"] = cubeA_pos
            di["cubeA_quat"] = cubeA_quat

            # position and rotation of the second cube
            cubeB_pos = np.array(self.sim.data.body_xpos[self.cubeB_body_id])
            cubeB_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cubeB_body_id]),
                                      to="xyzw")
            di["cubeB_pos"] = cubeB_pos
            di["cubeB_quat"] = cubeB_quat

            # relative positions between gripper and cubes
            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])
            di[pr + "gripper_to_cubeA"] = gripper_site_pos - cubeA_pos
            di[pr + "gripper_to_cubeB"] = gripper_site_pos - cubeB_pos
            di["cubeA_to_cubeB"] = cubeA_pos - cubeB_pos

            di["object-state"] = np.concatenate([
                cubeA_pos,
                cubeA_quat,
                cubeB_pos,
                cubeB_quat,
                di[pr + "gripper_to_cubeA"],
                di[pr + "gripper_to_cubeB"],
                di["cubeA_to_cubeB"],
            ])

        return di

    def _check_success(self):
        """
        Check if blocks are stacked correctly.

        Returns:
            bool: True if blocks are correctly stacked
        """
        _, _, r_stack = self.staged_rewards()
        return r_stack > 0

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """

        # color the gripper site appropriately based on distance to cube
        if self.robots[0].gripper_visualization:
            # find closest object
            square_dist = lambda x: np.sum(
                np.square(x - self.sim.data.get_site_xpos(self.robots[
                    0].gripper.visualization_sites["grip_site"])))
            dists = np.array(list(map(square_dist, self.sim.data.site_xpos)))
            dists[
                self.robots[0].
                eef_site_id] = np.inf  # make sure we don't pick the same site
            dists[self.robots[0].eef_cylinder_id] = np.inf
            ob_dists = dists[
                self.object_site_ids]  # filter out object sites we care about
            min_dist = np.min(ob_dists)

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(min_dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(
                robots
            ) == 1, "Error: Only one robot should be inputted for this task!"