def __init__(
    self,
    cylinder_radius=(0.015, 0.03),
    cylinder_length=0.13,
    use_object_obs=True,
    reward_shaping=True,
    **kwargs
):
    """
    Creates the hole and cylinder objects and forwards the remaining arguments to the parent environment.

    Args:
        cylinder_radius (2-tuple): low and high limits of the (uniformly sampled)
            radius of the cylinder

        cylinder_length (float): length of the cylinder

        use_object_obs (bool): if True, include object information in the observation.

        reward_shaping (bool): if True, use dense rewards

    Inherits the Baxter environment; refer to other parameters described there.
    """
    # initialize objects of interest
    self.hole = PlateWithHoleObject()

    # Sample the radius from the caller-supplied range. Previously the argument was
    # overwritten by a draw from the hard-coded default range, so it had no effect.
    radius_low, radius_high = cylinder_radius
    sampled_radius = np.random.uniform(radius_low, radius_high)

    # size_min == size_max pins the cylinder to exactly the sampled dimensions
    self.cylinder = CylinderObject(
        size_min=(sampled_radius, cylinder_length),
        size_max=(sampled_radius, cylinder_length),
    )

    self.mujoco_objects = OrderedDict()

    # whether to use ground-truth object states
    self.use_object_obs = use_object_obs

    # reward configuration
    self.reward_shaping = reward_shaping

    # This task attaches the objects directly to the hands, so no grippers are requested
    super().__init__(gripper_left=None, gripper_right=None, **kwargs)
def _load_model(self):
    """
    Builds the simulation model and stores it in self.model.

    Positions the robot base(s) according to self.env_configuration, merges the arena and
    robots into a fresh world, creates the hole and peg objects, and attaches each of them
    to a robot end-effector body.
    """
    super()._load_model()

    config = self.env_configuration

    # Place the robot base(s) for the selected configuration
    if config == "bimanual":
        bimanual_model = self.robots[0].robot_model
        bimanual_model.set_base_xpos(bimanual_model.base_xpos_offset["empty"])
    elif config == "single-arm-opposed":
        # Rotate each robot about z (in opposite directions) so that the two face each other
        for arm, yaw in zip(self.robots, (np.pi / 2, -np.pi / 2)):
            default_pos = arm.robot_model.base_xpos_offset["empty"]
            euler = np.array((0, 0, yaw))
            rotated_pos = T.euler2mat(euler) @ np.array(default_pos)
            arm.robot_model.set_base_xpos(rotated_pos)
            arm.robot_model.set_base_ori(euler)
    else:
        # "single-arm-parallel": keep the default orientation, shift the bases apart along y
        for arm, y_shift in zip(self.robots, (-0.25, 0.25)):
            default_pos = arm.robot_model.base_xpos_offset["empty"]
            shifted_pos = np.array(default_pos) + np.array((0, y_shift, 0))
            arm.robot_model.set_base_xpos(shifted_pos)

    # World = empty arena (+ optional indicator) + every robot
    self.model = MujocoWorldBase()
    self.mujoco_arena = EmptyArena()
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()
    self.model.merge(self.mujoco_arena)
    for arm in self.robots:
        self.model.merge(arm.robot_model)

    # Objects of interest: the plate with the hole and a green wooden peg
    self.hole = PlateWithHoleObject(name="hole", )
    greenwood = CustomMaterial(
        texture="WoodGreen",
        tex_name="greenwood",
        mat_name="greenwood_mat",
        tex_attrib={"type": "cube"},
        mat_attrib={"texrepeat": "1 1", "specular": "0.4", "shininess": "0.1"},
    )
    self.peg = CylinderObject(
        name="peg",
        size_min=(self.peg_radius[0], self.peg_length),
        size_max=(self.peg_radius[1], self.peg_length),
        material=greenwood,
        rgba=[0, 1, 0, 1],
    )

    # Hole element, posed relative to the body it will be attached to
    self.hole_obj = self.hole.get_collision(site=True)
    self.hole_obj.set("quat", "0 0 0.707 0.707")
    self.hole_obj.set("pos", "0.11 0 0.17")
    self.model.merge_asset(self.hole)

    # Peg element, offset by the peg length along the parent's z axis
    self.peg_obj = self.peg.get_collision(site=True)
    self.peg_obj.set("pos", array_to_string((0, 0, self.peg_length)))
    self.model.merge_asset(self.peg)

    def attach(parent_body_name, element):
        # Append the element under the named body of the merged world
        self.model.worldbody.find(".//body[@name='{}']".format(parent_body_name)).append(element)

    # Hole goes on the left hand / second robot, peg on the right hand / first robot
    if config == "bimanual":
        attach(self.robots[0].robot_model.eef_name["left"], self.hole_obj)
        attach(self.robots[0].robot_model.eef_name["right"], self.peg_obj)
    else:
        attach(self.robots[1].robot_model.eef_name, self.hole_obj)
        attach(self.robots[0].robot_model.eef_name, self.peg_obj)
class TwoArmPegInHole(RobotEnv):
    """
    This class corresponds to the peg-in-hole task for two robot arms.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be either 2 single-arm robots or 1 bimanual robot!

        env_configuration (str): Specifies how to position the robots within the environment. Can be either:

            :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x
                side of the table
            :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots next to each other on the -x side of the table
            :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots opposed from each others on the opposite +/-y sides of the table (Default option)

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory.
            For this environment, setting a value other than the default (None) will raise an AssertionError, as
            this environment is not meant to be used with any gripper at all.

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        use_camera_obs (bool or list of bool): if True, every observation for a specific robot includes a rendered
            image. Should either be single bool if camera obs value is to be used for all
            robots or else it should be a list of the same length as "robots" param

        use_object_obs (bool): if True, include object (peg and hole) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        peg_radius (2-tuple): low and high limits of the (uniformly sampled)
            radius of the peg

        peg_length (float): length of the peg

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length
            as "camera names" param.

    Raises:
        AssertionError: [Gripper specified]
        ValueError: [Invalid number of robots specified]
        ValueError: [Invalid env configuration]
        ValueError: [Invalid robots for specified env configuration]
    """

    def __init__(
        self,
        robots,
        env_configuration="single-arm-opposed",
        controller_configs=None,
        gripper_types=None,
        gripper_visualizations=False,
        initialization_noise="default",
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        peg_radius=(0.015, 0.03),
        peg_length=0.13,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that correct number of robots are being inputted
        # (env_configuration must be stored first: the check reads it from self)
        self.env_configuration = env_configuration
        self._check_robot_configuration(robots)

        # Assert that the gripper type is None
        # (kept as an assert because AssertionError is the documented failure mode)
        assert gripper_types is None, "Tried to specify gripper other than None in TwoArmPegInHole environment!"

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # Save peg specs
        self.peg_radius = peg_radius
        self.peg_length = peg_length

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 5.0 is provided if the peg is inside the plate's hole
              - Note that we enforce that it's inside at an appropriate angle (cos(theta) > 0.95).

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 1], to encourage the arms to approach each other
            - Perpendicular Distance: in [0,1], to encourage the arms to approach each other
            - Parallel Distance: in [0,1], to encourage the arms to approach each other
            - Alignment: in [0, 1], to encourage having the right orientation between the peg and hole.
            - Placement: in {0, 1}, nonzero if the peg is in the hole with a relatively correct alignment

        Note that the final reward is normalized and scaled by reward_scale / 5.0 as well so that the max score
        is equal to reward_scale

        Args:
            action (np.array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0

        # Right location and angle
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:
            # Grab relevant values
            t, d, cos = self._compute_orientation()

            # reaching reward
            hole_pos = self.sim.data.body_xpos[self.hole_body_id]
            gripper_site_pos = self.sim.data.body_xpos[self.peg_body_id]
            dist = np.linalg.norm(gripper_site_pos - hole_pos)
            reaching_reward = 1 - np.tanh(1.0 * dist)
            reward += reaching_reward

            # Orientation reward
            reward += 1 - np.tanh(d)
            reward += 1 - np.tanh(np.abs(t))
            reward += cos

        # if we're not reward shaping, we need to scale our sparse reward so that the max reward is identical
        # to its dense version
        else:
            reward *= 5.0

        # With reward_scale=None the un-normalized value (max 5.0) is returned as-is
        if self.reward_scale is not None:
            reward *= self.reward_scale / 5.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model

        Positions the robot base(s) according to the env configuration, builds the world from the
        arena and robots, then creates the hole and peg and attaches them to the end-effector bodies.
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["empty"]
            self.robots[0].robot_model.set_base_xpos(xpos)
        elif self.env_configuration == "single-arm-opposed":
            # Set up robots facing towards each other by rotating them from their default position
            for robot, rotation in zip(self.robots, (np.pi / 2, -np.pi / 2)):
                xpos = robot.robot_model.base_xpos_offset["empty"]
                rot = np.array((0, 0, rotation))
                # The default offset is rotated together with the base orientation
                xpos = T.euler2mat(rot) @ np.array(xpos)
                robot.robot_model.set_base_xpos(xpos)
                robot.robot_model.set_base_ori(rot)
        else:
            # "single-arm-parallel" configuration setting
            # Set up robots parallel to each other but offset from the center
            for robot, offset in zip(self.robots, (-0.25, 0.25)):
                xpos = robot.robot_model.base_xpos_offset["empty"]
                xpos = np.array(xpos) + np.array((0, offset, 0))
                robot.robot_model.set_base_xpos(xpos)

        # Add arena and robot
        self.model = MujocoWorldBase()
        self.mujoco_arena = EmptyArena()
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()
        self.model.merge(self.mujoco_arena)
        for robot in self.robots:
            self.model.merge(robot.robot_model)

        # initialize objects of interest
        self.hole = PlateWithHoleObject(name="hole", )
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        greenwood = CustomMaterial(
            texture="WoodGreen",
            tex_name="greenwood",
            mat_name="greenwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        self.peg = CylinderObject(
            name="peg",
            size_min=(self.peg_radius[0], self.peg_length),
            size_max=(self.peg_radius[1], self.peg_length),
            material=greenwood,
            rgba=[0, 1, 0, 1],
        )

        # Load hole object
        self.hole_obj = self.hole.get_collision(site=True)
        self.hole_obj.set("quat", "0 0 0.707 0.707")
        self.hole_obj.set("pos", "0.11 0 0.17")
        self.model.merge_asset(self.hole)

        # Load peg object
        self.peg_obj = self.peg.get_collision(site=True)
        self.peg_obj.set("pos", array_to_string((0, 0, self.peg_length)))
        self.model.merge_asset(self.peg)

        # Depending on env configuration, append appropriate objects to arms
        if self.env_configuration == "bimanual":
            self.model.worldbody.find(".//body[@name='{}']".format(
                self.robots[0].robot_model.eef_name["left"])).append(self.hole_obj)
            self.model.worldbody.find(".//body[@name='{}']".format(
                self.robots[0].robot_model.eef_name["right"])).append(self.peg_obj)
        else:
            self.model.worldbody.find(".//body[@name='{}']".format(
                self.robots[1].robot_model.eef_name)).append(self.hole_obj)
            self.model.worldbody.find(".//body[@name='{}']".format(
                self.robots[0].robot_model.eef_name)).append(self.peg_obj)

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.hole_body_id = self.sim.model.body_name2id("hole")
        self.peg_body_id = self.sim.model.body_name2id("peg")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # position and rotation of peg and hole
            hole_pos = np.array(self.sim.data.body_xpos[self.hole_body_id])
            hole_quat = T.convert_quat(self.sim.data.body_xquat[self.hole_body_id], to="xyzw")
            di["hole_pos"] = hole_pos
            di["hole_quat"] = hole_quat

            peg_pos = np.array(self.sim.data.body_xpos[self.peg_body_id])
            peg_quat = T.convert_quat(self.sim.data.body_xquat[self.peg_body_id], to="xyzw")
            di["peg_to_hole"] = peg_pos - hole_pos
            di["peg_quat"] = peg_quat

            # Relative orientation parameters
            # ("angle" holds the cosine returned by _compute_orientation, not the angle itself)
            t, d, cos = self._compute_orientation()
            di["angle"] = cos
            di["t"] = t
            di["d"] = d

            di["object-state"] = np.concatenate([
                di["hole_pos"],
                di["hole_quat"],
                di["peg_to_hole"],
                di["peg_quat"],
                [di["angle"]],
                [di["t"]],
                [di["d"]],
            ])

        return di

    def _check_success(self):
        """
        Check if peg is successfully aligned and placed within the hole

        Returns:
            bool: True if peg is placed in hole correctly
        """
        t, d, cos = self._compute_orientation()

        return d < 0.06 and -0.12 <= t <= 0.14 and cos > 0.95

    def _compute_orientation(self):
        """
        Helper function to return the relative positions between the hole
        and the peg.
        In particular, the intersection of the line defined by the peg and the plane
        defined by the hole is computed; the parallel distance, perpendicular distance,
        and angle are returned.

        Returns:
            3-tuple:

                - (float): parallel distance: offset of the hole center from the peg body position,
                  measured along the peg axis (signed)
                - (float): perpendicular distance: distance of the hole center from the peg axis line
                - (float): absolute value of the cosine of the angle between the peg axis and the hole normal
        """
        # reshape (rather than assigning .shape) leaves the arrays handed out by the sim untouched
        peg_mat = self.sim.data.body_xmat[self.peg_body_id].reshape(3, 3)
        peg_pos = self.sim.data.body_xpos[self.peg_body_id]

        hole_pos = self.sim.data.body_xpos[self.hole_body_id]
        hole_mat = self.sim.data.body_xmat[self.hole_body_id].reshape(3, 3)

        # Peg axis: the peg body's local z axis expressed in the world frame
        v = peg_mat @ np.array([0, 0, 1])
        v = v / np.linalg.norm(v)
        # Hole center: offset 0.1 along the hole body's local x axis
        center = hole_pos + hole_mat @ np.array([0.1, 0, 0])

        t = (center - peg_pos) @ v / (np.linalg.norm(v)**2)
        d = np.linalg.norm(np.cross(v, peg_pos - center)) / np.linalg.norm(v)

        # Hole normal: the hole body's local z axis expressed in the world frame
        hole_normal = hole_mat @ np.array([0, 0, 1])
        return (
            t,
            d,
            abs(np.dot(hole_normal, v) / np.linalg.norm(hole_normal) / np.linalg.norm(v)),
        )

    def _peg_pose_in_hole_frame(self):
        """
        A helper function that returns the pose of the peg expressed in the frame of the hole.

        Returns:
            np.array: (4,4) matrix corresponding to the pose of the peg in the hole frame
        """
        # World frame
        peg_pos_in_world = self.sim.data.get_body_xpos("peg")
        peg_rot_in_world = self.sim.data.get_body_xmat("peg").reshape((3, 3))
        peg_pose_in_world = T.make_pose(peg_pos_in_world, peg_rot_in_world)

        # World frame
        hole_pos_in_world = self.sim.data.get_body_xpos("hole")
        hole_rot_in_world = self.sim.data.get_body_xmat("hole").reshape((3, 3))
        hole_pose_in_world = T.make_pose(hole_pos_in_world, hole_rot_in_world)

        # Invert the hole pose to obtain the world expressed in the hole frame, then compose
        world_pose_in_hole = T.pose_inv(hole_pose_in_world)

        peg_pose_in_hole = T.pose_in_A_to_pose_in_B(peg_pose_in_world, world_pose_in_hole)
        return peg_pose_in_hole

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env. A tuple is accepted
                as well and treated like a list.

        Raises:
            ValueError: [Invalid number of robots specified]
            ValueError: [Invalid env configuration]
            ValueError: [Invalid robots for specified env configuration]
        """
        # Normalize to a list so the count checks below treat lists and tuples alike.
        # (Previously a tuple passed the normalization but was then rejected by a strict
        # `type(...) is list` test, and skipped the bimanual count check entirely.)
        robots = list(robots) if isinstance(robots, (list, tuple)) else [robots]

        if self.env_configuration in ("single-arm-opposed", "single-arm-parallel"):
            # Specifically two robots should be inputted!
            is_bimanual = False
            if len(robots) != 2:
                raise ValueError(
                    "Error: Exactly two single-armed robots should be inputted "
                    "for this task configuration!")
        elif self.env_configuration == "bimanual":
            is_bimanual = True
            # Specifically one robot should be inputted!
            if len(robots) != 1:
                raise ValueError(
                    "Error: Exactly one bimanual robot should be inputted "
                    "for this task configuration!")
        else:
            # This is an unknown env configuration, print error
            raise ValueError(
                "Error: Unknown environment configuration received. Only 'bimanual',"
                "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}"
                .format(self.env_configuration))

        # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual)
        for robot in robots:
            if check_bimanual(robot) != is_bimanual:
                raise ValueError(
                    "Error: For {} configuration, expected bimanual check to return {}; "
                    "instead, got {}.".format(self.env_configuration, is_bimanual, check_bimanual(robot)))
def _load_model(self):
    """
    Builds the task model and stores it in self.model.

    Positions the robot base(s) for the chosen env configuration, configures an empty arena,
    creates the hole and peg, appends them to the end-effector bodies of the robot model(s),
    and finally assembles everything into a ManipulationTask.
    """
    super()._load_model()

    config = self.env_configuration

    # Place the robot base(s)
    if config == "bimanual":
        bimanual_model = self.robots[0].robot_model
        bimanual_model.set_base_xpos(bimanual_model.base_xpos_offset["empty"])
    elif config == "single-arm-opposed":
        # Rotate the two robots about z in opposite directions so that they face each other
        for arm, yaw in zip(self.robots, (np.pi / 2, -np.pi / 2)):
            default_pos = arm.robot_model.base_xpos_offset["empty"]
            euler = np.array((0, 0, yaw))
            rotated_pos = T.euler2mat(euler) @ np.array(default_pos)
            arm.robot_model.set_base_xpos(rotated_pos)
            arm.robot_model.set_base_ori(euler)
    else:
        # "single-arm-parallel": same orientation, bases shifted apart along y
        for arm, y_shift in zip(self.robots, (-0.25, 0.25)):
            default_pos = arm.robot_model.base_xpos_offset["empty"]
            shifted_pos = np.array(default_pos) + np.array((0, y_shift, 0))
            arm.robot_model.set_base_xpos(shifted_pos)

    # Empty arena at the zero origin with a customized "agentview" camera
    mujoco_arena = EmptyArena()
    mujoco_arena.set_origin([0, 0, 0])
    mujoco_arena.set_camera(
        camera_name="agentview",
        pos=[1.0666432116509934, 1.4903257668114777e-08, 2.0563394967349096],
        quat=[0.6530979871749878, 0.27104058861732483, 0.27104055881500244, 0.6530978679656982],
    )

    # Objects of interest: the plate with the hole and a green wooden peg without joints
    self.hole = PlateWithHoleObject(name="hole")
    greenwood = CustomMaterial(
        texture="WoodGreen",
        tex_name="greenwood",
        mat_name="greenwood_mat",
        tex_attrib={"type": "cube"},
        mat_attrib={"texrepeat": "1 1", "specular": "0.4", "shininess": "0.1"},
    )
    self.peg = CylinderObject(
        name="peg",
        size_min=(self.peg_radius[0], self.peg_length),
        size_max=(self.peg_radius[1], self.peg_length),
        material=greenwood,
        rgba=[0, 1, 0, 1],
        joints=None,
    )

    # Pose both elements relative to the bodies they will hang from
    hole_obj = self.hole.get_obj()
    hole_obj.set("quat", "0 0 0.707 0.707")
    hole_obj.set("pos", "0.11 0 0.17")

    peg_obj = self.peg.get_obj()
    peg_obj.set("pos", array_to_string((0, 0, self.peg_length)))

    # Work out which end-effector body (and which robot model) carries which object
    if config == "bimanual":
        shared_robot = self.robots[0]
        r_eef, l_eef = (shared_robot.robot_model.eef_name[arm] for arm in shared_robot.arms)
        r_model = l_model = self.robots[0].robot_model
    else:
        r_eef, l_eef = (arm.robot_model.eef_name for arm in self.robots)
        r_model, l_model = self.robots[0].robot_model, self.robots[1].robot_model

    # Look both parent bodies up first, then attach: peg on the first, hole on the second
    peg_parent, hole_parent = [
        find_elements(root=model.worldbody, tags="body", attribs={"name": eef}, return_first=True)
        for model, eef in ((r_model, r_eef), (l_model, l_eef))
    ]
    peg_parent.append(peg_obj)
    hole_parent.append(hole_obj)

    # The task takes only the arena and the robots: peg and hole already live inside the robot models
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[arm.robot_model for arm in self.robots],
    )

    # Their assets still have to be merged in explicitly
    self.model.merge_assets(self.hole)
    self.model.merge_assets(self.peg)
class BaxterPegInHole(BaxterEnv):
    """
    This class corresponds to the peg in hole task for the Baxter robot. There's
    a cylinder attached to one gripper and a hole attached to the other one.
    """

    def __init__(self,
                 cylinder_radius=(0.015, 0.03),
                 cylinder_length=0.13,
                 use_object_obs=True,
                 reward_shaping=True,
                 **kwargs):
        """
        Args:
            cylinder_radius (2-tuple): low and high limits of the (uniformly sampled)
                radius of the cylinder

            cylinder_length (float): length of the cylinder

            use_object_obs (bool): if True, include object information in the observation.

            reward_shaping (bool): if True, use dense rewards

        Inherits the Baxter environment; refer to other parameters described there.
        """
        # initialize objects of interest
        self.hole = PlateWithHoleObject()

        # Sample the radius from the caller-supplied range. Previously the argument was
        # overwritten by a draw from the hard-coded default range, so it had no effect.
        radius_low, radius_high = cylinder_radius
        sampled_radius = np.random.uniform(radius_low, radius_high)

        # size_min == size_max pins the cylinder to exactly the sampled dimensions
        self.cylinder = CylinderObject(
            size_min=(sampled_radius, cylinder_length),
            size_max=(sampled_radius, cylinder_length),
        )

        self.mujoco_objects = OrderedDict()

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # The objects are attached directly to the hands, so no grippers are requested
        super().__init__(gripper_left=None, gripper_right=None, **kwargs)

    def _load_model(self):
        """
        Loads the peg and the hole models.

        Builds the world from an empty arena and the robot, then appends the hole under the
        'left_hand' body and the (green) cylinder under the 'right_hand' body.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # Add arena and robot
        self.model = MujocoWorldBase()
        self.arena = EmptyArena()
        if self.use_indicator_object:
            self.arena.add_pos_indicator()
        self.model.merge(self.arena)
        self.model.merge(self.mujoco_robot)

        # Load hole object
        self.hole_obj = self.hole.get_collision(name="hole", site=True)
        self.hole_obj.set("quat", "0 0 0.707 0.707")
        self.hole_obj.set("pos", "0.11 0 0.18")
        self.model.merge_asset(self.hole)
        self.model.worldbody.find(".//body[@name='left_hand']").append(self.hole_obj)

        # Load cylinder object
        self.cyl_obj = self.cylinder.get_collision(name="cylinder", site=True)
        self.cyl_obj.set("pos", "0 0 0.15")
        self.model.merge_asset(self.cylinder)
        self.model.worldbody.find(".//body[@name='right_hand']").append(self.cyl_obj)
        # Color the cylinder geom green
        self.model.worldbody.find(".//geom[@name='cylinder']").set("rgba", "0 1 0 1")

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.hole_body_id = self.sim.model.body_name2id("hole")
        self.cyl_body_id = self.sim.model.body_name2id("cylinder")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

    def _compute_orientation(self):
        """
        Helper function to return the relative positions between the hole
        and the peg.
        In particular, the intersection of the line defined by the peg and the plane
        defined by the hole is computed; the parallel distance, perpendicular distance,
        and angle are returned.

        Returns:
            3-tuple:

                - (float): parallel distance: offset of the hole center from the cylinder body
                  position, measured along the cylinder axis (signed)
                - (float): perpendicular distance: distance of the hole center from the cylinder axis line
                - (float): absolute value of the cosine of the angle between the cylinder axis
                  and the hole normal
        """
        # reshape (rather than assigning .shape) leaves the arrays handed out by the sim untouched
        cyl_mat = self.sim.data.body_xmat[self.cyl_body_id].reshape(3, 3)
        cyl_pos = self.sim.data.body_xpos[self.cyl_body_id]

        hole_pos = self.sim.data.body_xpos[self.hole_body_id]
        hole_mat = self.sim.data.body_xmat[self.hole_body_id].reshape(3, 3)

        # Cylinder axis: the cylinder body's local z axis expressed in the world frame
        v = cyl_mat @ np.array([0, 0, 1])
        v = v / np.linalg.norm(v)
        # Hole center: offset 0.1 along the hole body's local x axis
        center = hole_pos + hole_mat @ np.array([0.1, 0, 0])

        t = (center - cyl_pos) @ v / (np.linalg.norm(v)**2)
        d = np.linalg.norm(np.cross(v, cyl_pos - center)) / np.linalg.norm(v)

        # Hole normal: the hole body's local z axis expressed in the world frame
        hole_normal = hole_mat @ np.array([0, 0, 1])
        return (
            t,
            d,
            abs(np.dot(hole_normal, v) / np.linalg.norm(hole_normal) / np.linalg.norm(v)),
        )

    def reward(self, action):
        """
        Reward function for the task.

        The sparse reward is 0 if the peg is outside the hole, and 1 if it's inside.
        We enforce that it's inside at an appropriate angle (cos(theta) > 0.95).

        The dense reward has four components.

            Reaching: in [0, 1], to encourage the arms to get together.
            Perpendicular and parallel distance: in [0,1], for the same purpose.
            Cosine of the angle: in [0, 1], to encourage having the right orientation.

        With reward shaping the dense components are added on top of the sparse reward.

        Args:
            action: not used by the reward computation

        Returns:
            float: reward value
        """
        reward = 0

        t, d, cos = self._compute_orientation()

        # Right location and angle
        if d < 0.06 and t >= -0.12 and t <= 0.14 and cos > 0.95:
            reward = 1

        # use a shaping reward
        if self.reward_shaping:
            # reaching reward
            hole_pos = self.sim.data.body_xpos[self.hole_body_id]
            gripper_site_pos = self.sim.data.body_xpos[self.cyl_body_id]
            dist = np.linalg.norm(gripper_site_pos - hole_pos)
            reaching_reward = 1 - np.tanh(1.0 * dist)
            reward += reaching_reward

            # Orientation reward
            reward += 1 - np.tanh(d)
            reward += 1 - np.tanh(np.abs(t))
            reward += cos

        return reward

    def _peg_pose_in_hole_frame(self):
        """
        A helper function that returns the pose of the peg (cylinder) expressed in the
        frame of the hole, as produced by T.pose_in_A_to_pose_in_B.
        """
        # World frame
        peg_pos_in_world = self.sim.data.get_body_xpos("cylinder")
        peg_rot_in_world = self.sim.data.get_body_xmat("cylinder").reshape((3, 3))
        peg_pose_in_world = T.make_pose(peg_pos_in_world, peg_rot_in_world)

        # World frame
        hole_pos_in_world = self.sim.data.get_body_xpos("hole")
        hole_rot_in_world = self.sim.data.get_body_xmat("hole").reshape((3, 3))
        hole_pose_in_world = T.make_pose(hole_pos_in_world, hole_rot_in_world)

        # Invert the hole pose to obtain the world expressed in the hole frame, then compose
        world_pose_in_hole = T.pose_inv(hole_pose_in_world)

        peg_pose_in_hole = T.pose_in_A_to_pose_in_B(peg_pose_in_world, world_pose_in_hole)
        return peg_pose_in_hole

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            # With depth enabled the render result is unpacked into (image, depth)
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of cylinder and hole
            # Positions are copied so the stored observation is independent of the array
            # returned by the simulator (same approach as TwoArmPegInHole._get_observation)
            hole_pos = np.array(self.sim.data.body_xpos[self.hole_body_id])
            hole_quat = T.convert_quat(self.sim.data.body_xquat[self.hole_body_id], to="xyzw")
            di["hole_pos"] = hole_pos
            di["hole_quat"] = hole_quat

            cyl_pos = np.array(self.sim.data.body_xpos[self.cyl_body_id])
            cyl_quat = T.convert_quat(self.sim.data.body_xquat[self.cyl_body_id], to="xyzw")
            di["cyl_to_hole"] = cyl_pos - hole_pos
            di["cyl_quat"] = cyl_quat

            # Relative orientation parameters
            # ("angle" holds the cosine returned by _compute_orientation, not the angle itself)
            t, d, cos = self._compute_orientation()
            di["angle"] = cos
            di["t"] = t
            di["d"] = d

            di["object-state"] = np.concatenate([
                di["hole_pos"],
                di["hole_quat"],
                di["cyl_to_hole"],
                di["cyl_quat"],
                [di["angle"]],
                [di["t"]],
                [di["d"]],
            ])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        # NOTE(review): __init__ passes gripper_left=None / gripper_right=None to the parent;
        # confirm that self.gripper_right / self.gripper_left are usable objects in that case.
        contact_geoms = (self.gripper_right.contact_geoms() + self.gripper_left.contact_geoms())
        # Only the first ncon entries of the contact buffer are inspected
        return any(
            self.sim.model.geom_id2name(contact.geom1) in contact_geoms
            or self.sim.model.geom_id2name(contact.geom2) in contact_geoms
            for contact in self.sim.data.contact[:self.sim.data.ncon]
        )

    def _check_success(self):
        """
        Returns True if task is successfully completed.

        Success uses the same thresholds as the sparse reward: perpendicular distance below 0.06,
        parallel distance within [-0.12, 0.14], and cosine above 0.95.
        """
        t, d, cos = self._compute_orientation()

        return d < 0.06 and t >= -0.12 and t <= 0.14 and cos > 0.95
def _load_model(self):
    """
    Loads an xml model, puts it in self.model

    Builds the table arena, registers the plate and peg selected by
    ``self.task_configs['board']`` / ``self.task_configs['peg']``, creates one
    placement sampler per registered object and finally assembles the
    ManipulationTask.

    Raises:
        NotImplementedError: if the '9mm' peg is requested.
    """
    super()._load_model()

    # The registries below are only ever appended to, so rebuild them on every
    # call; otherwise loading the model a second time would register each
    # object (and its sampler) twice under the same name.
    self.objects_of_interest = []  # MujocoObject instances
    self.objectsName_of_interest = []  # their names
    self.objectsXrange_of_interest = []  # x range for placement
    self.objectsYrange_of_interest = []  # y range for placement

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](
        self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    board_kind = self.task_configs['board']
    peg_kind = self.task_configs['peg']

    # Load hole object
    # register object with the corresponding option (objectClass, name, xrange, yrange)
    if board_kind == 'GMC_assembly':
        self.register_object(my_object.GMC_Assembly_Object,
                             'plate',
                             xrange=[0, 0],
                             yrange=[0, 0])
    elif board_kind == 'GMC_plate':
        self.register_object(my_object.GMC_Plate_Object,
                             'plate',
                             xrange=[0, 0],
                             yrange=[0, 0])
    elif board_kind == 'Square_hole_16mm':
        self.register_object(my_object.square_hole_16mm,
                             'plate',
                             xrange=[0, 0],
                             yrange=[0, 0])

    # Load peg object
    if peg_kind == '16mm':
        self.register_object(my_object.Round_peg_16mm_Object,
                             'peg',
                             xrange=[-0.1, -0.13],
                             yrange=[0.3, 0.33])
    elif peg_kind == '12mm':
        self.register_object(my_object.Round_peg_12mm_Object,
                             'peg',
                             xrange=[-0.1, -0.13],
                             yrange=[0.3, 0.33])
    elif peg_kind == '9mm':
        raise NotImplementedError
    elif peg_kind == 'cylinder_16mm':
        from robosuite.models.objects.primitive import CylinderObject
        # CylinderObject takes its name positionally here, so it cannot go
        # through register_object(); fill the registries by hand instead.
        self.peg = CylinderObject('peg', size=(0.007, 0.025))
        self.objects_of_interest.append(self.peg)
        self.objectsName_of_interest.append('peg')
        self.objectsXrange_of_interest.append([-0.1, -0.13])
        self.objectsYrange_of_interest.append([0.3, 0.33])

    # Create Sequential Sampler. The order is same as the order of register.
    # Create individual samplers per object
    self.placement_initializer = SequentialCompositeSampler(
        name="ObjectSampler")
    for obj_name, x_range, y_range in zip(self.objectsName_of_interest,
                                          self.objectsXrange_of_interest,
                                          self.objectsYrange_of_interest):
        self.placement_initializer.append_sampler(
            sampler=UniformRandomSampler(
                name=f"{obj_name}Sampler",
                x_range=x_range,
                y_range=y_range,
                rotation=None,
                rotation_axis='z',
                ensure_object_boundary_in_range=True,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
                z_offset=0.01,
            ))

    # Add objects to the sampler
    for obj_to_put, obj_name in zip(self.objects_of_interest,
                                    self.objectsName_of_interest):
        self.placement_initializer.add_objects_to_sampler(
            sampler_name=f"{obj_name}Sampler", mujoco_objects=obj_to_put)

    # The plain 'hole' plate is given a fixed pose on the table, so it is
    # added after the samplers were built and never gets a sampler of its own.
    if board_kind == 'hole':
        self.plate = PlateWithHoleObject(name='plate')
        plate_obj = self.plate.get_obj()
        plate_obj.set("quat", "0 0 0 1")
        plate_obj.set("pos", "0 0 {}".format(self.table_offset[2]))
        self.objects_of_interest.append(self.plate)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.objects_of_interest,
    )
class MyEnv(SingleArmEnv):
    """
    Table-top single-arm environment whose plate ("board") and peg objects are
    chosen through ``task_configs``.

    Args:
        task_configs (None or dict): object selection. Recognised entries:

            :`'board'`: one of 'GMC_assembly', 'GMC_plate', 'Square_hole_16mm'
                or 'hole'
            :`'peg'`: one of '16mm', '12mm', 'cylinder_16mm' ('9mm' raises
                NotImplementedError)

            A missing / unrecognised entry (or ``None``) registers no object of
            that kind.
        table_full_size (3-tuple): forwarded to TableArena
        table_friction (3-tuple): forwarded to TableArena
        use_object_obs (bool): if True, add plate / peg observables
        reward_scale (None or float): stored on the instance
        reward_shaping (bool): stored on the instance

    All remaining arguments are forwarded unchanged to SingleArmEnv.
    """

    def __init__(
        self,
        robots,
        env_configuration="default",
        task_configs=None,
        controller_configs=None,
        gripper_types="default",
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        render_gpu_device_id=-1,
        control_freq=20,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction
        self.table_offset = np.array((0, 0, 0.8))

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # task configuration; None is treated as "no objects requested" so the
        # default argument does not fail on the first key lookup
        self.task_configs = {} if task_configs is None else task_configs

        # object handles; they stay None when the configuration does not
        # request the corresponding object
        self.plate = None
        self.peg = None

        # list of MujocoObject that will be used in the task
        self.objects_of_interest = []  # for object
        self.objectsName_of_interest = []  # for name
        self.objectsXrange_of_interest = []  # x range for placement
        self.objectsYrange_of_interest = []  # y range for placement

        super().__init__(
            robots=robots,
            env_configuration=env_configuration,
            controller_configs=controller_configs,
            mount_types="default",
            gripper_types=gripper_types,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            render_gpu_device_id=render_gpu_device_id,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Placeholder: no reward is defined for this environment yet.

        Returns:
            None
        """
        pass

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model

        Builds the table arena, registers the plate and peg selected by
        ``self.task_configs``, creates one placement sampler per registered
        object and finally assembles the ManipulationTask.

        Raises:
            NotImplementedError: if the '9mm' peg is requested.
        """
        super()._load_model()

        # The registries are only ever appended to, so rebuild them (and the
        # object handles) on every call; otherwise loading the model a second
        # time would register each object and sampler twice under one name.
        self.plate = None
        self.peg = None
        self.objects_of_interest = []
        self.objectsName_of_interest = []
        self.objectsXrange_of_interest = []
        self.objectsYrange_of_interest = []

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=self.table_offset,
        )

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        board_kind = self.task_configs.get('board')
        peg_kind = self.task_configs.get('peg')

        # Load hole object
        # register object with the corresponding option (objectClass, name, xrange, yrange)
        if board_kind == 'GMC_assembly':
            self.register_object(my_object.GMC_Assembly_Object,
                                 'plate',
                                 xrange=[0, 0],
                                 yrange=[0, 0])
        elif board_kind == 'GMC_plate':
            self.register_object(my_object.GMC_Plate_Object,
                                 'plate',
                                 xrange=[0, 0],
                                 yrange=[0, 0])
        elif board_kind == 'Square_hole_16mm':
            self.register_object(my_object.square_hole_16mm,
                                 'plate',
                                 xrange=[0, 0],
                                 yrange=[0, 0])

        # Load peg object
        if peg_kind == '16mm':
            self.register_object(my_object.Round_peg_16mm_Object,
                                 'peg',
                                 xrange=[-0.1, -0.13],
                                 yrange=[0.3, 0.33])
        elif peg_kind == '12mm':
            self.register_object(my_object.Round_peg_12mm_Object,
                                 'peg',
                                 xrange=[-0.1, -0.13],
                                 yrange=[0.3, 0.33])
        elif peg_kind == '9mm':
            raise NotImplementedError
        elif peg_kind == 'cylinder_16mm':
            from robosuite.models.objects.primitive import CylinderObject
            # CylinderObject takes its name positionally here, so it cannot go
            # through register_object(); fill the registries by hand instead.
            self.peg = CylinderObject('peg', size=(0.007, 0.025))
            self.objects_of_interest.append(self.peg)
            self.objectsName_of_interest.append('peg')
            self.objectsXrange_of_interest.append([-0.1, -0.13])
            self.objectsYrange_of_interest.append([0.3, 0.33])

        # Create Sequential Sampler. The order is same as the order of register.
        # Create individual samplers per object
        self.placement_initializer = SequentialCompositeSampler(
            name="ObjectSampler")
        for obj_name, x_range, y_range in zip(
                self.objectsName_of_interest,
                self.objectsXrange_of_interest,
                self.objectsYrange_of_interest):
            self.placement_initializer.append_sampler(
                sampler=UniformRandomSampler(
                    name=f"{obj_name}Sampler",
                    x_range=x_range,
                    y_range=y_range,
                    rotation=None,
                    rotation_axis='z',
                    ensure_object_boundary_in_range=True,
                    ensure_valid_placement=True,
                    reference_pos=self.table_offset,
                    z_offset=0.01,
                ))

        # Add objects to the sampler
        for obj_to_put, obj_name in zip(self.objects_of_interest,
                                        self.objectsName_of_interest):
            self.placement_initializer.add_objects_to_sampler(
                sampler_name=f"{obj_name}Sampler", mujoco_objects=obj_to_put)

        # The plain 'hole' plate is given a fixed pose on the table, so it is
        # added after the samplers were built and never gets its own sampler.
        if board_kind == 'hole':
            self.plate = PlateWithHoleObject(name='plate')
            plate_obj = self.plate.get_obj()
            plate_obj.set("quat", "0 0 0 1")
            plate_obj.set("pos", "0 0 {}".format(self.table_offset[2]))
            self.objects_of_interest.append(self.plate)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.objects_of_interest,
        )

    def _setup_references(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._setup_references()

        # Additional object references from this env
        if self.plate is not None:
            self.plate_body_id = self.sim.model.body_name2id(
                self.plate.root_body)
        if self.peg is not None:
            self.peg_body_id = self.sim.model.body_name2id(self.peg.root_body)

    def _setup_observables(self):
        """
        Sets up observables to be used for this environment. Creates object-based observables if enabled

        Returns:
            OrderedDict: Dictionary mapping observable names to its corresponding Observable object
        """
        observables = super()._setup_observables()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix and define observables modality
            pf = self.robots[0].robot_model.naming_prefix
            modality = "object"
            sensors = []

            # plate-related observables
            if self.plate is not None:

                @sensor(modality=modality)
                def plate_pos(obs_cache):
                    return np.array(
                        self.sim.data.body_xpos[self.plate_body_id])

                @sensor(modality=modality)
                def plate_quat(obs_cache):
                    return convert_quat(np.array(
                        self.sim.data.body_xquat[self.plate_body_id]),
                                        to="xyzw")

                @sensor(modality=modality)
                def gripper_to_plate_pos(obs_cache):
                    # zeros until both inputs are available in the cache
                    if f"{pf}eef_pos" in obs_cache and "plate_pos" in obs_cache:
                        return obs_cache[f"{pf}eef_pos"] - obs_cache[
                            "plate_pos"]
                    return np.zeros(3)

                sensors.extend([plate_pos, plate_quat, gripper_to_plate_pos])

            # peg-related observables
            if self.peg is not None:

                @sensor(modality=modality)
                def peg_pos(obs_cache):
                    return np.array(self.sim.data.body_xpos[self.peg_body_id])

                @sensor(modality=modality)
                def peg_quat(obs_cache):
                    return convert_quat(np.array(
                        self.sim.data.body_xquat[self.peg_body_id]),
                                        to="xyzw")

                @sensor(modality=modality)
                def gripper_to_peg_pos(obs_cache):
                    # zeros until both inputs are available in the cache
                    if f"{pf}eef_pos" in obs_cache and "peg_pos" in obs_cache:
                        return obs_cache[f"{pf}eef_pos"] - obs_cache["peg_pos"]
                    return np.zeros(3)

                sensors.extend([peg_pos, peg_quat, gripper_to_peg_pos])

            # Create observables; each one is named after its sensor function
            for s in sensors:
                name = s.__name__
                observables[name] = Observable(
                    name=name,
                    sensor=s,
                    sampling_rate=self.control_freq,
                )

        return observables

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            object_placements = self.placement_initializer.sample()

            # Loop through all objects and reset their positions
            for obj_pos, obj_quat, obj in object_placements.values():
                self.sim.data.set_joint_qpos(
                    obj.joints[0],
                    np.concatenate([np.array(obj_pos),
                                    np.array(obj_quat)]))

    def register_object(self, objectClass, name, xrange=(0, 0), yrange=(0, 0)):
        """
        Instantiate an object and record it together with its placement ranges.

        The new object is stored as attribute ``self.<name>`` and appended to
        the object / name / x-range / y-range registries at the same index.

        Args:
            objectClass (type): class called as ``objectClass(name=name)``
            name (str): object name, also used as the attribute name on self
            xrange (2-sequence): x placement range handed to the sampler
            yrange (2-sequence): y placement range handed to the sampler
        """
        obj = objectClass(name=name)
        # plain attribute assignment; no need to exec() a formatted string
        setattr(self, name, obj)
        self.objects_of_interest.append(obj)
        self.objectsName_of_interest.append(name)
        # store copies so the registry never aliases the caller's sequence
        self.objectsXrange_of_interest.append(list(xrange))
        self.objectsYrange_of_interest.append(list(yrange))

    def visualize(self, vis_settings):
        """
        Run the parent visualization, then color the gripper site by its
        distance to the peg.

        Args:
            vis_settings (dict): visualization flags; the "grippers" entry is
                read here.
        """
        # Run superclass method first
        super().visualize(vis_settings=vis_settings)

        # This env has no cube; the peg is the object the gripper goes for.
        # Skip the coloring when no peg was configured.
        if vis_settings["grippers"] and self.peg is not None:
            self._visualize_gripper_to_target(gripper=self.robots[0].gripper,
                                              target=self.peg)

    def _check_success(self):
        """
        Placeholder: no success criterion is defined for this environment yet.

        Returns:
            None
        """
        pass

    def get_ee_force(self):
        """
        Returns self.ee_force.
        """
        # NOTE(review): ee_force is not assigned anywhere in this class;
        # presumably it comes from the parent class -- confirm.
        return self.ee_force
class TwoArmPegInHole(TwoArmEnv):
    """
    This class corresponds to the peg-in-hole task for two robot arms.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be either 2 single-arm robots or 1 bimanual robot!

        env_configuration (str): Specifies how to position the robots within the environment. Can be either:

            :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x
                side of the table
            :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots next to each other on the -x side of the table
            :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots opposed from each others on the opposite +/-y sides of the table.

        Note that "default" corresponds to either "bimanual" if a bimanual robot is used or "single-arm-opposed" if two
        single-arm robots are used.

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory.
            For this environment, setting a value other than the default (None) will raise an AssertionError, as
            this environment is not meant to be used with any gripper at all.

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        use_camera_obs (bool or list of bool): if True, every observation for a specific robot includes a rendered
            image. Should either be single bool if camera obs value is to be used for all
            robots or else it should be a list of the same length as "robots" param

        use_object_obs (bool): if True, include object (peg and hole) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        peg_radius (2-tuple): low and high limits of the (uniformly sampled)
            radius of the peg

        peg_length (float): length of the peg

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        render_gpu_device_id (int): corresponds to the GPU device id to use for offscreen rendering.
            Defaults to -1, in which case the device will be inferred from environment variables
            (GPUS or CUDA_VISIBLE_DEVICES).

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

        camera_segmentations (None or str or list of str or list of list of str): Camera segmentation(s) to use
            for each camera. Valid options are:

                `None`: no segmentation sensor used
                `'instance'`: segmentation at the class-instance level
                `'class'`: segmentation at the class level
                `'element'`: segmentation at the per-geom level

            If not None, multiple types of segmentations can be specified. A [list of str / str or None] specifies
            [multiple / a single] segmentation(s) to use for all cameras. A list of list of str specifies per-camera
            segmentation setting(s) to use.

        renderer (str): forwarded unchanged to the parent environment.

        renderer_config (None or dict): forwarded unchanged to the parent environment.

    Raises:
        AssertionError: [Gripper specified]
        ValueError: [Invalid number of robots specified]
        ValueError: [Invalid env configuration]
        ValueError: [Invalid robots for specified env configuration]
    """

    def __init__(
        self,
        robots,
        env_configuration="default",
        controller_configs=None,
        gripper_types=None,
        initialization_noise="default",
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        peg_radius=(0.015, 0.03),
        peg_length=0.13,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        render_gpu_device_id=-1,
        control_freq=20,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
        camera_segmentations=None,  # {None, instance, class, element}
        renderer="mujoco",
        renderer_config=None,
    ):
        # Assert that the gripper type is None
        assert gripper_types is None, "Tried to specify gripper other than None in TwoArmPegInHole environment!"

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # Save peg specs
        self.peg_radius = peg_radius
        self.peg_length = peg_length

        super().__init__(
            robots=robots,
            env_configuration=env_configuration,
            controller_configs=controller_configs,
            mount_types="default",
            gripper_types=gripper_types,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            render_gpu_device_id=render_gpu_device_id,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
            camera_segmentations=camera_segmentations,
            renderer=renderer,
            renderer_config=renderer_config,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 5.0 is provided if the peg is inside the plate's hole
              - Note that we enforce that it's inside at an appropriate angle (cos(theta) > 0.95).

        Un-normalized summed components if using reward shaping:

            - Reaching: 1 - tanh(distance between the peg body and the hole body)
            - Perpendicular Distance: 1 - tanh(d), with d the distance of the hole center from the peg axis
            - Parallel Distance: 1 - tanh(|t|), with t the offset of the hole center along the peg axis
            - Alignment: |cos| of the angle between the peg axis and the hole normal
            - Placement: in {0, 1}, nonzero if the peg is in the hole with a relatively correct alignment

        Note that the final reward is normalized and scaled by reward_scale / 5.0 as
        well so that the max score is equal to reward_scale

        Args:
            action: unused by this reward.

        Returns:
            float: reward value
        """
        reward = 0

        # Right location and angle
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:
            # Grab relevant values
            t, d, cos = self._compute_orientation()
            # reaching reward
            hole_pos = self.sim.data.body_xpos[self.hole_body_id]
            peg_pos = self.sim.data.body_xpos[self.peg_body_id]
            dist = np.linalg.norm(peg_pos - hole_pos)
            reaching_reward = 1 - np.tanh(1.0 * dist)
            reward += reaching_reward

            # Orientation reward
            reward += 1 - np.tanh(d)
            reward += 1 - np.tanh(np.abs(t))
            reward += cos

        # if we're not reward shaping, scale sparse reward so that the max reward is identical to its dense version
        else:
            reward *= 5.0

        if self.reward_scale is not None:
            reward *= self.reward_scale / 5.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["empty"]
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation in zip(self.robots,
                                           (np.pi / 2, -np.pi / 2)):
                    xpos = robot.robot_model.base_xpos_offset["empty"]
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["empty"]
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # Add arena and robot
        mujoco_arena = EmptyArena()

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # Modify default agentview camera
        mujoco_arena.set_camera(camera_name="agentview",
                                pos=[
                                    1.0666432116509934,
                                    1.4903257668114777e-08,
                                    2.0563394967349096
                                ],
                                quat=[
                                    0.6530979871749878,
                                    0.27104058861732483,
                                    0.27104055881500244,
                                    0.6530978679656982
                                ])

        # initialize objects of interest
        self.hole = PlateWithHoleObject(name="hole")
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        greenwood = CustomMaterial(
            texture="WoodGreen",
            tex_name="greenwood",
            mat_name="greenwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        self.peg = CylinderObject(
            name="peg",
            size_min=(self.peg_radius[0], self.peg_length),
            size_max=(self.peg_radius[1], self.peg_length),
            material=greenwood,
            rgba=[0, 1, 0, 1],
            joints=None,
        )

        # Load hole object
        hole_obj = self.hole.get_obj()
        hole_obj.set("quat", "0 0 0.707 0.707")
        hole_obj.set("pos", "0.11 0 0.17")

        # Load peg object
        peg_obj = self.peg.get_obj()
        peg_obj.set("pos", array_to_string((0, 0, self.peg_length)))

        # Append appropriate objects to arms
        if self.env_configuration == "bimanual":
            r_eef, l_eef = [
                self.robots[0].robot_model.eef_name[arm]
                for arm in self.robots[0].arms
            ]
            r_model, l_model = [
                self.robots[0].robot_model, self.robots[0].robot_model
            ]
        else:
            r_eef, l_eef = [
                robot.robot_model.eef_name for robot in self.robots
            ]
            r_model, l_model = [
                self.robots[0].robot_model, self.robots[1].robot_model
            ]
        r_body = find_elements(root=r_model.worldbody,
                               tags="body",
                               attribs={"name": r_eef},
                               return_first=True)
        l_body = find_elements(root=l_model.worldbody,
                               tags="body",
                               attribs={"name": l_eef},
                               return_first=True)
        # the peg rides on the first (right) end effector, the hole on the second (left)
        r_body.append(peg_obj)
        l_body.append(hole_obj)

        # task includes arena, robot, and objects of interest
        # We don't add peg and hole directly since they were already appended to the robots
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
        )

        # Make sure to add relevant assets from peg and hole objects
        self.model.merge_assets(self.hole)
        self.model.merge_assets(self.peg)

    def _setup_references(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._setup_references()

        # Additional object references from this env
        self.hole_body_id = self.sim.model.body_name2id(self.hole.root_body)
        self.peg_body_id = self.sim.model.body_name2id(self.peg.root_body)

    def _setup_observables(self):
        """
        Sets up observables to be used for this environment. Creates object-based observables if enabled

        Returns:
            OrderedDict: Dictionary mapping observable names to its corresponding Observable object
        """
        observables = super()._setup_observables()

        # low-level object information
        if self.use_object_obs:
            modality = "object"

            # position and rotation of peg and hole
            @sensor(modality=modality)
            def hole_pos(obs_cache):
                return np.array(self.sim.data.body_xpos[self.hole_body_id])

            @sensor(modality=modality)
            def hole_quat(obs_cache):
                return T.convert_quat(
                    self.sim.data.body_xquat[self.hole_body_id], to="xyzw")

            @sensor(modality=modality)
            def peg_to_hole(obs_cache):
                # zeros until hole_pos is available in the cache
                if "hole_pos" in obs_cache:
                    return obs_cache["hole_pos"] - np.array(
                        self.sim.data.body_xpos[self.peg_body_id])
                return np.zeros(3)

            @sensor(modality=modality)
            def peg_quat(obs_cache):
                return T.convert_quat(
                    self.sim.data.body_xquat[self.peg_body_id], to="xyzw")

            # Relative orientation parameters
            @sensor(modality=modality)
            def angle(obs_cache):
                # one call yields all three values; t and d are stashed in the
                # cache so the `t` / `d` sensors below can read them back
                t, d, cos = self._compute_orientation()
                obs_cache["t"] = t
                obs_cache["d"] = d
                return cos

            @sensor(modality=modality)
            def t(obs_cache):
                return obs_cache["t"] if "t" in obs_cache else 0.0

            @sensor(modality=modality)
            def d(obs_cache):
                return obs_cache["d"] if "d" in obs_cache else 0.0

            sensors = [hole_pos, hole_quat, peg_to_hole, peg_quat, angle, t, d]
            names = [s.__name__ for s in sensors]

            # Create observables
            for name, s in zip(names, sensors):
                observables[name] = Observable(
                    name=name,
                    sensor=s,
                    sampling_rate=self.control_freq,
                )

        return observables

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

    def _check_success(self):
        """
        Check if peg is successfully aligned and placed within the hole

        Returns:
            bool: True if peg is placed in hole correctly
        """
        t, d, cos = self._compute_orientation()

        return d < 0.06 and -0.12 <= t <= 0.14 and cos > 0.95

    def _compute_orientation(self):
        """
        Helper function to return the relative positions between the hole and the peg.
        In particular, the intersection of the line defined by the peg and the plane
        defined by the hole is computed; the parallel distance, perpendicular distance,
        and angle are returned.

        Returns:
            3-tuple:

                - (float): parallel distance: offset of the hole center from the peg position, measured
                  along the peg axis (signed)
                - (float): perpendicular distance: distance of the hole center from the peg axis
                - (float): absolute cosine of the angle between the peg axis and the hole normal
        """
        # reshape() instead of assigning to .shape: same (3, 3) view of the
        # data, without modifying the array object in place
        peg_mat = self.sim.data.body_xmat[self.peg_body_id].reshape((3, 3))
        peg_pos = self.sim.data.body_xpos[self.peg_body_id]

        hole_pos = self.sim.data.body_xpos[self.hole_body_id]
        hole_mat = self.sim.data.body_xmat[self.hole_body_id].reshape((3, 3))

        # peg axis: the peg's local z direction expressed in the world frame
        v = peg_mat @ np.array([0, 0, 1])
        v = v / np.linalg.norm(v)
        # hole center sits 0.1 along the hole body's local x axis
        center = hole_pos + hole_mat @ np.array([0.1, 0, 0])

        t = (center - peg_pos) @ v / (np.linalg.norm(v)**2)
        d = np.linalg.norm(np.cross(v, peg_pos - center)) / np.linalg.norm(v)

        hole_normal = hole_mat @ np.array([0, 0, 1])
        return (
            t,
            d,
            abs(
                np.dot(hole_normal, v) / np.linalg.norm(hole_normal) /
                np.linalg.norm(v)),
        )

    def _peg_pose_in_hole_frame(self):
        """
        A helper function that returns the pose of the peg expressed in the
        frame of the hole body.

        Returns:
            np.array: (4,4) matrix corresponding to the pose of the peg in the hole frame
        """
        # World frame
        peg_pos_in_world = self.sim.data.get_body_xpos(self.peg.root_body)
        peg_rot_in_world = self.sim.data.get_body_xmat(
            self.peg.root_body).reshape((3, 3))
        peg_pose_in_world = T.make_pose(peg_pos_in_world, peg_rot_in_world)

        # World frame
        hole_pos_in_world = self.sim.data.get_body_xpos(self.hole.root_body)
        hole_rot_in_world = self.sim.data.get_body_xmat(
            self.hole.root_body).reshape((3, 3))
        hole_pose_in_world = T.make_pose(hole_pos_in_world, hole_rot_in_world)

        world_pose_in_hole = T.pose_inv(hole_pose_in_world)

        peg_pose_in_hole = T.pose_in_A_to_pose_in_B(peg_pose_in_world,
                                                    world_pose_in_hole)
        return peg_pose_in_hole