def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Verify the correct robot has been loaded
    assert isinstance(self.robots[0], SingleArm), \
        "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    self.mujoco_arena = PegsArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=(0, 0, 0.82),
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # Arena always gets set to zero origin
    self.mujoco_arena.set_origin([0, 0, 0])

    # define mujoco objects
    self.ob_inits = [SquareNutObject, RoundNutObject]
    self.item_names = ["SquareNut", "RoundNut"]
    self.item_names_org = list(self.item_names)
    self.obj_to_use = (self.item_names[1] + "{}").format(0)
    self.ngeoms = [5, 9]

    lst = []
    for i in range(len(self.ob_inits)):
        ob_name = self.item_names[i] + "0"
        ob = self.ob_inits[i](
            name=ob_name,
            joints=[dict(type="free", damping="0.0005")],  # damp the free joint for each object
        )
        lst.append((ob_name, ob))

    self.mujoco_objects = OrderedDict(lst)
    self.n_objects = len(self.mujoco_objects)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=self.mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.mujoco_objects,
        visual_objects=None,
        initializer=self.placement_initializer,
    )

    # set positions of objects
    self.model.place_objects()
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose(s) accordingly
    if self.env_configuration == "bimanual":
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)
    else:
        if self.env_configuration == "single-arm-opposed":
            # Set up robots facing towards each other by rotating them from their default position
            for robot, rotation in zip(self.robots, (np.pi / 2, -np.pi / 2)):
                xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0])
                rot = np.array((0, 0, rotation))
                xpos = T.euler2mat(rot) @ np.array(xpos)
                robot.robot_model.set_base_xpos(xpos)
                robot.robot_model.set_base_ori(rot)
        else:  # "single-arm-parallel" configuration setting
            # Set up robots parallel to each other but offset from the center
            for robot, offset in zip(self.robots, (-0.25, 0.25)):
                xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0])
                xpos = np.array(xpos) + np.array((0, offset, 0))
                robot.robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=(0, 0, 0.8),
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # Arena always gets set to zero origin
    self.mujoco_arena.set_origin([0, 0, 0])

    # initialize objects of interest
    self.pot = PotWithHandlesObject(name="pot")
    self.mujoco_objects = OrderedDict([("pot", self.pot)])

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=self.mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.mujoco_objects,
        visual_objects=None,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
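# Quick standalone check (illustrative only, not part of the environment) of the base
# placement math above: rotating a base offset by +/- pi/2 about z with T.euler2mat is
# what moves the two arms from the default +x side of the table to opposite y sides so
# they face each other. It assumes T is robosuite.utils.transform_utils, as in the
# snippets above; the offset value is an arbitrary example.
import numpy as np
import robosuite.utils.transform_utils as T

xpos = np.array((0.5, 0.0, 0.0))        # example base offset along +x
rot = np.array((0.0, 0.0, np.pi / 2))   # yaw of +90 degrees
print(T.euler2mat(rot) @ xpos)          # -> approximately [0., 0.5, 0.]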
def _load_model(self):
    super()._load_model()

    # there may be a banana
    arena = EmptyArena()

    # sphere = BallObject(
    #     name="sphere",
    #     size=[0.04],
    #     rgba=[0, 0.5, 0.5, 1]
    # )

    # # Create placement initializer
    # if self.placement_initializer is not None:
    #     self.placement_initializer.reset()
    #     self.placement_initializer.add_objects(sphere)
    # else:
    #     self.placement_initializer = UniformRandomSampler(
    #         name="ObjectSampler",
    #         mujoco_objects=sphere,
    #         x_range=[-0.03, 0.03],
    #         y_range=[-0.03, 0.03],
    #         rotation=None,
    #         ensure_object_boundary_in_range=False,
    #         ensure_valid_placement=True,
    #         z_offset=0.01,
    #     )

    self.model = ManipulationTask(
        mujoco_arena=arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=None,
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # Get robot's contact geoms
    self.robot_contact_geoms = self.robots[0].robot_model.contact_geoms

    mujoco_arena = WipeArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
        table_friction_std=self.table_friction_std,
        coverage_factor=self.coverage_factor,
        num_markers=self.num_markers,
        line_width=self.line_width,
        two_clusters=self.two_clusters,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # initialize objects of interest
    self.cube = BoxObject(
        name="cube",
        size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015]
        size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018]
        rgba=[1, 0, 0, 1],
    )
    self.goal = BoxObject(
        name="goal",
        size_min=[0.005, 0.005, 0.005],
        size_max=[0.005, 0.005, 0.005],
        rgba=[0, 0, 1, 1],
        density=11342,  # Density of lead such that the goal is hard to move
    )

    # Create placement initializer
    if self.placement_initializer is not None:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects([self.cube, self.goal])
    else:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=[self.cube, self.goal],
            x_range=[-0.20, 0.20],
            y_range=[-0.20, 0.20],
            rotation=None,
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
            z_offset=0.01,
        )

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=[self.cube, self.goal],
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Verify the correct robot has been loaded
    assert isinstance(self.robots[0], SingleArm), \
        "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_offset=self.table_offset,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # Arena always gets set to zero origin
    self.mujoco_arena.set_origin([0, 0, 0])

    # initialize objects of interest
    door = DoorObject(
        name="Door",
        friction=0.0,
        damping=0.1,
        lock=self.use_latch,
        joints=[],  # ensures that door object does not have a free joint
    )
    self.mujoco_objects = OrderedDict([("Door", door)])
    self.n_objects = len(self.mujoco_objects)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=self.mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.mujoco_objects,
        visual_objects=None,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Verify the correct robot has been loaded
    assert isinstance(self.robots[0], SingleArm), \
        "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # Get robot's contact geoms
    self.robot_contact_geoms = self.robots[0].robot_model.contact_geoms

    # Delta goes down
    delta_height = min(0, np.random.normal(self.table_height, self.table_height_std))

    self.mujoco_arena = WipeArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=np.array(self.table_offset) + np.array((0, 0, delta_height)),
        table_friction_std=self.table_friction_std,
        coverage_factor=self.coverage_factor,
        num_sensors=self.num_sensors,
        line_width=self.line_width,
        two_clusters=self.two_clusters,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # Arena always gets set to zero origin
    self.mujoco_arena.set_origin([0, 0, 0])

    self.mujoco_objects = OrderedDict()

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        self.mujoco_arena,
        [robot.robot_model for robot in self.robots],
        self.mujoco_objects,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # Modify default agentview camera
    mujoco_arena.set_camera(
        camera_name="agentview",
        pos=[0.5986131746834771, -4.392035683362857e-09, 1.5903500240372423],
        quat=[0.6380177736282349, 0.3048497438430786, 0.30484986305236816, 0.6380177736282349],
    )

    # initialize objects of interest
    self.door = DoorObject(
        name="Door",
        friction=0.0,
        damping=0.1,
        lock=self.use_latch,
    )

    # Create placement initializer
    if self.placement_initializer is not None:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects(self.door)
    else:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=self.door,
            x_range=[0.07, 0.09],
            y_range=[-0.01, 0.01],
            rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
            rotation_axis='z',
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
        )

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.door,
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["bins"]
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = BinsArena(
        bin1_pos=self.bin1_pos,
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # store some arena attributes
    self.bin_size = mujoco_arena.table_full_size

    self.objects = []
    self.visual_objects = []
    for vis_obj_cls, obj_name in zip(
        (MilkVisualObject, BreadVisualObject, CerealVisualObject, CanVisualObject),
        self.obj_names,
    ):
        vis_name = "Visual" + obj_name
        vis_obj = vis_obj_cls(name=vis_name)
        self.visual_objects.append(vis_obj)

    for obj_cls, obj_name in zip(
        (MilkObject, BreadObject, CerealObject, CanObject),
        self.obj_names,
    ):
        obj = obj_cls(name=obj_name)
        self.objects.append(obj)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.visual_objects + self.objects,
    )

    # Generate placement initializer
    self._get_placement_initializer()
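# The `_get_placement_initializer` helper called above is not shown in this file. Below is
# a minimal sketch of what such a helper might look like, assuming the same
# UniformRandomSampler API used by the other environments here; the ranges and the single
# shared sampler are illustrative assumptions, not the actual pick-and-place implementation.
def _get_placement_initializer(self):
    """Builds a sampler that scatters all pickable objects inside the first bin."""
    self.placement_initializer = UniformRandomSampler(
        name="ObjectSampler",
        mujoco_objects=self.objects,
        x_range=[-0.1, 0.1],            # assumed spread around the bin center
        y_range=[-0.1, 0.1],
        rotation=None,
        ensure_object_boundary_in_range=True,
        ensure_valid_placement=True,
        reference_pos=self.bin1_pos,    # place objects relative to bin 1
        z_offset=0.01,
    )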
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # load model for the workspace
    mujoco_arena = EmptyArena()

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # initialize objects of interest
    self.cube = BoxObject(name="cube")

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=[self.cube],
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    SingleArmEnv._load_model(self)

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    cube_material = self._get_cube_material()
    self.cube = BoxObject(
        name="cube",
        size_min=(0.025, 0.025, 0.025),
        size_max=(0.025, 0.025, 0.025),
        rgba=[1, 0, 0, 1],
        material=cube_material,
    )

    self.placement_initializer.reset()
    self.placement_initializer.add_objects(self.cube)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.cube,
    )
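# `_get_cube_material` is not defined in this snippet. A minimal sketch of such a helper,
# assuming it builds a CustomMaterial the same way the redwood material is built in the
# other environments in this file; the texture choice is an assumption.
def _get_cube_material(self):
    """Returns a CustomMaterial to apply to the cube."""
    tex_attrib = {"type": "cube"}
    mat_attrib = {"texrepeat": "1 1", "specular": "0.4", "shininess": "0.1"}
    return CustomMaterial(
        texture="WoodRed",
        tex_name="redwood",
        mat_name="redwood_mat",
        tex_attrib=tex_attrib,
        mat_attrib=mat_attrib,
    )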
class Door(RobotEnv):
    """
    This class corresponds to the door opening task for a single robot arm.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default gripper(s) associated
            with the robot(s) in the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        use_latch (bool): if True, uses a spring-loaded handle and latch to "lock" the door closed initially.
            Otherwise, door is instantiated with a fixed handle

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (door) information in the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        use_latch=True,
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top (hardcoded since it's not an essential part of the environment)
        self.table_full_size = (0.8, 0.3, 0.05)
        self.table_offset = (-0.2, -0.35, 0.8)

        # reward configuration
        self.use_latch = use_latch
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[0.07, 0.09],
                y_range=[-0.01, 0.01],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
                rotation_axis='z',
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 1.0 is provided if the door is opened

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 0.25], proportional to the distance between door handle and robot arm
            - Rotating: in [0, 0.25], proportional to angle rotated by door handle
              - Note that this component is only relevant if the environment is using the locked door version

        Note that a successfully completed task (door opened) will return 1.0 regardless of whether the environment
        is using sparse or shaped rewards

        Note that the final reward is normalized and scaled by reward_scale / 1.0 as well so that the max score
        is equal to reward_scale

        Args:
            action (np.array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # else, we consider only the case if we're using shaped rewards
        elif self.reward_shaping:
            # Add reaching component
            dist = np.linalg.norm(self._gripper_to_handle)
            reaching_reward = 0.25 * (1 - np.tanh(10.0 * dist))
            reward += reaching_reward
            # Add rotating component if we're using a locked door
            if self.use_latch:
                handle_qpos = self.sim.data.qpos[self.handle_qpos_addr]
                reward += np.clip(0.25 * np.abs(handle_qpos / (0.5 * np.pi)), -0.25, 0.25)

        # Scale reward if requested
        if self.reward_scale is not None:
            reward *= self.reward_scale / 1.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_offset=self.table_offset,
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        door = DoorObject(
            name="Door",
            friction=0.0,
            damping=0.1,
            lock=self.use_latch,
            joints=[],  # ensures that door object does not have a free joint
        )
        self.mujoco_objects = OrderedDict([("Door", door)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.object_body_ids = dict()
        self.object_body_ids["door"] = self.sim.model.body_name2id("door")
        self.object_body_ids["frame"] = self.sim.model.body_name2id("frame")
        self.object_body_ids["latch"] = self.sim.model.body_name2id("latch")
        self.door_handle_site_id = self.sim.model.site_name2id("door_handle")
        self.hinge_qpos_addr = self.sim.model.get_joint_qpos_addr("door_hinge")
        if self.use_latch:
            self.handle_qpos_addr = self.sim.model.get_joint_qpos_addr("latch_joint")

        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["left_finger"]
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["right_finger"]
        ]

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:
            # Sample from the placement initializer for the Door object
            door_pos, door_quat = self.model.place_objects()
            door_body_id = self.sim.model.body_name2id("Door")
            self.sim.model.body_pos[door_body_id] = door_pos[0]
            self.sim.model.body_quat[door_body_id] = door_quat[0]

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            pr = self.robots[0].robot_model.naming_prefix

            eef_pos = self._eef_xpos
            door_pos = np.array(self.sim.data.body_xpos[self.object_body_ids["door"]])
            handle_pos = self._handle_xpos
            hinge_qpos = np.array([self.sim.data.qpos[self.hinge_qpos_addr]])

            di["door_pos"] = door_pos
            di["handle_pos"] = handle_pos
            di[pr + "door_to_eef_pos"] = door_pos - eef_pos
            di[pr + "handle_to_eef_pos"] = handle_pos - eef_pos
            di["hinge_qpos"] = hinge_qpos

            di['object-state'] = np.concatenate([
                di["door_pos"],
                di["handle_pos"],
                di[pr + "door_to_eef_pos"],
                di[pr + "handle_to_eef_pos"],
                di["hinge_qpos"],
            ])

            # Also append handle qpos if we're using a locked door version with rotatable handle
            if self.use_latch:
                handle_qpos = np.array([self.sim.data.qpos[self.handle_qpos_addr]])
                di["handle_qpos"] = handle_qpos
                di['object-state'] = np.concatenate([di["object-state"], di["handle_qpos"]])

        return di

    def _check_success(self):
        """
        Check if door has been opened.

        Returns:
            bool: True if door has been opened
        """
        hinge_qpos = self.sim.data.qpos[self.hinge_qpos_addr]
        return hinge_qpos > 0.3

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to door handle
        if self.robots[0].gripper_visualization:
            # get distance to door handle
            dist = np.sum(
                np.square(
                    self._handle_xpos
                    - self.sim.data.get_site_xpos(self.robots[0].gripper.visualization_sites["grip_site"])
                )
            )

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.)) ** 15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(robots) == 1, "Error: Only one robot should be inputted for this task!"

    @property
    def _eef_xpos(self):
        """
        Grabs End Effector position

        Returns:
            np.array: End effector (x,y,z)
        """
        return np.array(self.sim.data.site_xpos[self.robots[0].eef_site_id])

    @property
    def _handle_xpos(self):
        """
        Grabs the position of the door handle.

        Returns:
            np.array: Door handle (x,y,z)
        """
        return self.sim.data.site_xpos[self.door_handle_site_id]

    @property
    def _gripper_to_handle(self):
        """
        Calculates distance from the gripper to the door handle.

        Returns:
            np.array: (x,y,z) distance between handle and eef
        """
        return self._handle_xpos - self._eef_xpos
class FetchPush(RobotEnv):
    """
    This class corresponds to the fetch/push task for a single robot arm. Many similarities with lifting task.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default gripper(s) associated
            with the robot(s) in the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for the table.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        distance_threshold (float): cube-to-goal distance below which the task is considered successful.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(5e-3, 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        distance_threshold=0.06,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.distance_threshold = distance_threshold

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.22, 0.22],
                y_range=[-0.22, 0.22],
                ensure_object_boundary_in_range=False,
                rotation=None,
                z_offset=0.002,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def _distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        return np.linalg.norm(goal_a - goal_b)

    def reward(self, action=None):
        """
        Reward function for the task.

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0.

        cube_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.body_xpos[self.goal_cube_body_id]
        gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]

        cube_to_goal_dist = self._distance(cube_pos, goal_pos)
        gripper_to_cube_dist = self._distance(gripper_site_pos, cube_pos)

        if self.reward_shaping:
            # Return large reward if cube has been moved to goal
            if self._check_success():
                reward += 2.25

            # Reaching reward
            reward += 1 - np.tanh(10.0 * gripper_to_cube_dist)

            if self._is_gripper_touching_cube():
                reward += 0.5  # Reward for touching cube

            reward += 1.5 - 1.5 * np.tanh(10.0 * cube_to_goal_dist)  # Reward for pushing cube

            # Return large penalty if robot has moved significantly away from cube
            # if self._has_moved_significantly_away_from_cube():
            #     reward += -2

            # Reward for touching cube and pushing toward goal
            # if self._is_gripper_touching_cube():
            #     reward += 0.1 + (1 - np.tanh(10.0 * cube_to_goal_dist))

            # Give penalty for touching table
            # if self._is_gripper_touching_table():
            #     reward -= 0.1
        else:
            reward += -float(not self._check_success())

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        cube = BoxObject(
            name="cube",
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015]
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018]
            rgba=[1, 0, 0, 1],
            material=redwood,
            friction=[0.5, 5e-3, 1e-4],
        )
        goal_cube = BoxObject(
            name="goal_cube",
            size_min=[0.005, 0.005, 0.005],
            size_max=[0.005, 0.005, 0.005],
            rgba=[0, 0, 1, 1],
            density=11342,  # Density of lead such that the goal won't be moved
        )
        self.mujoco_objects = OrderedDict([("cube", cube), ("goal_cube", goal_cube)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.goal_cube_body_id = self.sim.model.body_name2id("goal_cube")

        if self.robots[0].gripper_type == 'UltrasoundProbeGripper':
            self.probe_geom_id = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["probe"]
            ]
        elif self.robots[0].has_gripper:
            self.l_finger_geom_ids = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["left_finger"]
            ]
            self.r_finger_geom_ids = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["right_finger"]
            ]

        self.cube_geom_id = self.sim.model.geom_name2id("cube")
        self.goal_cube_geom_id = self.sim.model.geom_name2id("goal_cube")
        self.table_geom_id = self.sim.model.geom_name2id("table_collision")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:
            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate([np.array(obj_pos[i]), np.array(obj_quat[i])]),
                )

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # Get robot prefix
        pr = self.robots[0].robot_model.naming_prefix

        if self.robots[0].has_gripper:
            # Checking if the UltrasoundProbeGripper is used
            if self.robots[0].gripper.dof == 0:
                # Remove unused keys (no joints in gripper)
                di.pop('robot0_gripper_qpos', None)
                di.pop('robot0_gripper_qvel', None)

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            goal_pos = np.array(self.sim.data.body_xpos[self.goal_cube_body_id])
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            di["cube_pos"] = cube_pos
            di["goal_pos"] = goal_pos
            di[pr + "cube_to_goal"] = cube_pos - goal_pos

            cube_quat = convert_quat(np.array(self.sim.data.body_xquat[self.cube_body_id]), to="xyzw")

            gripper_site_pos = np.array(self.sim.data.site_xpos[self.robots[0].eef_site_id])
            di[pr + "gripper_to_cube"] = gripper_site_pos - cube_pos

            # Used for GymWrapper observations (Robot state will also default be added e.g. eef position)
            di["object-state"] = np.concatenate([
                cube_pos,
                cube_quat,
                goal_pos,
                di[pr + "cube_to_goal"],
                di[pr + "gripper_to_cube"],
            ])

        return di

    def _check_success(self):
        """
        Check if cube has been pushed to goal.

        Returns:
            bool: True if cube has been pushed to goal
        """
        cube_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.body_xpos[self.goal_cube_body_id]

        cube_to_goal_dist = self._distance(cube_pos, goal_pos)

        return cube_to_goal_dist < self.distance_threshold

    '''
    def _has_moved_significantly_away_from_cube(self):
        """
        Check if the robot has moved away from cube between steps.

        Returns:
            bool: True if the robot's end-effector has moved far away from cube
        """
        return self.gripper_to_cube_dist > self.initial_gripper_to_cube_dist + 0.12

    def _check_terminated(self):
        """
        Check if the task has completed one way or another. The following conditions lead to termination:

            - Task completion (cube pushed to goal)

        Should this be added?
            - Robot moving away from cube

        Returns:
            bool: True if episode is terminated
        """
        return self._has_moved_significantly_away_from_cube()
    '''

    def _is_gripper_touching_cube(self):
        """
        Check if the gripper is in contact with the cube

        Returns:
            bool: True if contact
        """
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            geom_name1 = self.sim.model.geom_id2name(contact.geom1)
            geom_name2 = self.sim.model.geom_id2name(contact.geom2)

            if (geom_name1 in self.robots[0].gripper.contact_geoms
                    and geom_name2 == self.sim.model.geom_id2name(self.cube_geom_id)
                    or geom_name2 in self.robots[0].gripper.contact_geoms
                    and geom_name1 == self.sim.model.geom_id2name(self.cube_geom_id)):
                return True
        return False

    def _is_gripper_touching_table(self):
        """
        Check if the gripper is in contact with the tabletop

        Returns:
            bool: True if contact
        """
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            geom_name1 = self.sim.model.geom_id2name(contact.geom1)
            geom_name2 = self.sim.model.geom_id2name(contact.geom2)

            if (geom_name1 in self.robots[0].gripper.contact_geoms
                    and geom_name2 == self.sim.model.geom_id2name(self.table_geom_id)
                    or geom_name2 in self.robots[0].gripper.contact_geoms
                    and geom_name1 == self.sim.model.geom_id2name(self.table_geom_id)):
                return True
        return False

    def _post_action(self, action):
        """
        In addition to super method, add additional info if requested

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            3-tuple:
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) info about current env step
        """
        reward, done, info = super()._post_action(action)
        # done = done or self._check_terminated()
        return reward, done, info

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.robots[0].gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(
                    self.sim.data.site_xpos[cube_site_id]
                    - self.sim.data.get_site_xpos(self.robots[0].gripper.visualization_sites["grip_site"])
                )
            )

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.)) ** 15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(robots) == 1, "Error: Only one robot should be inputted for this task!"
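# Illustrative arithmetic for the shaped FetchPush reward above (standalone, not part of
# the class): with the gripper 5 cm from the cube, the cube 10 cm from the goal, and no
# success or contact bonus, the per-step shaped reward comes out to roughly 0.9.
import numpy as np

gripper_to_cube_dist = 0.05
cube_to_goal_dist = 0.10
reaching = 1 - np.tanh(10.0 * gripper_to_cube_dist)      # ~0.54
pushing = 1.5 - 1.5 * np.tanh(10.0 * cube_to_goal_dist)  # ~0.36
print(reaching + pushing)                                # ~0.90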
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # initialize objects of interest
    tex_attrib = {
        "type": "cube",
    }
    mat_attrib = {
        "texrepeat": "1 1",
        "specular": "0.4",
        "shininess": "0.1",
    }
    redwood = CustomMaterial(
        texture="WoodRed",
        tex_name="redwood",
        mat_name="redwood_mat",
        tex_attrib=tex_attrib,
        mat_attrib=mat_attrib,
    )
    greenwood = CustomMaterial(
        texture="WoodGreen",
        tex_name="greenwood",
        mat_name="greenwood_mat",
        tex_attrib=tex_attrib,
        mat_attrib=mat_attrib,
    )
    self.cubeA = BoxObject(
        name="cubeA",
        size_min=[0.02, 0.02, 0.02],
        size_max=[0.02, 0.02, 0.02],
        rgba=[1, 0, 0, 1],
        material=redwood,
    )
    self.cubeB = BoxObject(
        name="cubeB",
        size_min=[0.025, 0.025, 0.025],
        size_max=[0.025, 0.025, 0.025],
        rgba=[0, 1, 0, 1],
        material=greenwood,
    )
    cubes = [self.cubeA, self.cubeB]

    # Create placement initializer
    if self.placement_initializer is not None:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects(cubes)
    else:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=cubes,
            x_range=[-0.08, 0.08],
            y_range=[-0.08, 0.08],
            rotation=None,
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
            z_offset=0.01,
        )

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=cubes,
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model
    """
    super()._load_model()

    # Adjust base pose(s) accordingly
    if self.env_configuration == "bimanual":
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)
    else:
        if self.env_configuration == "single-arm-opposed":
            # Set up robots facing towards each other by rotating them from their default position
            for robot, rotation, offset in zip(self.robots, (np.pi / 2, -np.pi / 2), (-0.25, 0.25)):
                xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0])
                rot = np.array((0, 0, rotation))
                xpos = T.euler2mat(rot) @ np.array(xpos)
                xpos += np.array((0, offset, 0))
                robot.robot_model.set_base_xpos(xpos)
                robot.robot_model.set_base_ori(rot)
        else:  # "single-arm-parallel" configuration setting
            # Set up robots parallel to each other but offset from the center
            for robot, offset in zip(self.robots, (-0.6, 0.6)):
                xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0])
                xpos = np.array(xpos) + np.array((0, offset, 0))
                robot.robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_true_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # Modify default agentview camera
    mujoco_arena.set_camera(
        camera_name="agentview",
        pos=[0.8894354364730311, -3.481824231498976e-08, 1.7383813133506494],
        quat=[0.6530981063842773, 0.2710406184196472, 0.27104079723358154, 0.6530979871749878],
    )

    # initialize objects of interest
    self.hammer = HammerObject(name="hammer")

    # Create placement initializer
    if self.placement_initializer is not None:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects(self.hammer)
    else:
        # Set rotation about y-axis if hammer starts on table else rotate about z if it starts in gripper
        rotation_axis = 'y' if self.prehensile else 'z'
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=self.hammer,
            x_range=[-0.1, 0.1],
            y_range=[-0.05, 0.05],
            rotation=None,
            rotation_axis=rotation_axis,
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
        )

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.hammer,
    )
class Wipe(RobotEnv):
    """
    This class corresponds to the Wiping task for a single robot arm

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. For this environment, setting a value other than the default
            ("WipingGripper") will raise an AssertionError, as this environment is not meant to be used with any
            other alternative gripper.

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object information in the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

        task_config (None or dict): Specifies the parameters relevant to this task. For a full list of expected
            parameters, see the default configuration dict at the top of this file. If None is specified, the
            default configuration will be used.

    Raises:
        AssertionError: [Gripper specified]
        AssertionError: [Bad reward specification]
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="WipingGripper",
        gripper_visualizations=False,
        initialization_noise="default",
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=True,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
        task_config=None,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # Assert that the gripper type is the wiping gripper
        assert gripper_types == "WipingGripper",\
            "Tried to specify gripper other than WipingGripper in Wipe environment!"

        # Get config
        self.task_config = task_config if task_config is not None else DEFAULT_WIPE_CONFIG

        # Set task-specific parameters

        # settings for the reward
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.arm_limit_collision_penalty = self.task_config['arm_limit_collision_penalty']
        self.wipe_contact_reward = self.task_config['wipe_contact_reward']
        self.unit_wiped_reward = self.task_config['unit_wiped_reward']
        self.ee_accel_penalty = self.task_config['ee_accel_penalty']
        self.excess_force_penalty_mul = self.task_config['excess_force_penalty_mul']
        self.distance_multiplier = self.task_config['distance_multiplier']
        self.distance_th_multiplier = self.task_config['distance_th_multiplier']
        # Final reward computation
        # Chosen so that it is better to finish the task than to stay in contact with the table for 100 steps
        # The 0.5 comes from continuous_distance_reward at 0. If something changes, this may change as well
        self.task_complete_reward = 50 * (self.wipe_contact_reward + 0.5)
        # Verify that the distance multiplier is not greater than the task complete reward
        assert self.task_complete_reward > self.distance_multiplier,\
            "Distance multiplier cannot be greater than task complete reward!"

        # settings for table top
        self.table_full_size = self.task_config['table_full_size']
        self.table_offset = self.task_config['table_offset']
        self.table_friction = self.task_config['table_friction']
        self.table_friction_std = self.task_config['table_friction_std']
        self.table_height = self.task_config['table_height']
        self.table_height_std = self.task_config['table_height_std']
        self.line_width = self.task_config['line_width']
        self.two_clusters = self.task_config['two_clusters']
        self.coverage_factor = self.task_config['coverage_factor']
        self.num_sensors = self.task_config['num_sensors']

        # settings for thresholds
        self.contact_threshold = self.task_config['contact_threshold']
        self.touch_threshold = self.task_config['touch_threshold']
        self.pressure_threshold = self.task_config['touch_threshold']
        self.pressure_threshold_max = self.task_config['pressure_threshold_max']

        # misc settings
        self.print_results = self.task_config['print_results']
        self.get_info = self.task_config['get_info']
        self.use_robot_obs = self.task_config['use_robot_obs']
        self.early_terminations = self.task_config['early_terminations']

        # Scale reward if desired (see reward method for details)
        self.reward_normalization_factor = horizon / \
            (self.num_sensors * self.unit_wiped_reward
             + horizon * (self.wipe_contact_reward + self.task_complete_reward))

        # ee resets
        self.ee_force_bias = np.zeros(3)
        self.ee_torque_bias = np.zeros(3)

        # set other wipe-specific attributes
        self.wiped_sensors = []
        self.collisions = 0
        self.f_excess = 0
        self.metadata = []
        self.spec = "spec"

        # whether to include and use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[0, 0.2],
                y_range=[0, 0.2],
                ensure_object_boundary_in_range=False,
                rotation=None,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of self.unit_wiped_reward is provided per single dirt (peg) wiped during this step
            - a discrete reward of self.task_complete_reward is provided if all dirt is wiped

        Note that if the arm is either colliding or near its joint limit, a reward of 0 will be automatically given

        Un-normalized summed components if using reward shaping (individual components can be set to 0):

            - Reaching: in [0, self.distance_multiplier], proportional to distance between wiper and centroid of
              dirt, and zero if the table has been fully wiped clean of all the dirt
            - Table Contact: in {0, self.wipe_contact_reward}, non-zero if wiper is in contact with table
            - Wiping: in {0, self.unit_wiped_reward}, non-zero for each dirt (peg) wiped during this step
            - Cleaned: in {0, self.task_complete_reward}, non-zero if no dirt remains on the table
            - Collision / Joint Limit Penalty: in {self.arm_limit_collision_penalty, 0}, nonzero if robot arm
              is colliding with an object
              - Note that if this value is nonzero, no other reward components can be added
            - Large Force Penalty: in [-inf, 0], scaled by wiper force and directly proportional to
              self.excess_force_penalty_mul if the current force exceeds self.pressure_threshold_max
            - Large Acceleration Penalty: in [-inf, 0], scaled by estimated wiper acceleration and directly
              proportional to self.ee_accel_penalty

        Note that the final per-step reward is normalized given the theoretical best episode return and then scaled:
        reward_scale * (horizon / (num_sensors * unit_wiped_reward + horizon * (wipe_contact_reward + task_complete_reward)))

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0

        total_force_ee = np.linalg.norm(np.array(self.robots[0].recent_ee_forcetorques.current[:3]))

        # Neg Reward from collisions of the arm with the table
        if self._check_arm_contact()[0]:
            if self.reward_shaping:
                reward = self.arm_limit_collision_penalty
            self.collisions += 1
        elif self._check_q_limits()[0]:
            if self.reward_shaping:
                reward = self.arm_limit_collision_penalty
            self.collisions += 1
        else:
            # If the arm is not colliding or in joint limits, we check if we are wiping
            # (we don't want to reward wiping if there are unsafe situations)
            sensors_active_ids = []

            # Current 3D location of the corners of the wiping tool in world frame
            c_geoms = self.robots[0].gripper.important_geoms["corners"]
            corner1_id = self.sim.model.geom_name2id(c_geoms[0])
            corner1_pos = np.array(self.sim.data.geom_xpos[corner1_id])
            corner2_id = self.sim.model.geom_name2id(c_geoms[1])
            corner2_pos = np.array(self.sim.data.geom_xpos[corner2_id])
            corner3_id = self.sim.model.geom_name2id(c_geoms[2])
            corner3_pos = np.array(self.sim.data.geom_xpos[corner3_id])
            corner4_id = self.sim.model.geom_name2id(c_geoms[3])
            corner4_pos = np.array(self.sim.data.geom_xpos[corner4_id])

            # Unit vectors on my plane
            v1 = corner1_pos - corner2_pos
            v1 /= np.linalg.norm(v1)
            v2 = corner4_pos - corner2_pos
            v2 /= np.linalg.norm(v2)

            # Corners of the tool in the coordinate frame of the plane
            t1 = np.array([np.dot(corner1_pos - corner2_pos, v1), np.dot(corner1_pos - corner2_pos, v2)])
            t2 = np.array([np.dot(corner2_pos - corner2_pos, v1), np.dot(corner2_pos - corner2_pos, v2)])
            t3 = np.array([np.dot(corner3_pos - corner2_pos, v1), np.dot(corner3_pos - corner2_pos, v2)])
            t4 = np.array([np.dot(corner4_pos - corner2_pos, v1), np.dot(corner4_pos - corner2_pos, v2)])

            pp = [t1, t2, t4, t3]

            # Normal of the plane defined by v1 and v2
            n = np.cross(v1, v2)
            n /= np.linalg.norm(n)

            def isLeft(P0, P1, P2):
                # Standard 2D cross-product (left-of-line) test used by winding-number
                # point-in-polygon checks
                return (P1[0] - P0[0]) * (P2[1] - P0[1]) - (P2[0] - P0[0]) * (P1[1] - P0[1])
((P1[0] - P0[0]) * (P2[1] - P0[1]) - (P2[0] - P0[0]) * (P1[1] - P0[1])) def PointInRectangle(X, Y, Z, W, P): return (isLeft(X, Y, P) < 0 and isLeft(Y, Z, P) < 0 and isLeft(Z, W, P) < 0 and isLeft(W, X, P) < 0) # Only go into this computation if there are contact points if self.sim.data.ncon != 0: # Check each sensor that is still active for sensor_name in self.model.arena.sensor_names: # Current sensor 3D location in world frame # sensor_pos = np.array( # self.sim.data.body_xpos[self.sim.model.site_bodyid[self.sim.model.site_name2id(self.model.arena.sensor_site_names[sensor_name])]]) sensor_pos = np.array( self.sim.data.site_xpos[self.sim.model.site_name2id( self.model.arena.sensor_site_names[sensor_name])]) # We use the second tool corner as point on the plane and define the vector connecting # the sensor position to that point v = sensor_pos - corner2_pos # Shortest distance between the center of the sensor and the plane dist = np.dot(v, n) # Projection of the center of the sensor onto the plane projected_point = np.array(sensor_pos) - dist * n # Positive distances means the center of the sensor is over the plane # The plane is aligned with the bottom of the wiper and pointing up, so the sensor would be over it if dist > 0.0: # Distance smaller than this threshold means we are close to the plane on the upper part if dist < 0.02: # Write touching points and projected point in coordinates of the plane pp_2 = np.array([ np.dot(projected_point - corner2_pos, v1), np.dot(projected_point - corner2_pos, v2) ]) # Check if sensor is within the tool center: if PointInRectangle(pp[0], pp[1], pp[2], pp[3], pp_2): parts = sensor_name.split('_') sensors_active_ids += [int(parts[1])] # Obtain the list of currently active (wiped) sensors that where not wiped before # These are the sensors we are wiping at this step lall = np.where( np.isin(sensors_active_ids, self.wiped_sensors, invert=True)) new_sensors_active_ids = np.array(sensors_active_ids)[lall] # Loop through all new sensors we are wiping at this step for new_sensor_active_id in new_sensors_active_ids: # Grab relevant sensor id info sensor_name = self.model.arena.sensor_site_names[ 'contact_' + str(new_sensor_active_id) + '_sensor'] new_sensor_active_geom_id = self.sim.model.geom_name2id( sensor_name) # Make this sensor transparent since we wiped it (alpha = 0) self.sim.model.geom_rgba[new_sensor_active_geom_id] = [ 0, 0, 0, 0 ] # Add this sensor the wiped list self.wiped_sensors += [new_sensor_active_id] # Add reward if we're using the dense reward if self.reward_shaping: reward += self.unit_wiped_reward # Additional reward components if using dense rewards if self.reward_shaping: # If we haven't wiped all the sensors yet, add a smooth reward for getting closer # to the centroid of the dirt to wipe if len(self.wiped_sensors) < len( self.model.arena.sensor_names): mean_distance_to_things_to_wipe = 0 num_non_wiped_sensors = 0 for sensor_name in self.model.arena.sensor_names: parts = sensor_name.split('_') sensor_id = int(parts[1]) if sensor_id not in self.wiped_sensors: sensor_pos = np.array(self.sim.data.site_xpos[ self.sim.model.site_name2id( self.model.arena. 
sensor_site_names[sensor_name])]) gripper_position = np.array( self.sim.data.site_xpos[ self.robots[0].eef_site_id]) mean_distance_to_things_to_wipe += np.linalg.norm( gripper_position - sensor_pos) num_non_wiped_sensors += 1 mean_distance_to_things_to_wipe /= max( 1, num_non_wiped_sensors) reward += self.distance_multiplier * ( 1 - np.tanh(self.distance_th_multiplier * mean_distance_to_things_to_wipe)) # Reward for keeping contact if self.sim.data.ncon != 0 and self._has_gripper_contact: reward += self.wipe_contact_reward # Penalty for excessive force with the end-effector if total_force_ee > self.pressure_threshold_max: reward -= self.excess_force_penalty_mul * total_force_ee self.f_excess += 1 # Penalize large accelerations reward -= self.ee_accel_penalty * np.mean( abs(self.robots[0].recent_ee_acc.current)) # Final reward if all wiped if len(self.wiped_sensors) == len(self.model.arena.sensor_names): reward += self.task_complete_reward # Printing results if self.print_results: string_to_print = 'Process {pid}, timestep {ts:>4}: reward: {rw:8.4f} wiped sensors: {ws:>3} collisions: {sc:>3} f-excess: {fe:>3}'.format( pid=id(multiprocessing.current_process()), ts=self.timestep, rw=reward, ws=len(self.wiped_sensors), sc=self.collisions, fe=self.f_excess) print(string_to_print) # If we're scaling our reward, we normalize the per-step rewards given the theoretical best episode return # This is equivalent to scaling the reward by: # reward_scale * (horizon / # (num_sensors * unit_wiped_reward + horizon * (wipe_contact_reward + task_complete_reward))) if self.reward_scale: reward *= self.reward_scale * self.reward_normalization_factor return reward def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Verify the correct robot has been loaded assert isinstance(self.robots[0], SingleArm), \ "Error: Expected one single-armed robot! 
Got {} type instead.".format(type(self.robots[0])) # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # Get robot's contact geoms self.robot_contact_geoms = self.robots[0].robot_model.contact_geoms # Delta goes down delta_height = min( 0, np.random.normal(self.table_height, self.table_height_std)) self.mujoco_arena = WipeArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=np.array(self.table_offset) + np.array( (0, 0, delta_height)), table_friction_std=self.table_friction_std, coverage_factor=self.coverage_factor, num_sensors=self.num_sensors, line_width=self.line_width, two_clusters=self.two_clusters) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) self.mujoco_objects = OrderedDict() # task includes arena, robot, and objects of interest self.model = ManipulationTask( self.mujoco_arena, [robot.robot_model for robot in self.robots], self.mujoco_objects, initializer=self.placement_initializer) self.model.place_objects() def _reset_internal(self): super()._reset_internal() # inherited class should reset positions of objects (only if we're not using a deterministic reset) if not self.deterministic_reset: self.model.place_objects() self.mujoco_arena.reset_arena(self.sim) # Reset all internal vars for this wipe task self.timestep = 0 self.wiped_sensors = [] self.collisions = 0 self.f_excess = 0 # ee resets - bias at initial state self.ee_force_bias = self.robots[0].ee_force self.ee_torque_bias = self.robots[0].ee_torque def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: `'robot-state'`: contains robot-centric information. `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information. `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation. `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True. 
Contains a rendered depth map from the simulation Returns: OrderedDict: Observations from the environment """ di = super()._get_observation() # Get prefix from robot model to avoid naming clashes for multiple robots pf = self.robots[0].robot_model.naming_prefix # Add binary contact observation di[pf + "contact-obs"] = self._has_gripper_contact di[pf + "robot-state"] = np.concatenate( (di[pf + "robot-state"], [di[pf + "contact-obs"]])) # object information in the observation if self.use_object_obs: gripper_site_pos = np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id]) # position of objects to wipe acc = np.array([]) for sensor_name in self.model.arena.sensor_names: parts = sensor_name.split('_') sensor_id = int(parts[1]) sensor_pos = np.array( self.sim.data.site_xpos[self.sim.model.site_name2id( self.model.arena.sensor_site_names[sensor_name])]) di['sensor' + str(sensor_id) + '_pos'] = sensor_pos acc = np.concatenate( [acc, di['sensor' + str(sensor_id) + '_pos']]) di['sensor' + str(sensor_id) + '_wiped'] = [0, 1][sensor_id in self.wiped_sensors] acc = np.concatenate( [acc, [di['sensor' + str(sensor_id) + '_wiped']]]) # proprioception if self.use_robot_obs: di['gripper_to_sensor' + str(sensor_id)] = gripper_site_pos - sensor_pos acc = np.concatenate( [acc, di['gripper_to_sensor' + str(sensor_id)]]) di['object-state'] = acc return di def _check_success(self): """ Checks if Task succeeds (all dirt wiped). Returns: bool: True if completed task """ return True if len(self.wiped_sensors) == len( self.model.arena.sensor_names) else False def _check_terminated(self): """ Check if the task has completed one way or another. The following conditions lead to termination: - Collision - Task completion (wiping succeeded) - Joint Limit reached Returns: bool: True if episode is terminated """ terminated = False # Prematurely terminate if contacting the table with the arm if self._check_arm_contact()[0]: if self.print_results: print(40 * '-' + " COLLIDED " + 40 * '-') terminated = True # Prematurely terminate if task is success if self._check_success(): if self.print_results: print(40 * '+' + " FINISHED WIPING " + 40 * '+') terminated = True # Prematurely terminate if the joint limits are reached if self._check_q_limits()[0]: if self.print_results: print(40 * '-' + " JOINT LIMIT " + 40 * '-') terminated = True return terminated def _post_action(self, action): """ In addition to super method, add additional info if requested Args: action (np.array): Action to execute within the environment Returns: 3-tuple: - (float) reward from the environment - (bool) whether the current episode is completed or not - (dict) info about current env step """ reward, done, info = super()._post_action(action) if self.get_info: info['add_vals'] = [ 'nwipedsensors', 'colls', 'percent_viapoints_', 'f_excess' ] info['nwipedsensors'] = len(self.wiped_sensors) info['colls'] = self.collisions info['percent_viapoints_'] = len(self.wiped_sensors) / len( self.model.arena.sensor_names) info['f_excess'] = self.f_excess # allow episode to finish early if early terminations are enabled if self.early_terminations: done = done or self._check_terminated() return reward, done, info def _check_robot_configuration(self, robots): """ Sanity check to make sure the inputted robots and configuration is acceptable Args: robots (str or list of str): Robots to instantiate within this env """ if type(robots) is list: assert len( robots ) == 1, "Error: Only one robot should be inputted for this task!" 
@property def _has_gripper_contact(self): """ Determines whether the gripper is making contact with an object, as defined by the eef force surpassing a certain threshold defined by self.contact_threshold Returns: bool: True if the contact force surpasses the given threshold magnitude """ return np.linalg.norm(self.robots[0].ee_force - self.ee_force_bias) > self.contact_threshold
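The per-step reward normalization set up in Wipe.__init__ and referenced in the reward docstring is easier to see in isolation. The sketch below recomputes the same factor with made-up config values (not the real DEFAULT_WIPE_CONFIG entries) and notes what the scaling achieves; the helper name is purely illustrative.

# Illustrative sketch of the Wipe per-step reward normalization.
# The config numbers below are hypothetical, not DEFAULT_WIPE_CONFIG values.

def wipe_normalization_factor(horizon, num_sensors, unit_wiped_reward,
                              wipe_contact_reward, task_complete_reward):
    """Mirror of the factor computed in Wipe.__init__:
    horizon / (num_sensors * unit_wiped + horizon * (contact + complete))."""
    theoretical_best_return = (num_sensors * unit_wiped_reward
                               + horizon * (wipe_contact_reward + task_complete_reward))
    return horizon / theoretical_best_return


horizon = 1000
num_sensors = 50                # hypothetical
unit_wiped_reward = 50.0        # hypothetical
wipe_contact_reward = 0.01      # hypothetical
task_complete_reward = 50 * (wipe_contact_reward + 0.5)  # same formula as Wipe.__init__

factor = wipe_normalization_factor(horizon, num_sensors, unit_wiped_reward,
                                   wipe_contact_reward, task_complete_reward)

# With reward *= reward_scale * factor each step, an episode whose summed
# un-normalized reward equals the theoretical best totals reward_scale * horizon.
print(factor)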
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace mujoco_arena = PegsArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=self.table_offset, ) # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # define nuts self.nuts = [] nut_names = ("SquareNut", "RoundNut") # Create default (SequentialCompositeSampler) sampler if it has not already been specified if self.placement_initializer is None: self.placement_initializer = SequentialCompositeSampler( name="ObjectSampler") for nut_name, default_y_range in zip( nut_names, ([0.11, 0.225], [-0.225, -0.11])): self.placement_initializer.append_sampler( sampler=UniformRandomSampler( name=f"{nut_name}Sampler", x_range=[-0.115, -0.11], y_range=default_y_range, rotation=None, rotation_axis="z", ensure_object_boundary_in_range=False, ensure_valid_placement=True, reference_pos=self.table_offset, z_offset=0.02, )) # Reset sampler before adding any new samplers / objects self.placement_initializer.reset() for i, (nut_cls, nut_name) in enumerate( zip( (SquareNutObject, RoundNutObject), nut_names, )): nut = nut_cls(name=nut_name) self.nuts.append(nut) # Add this nut to the placement initializer if isinstance(self.placement_initializer, SequentialCompositeSampler): # assumes we have two samplers so we add nuts to them self.placement_initializer.add_objects_to_sampler( sampler_name=f"{nut_name}Sampler", mujoco_objects=nut) else: # This is assumed to be a flat sampler, so we just add all nuts to this sampler self.placement_initializer.add_objects(nut) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.nuts, )
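Because the sampler wiring above is spread across the flattened lines, here is a minimal, self-contained sketch of the same pattern: a SequentialCompositeSampler holding one UniformRandomSampler per object. The object names and ranges are hypothetical, and the import paths follow newer robosuite releases, so they may need adjusting for the version this file comes from.

# Sketch only: mirrors the append_sampler / reset / add_objects_to_sampler
# calls used above. Import paths may differ across robosuite versions.
from robosuite.models.objects import BoxObject
from robosuite.utils.placement_samplers import (SequentialCompositeSampler,
                                                UniformRandomSampler)

placement_initializer = SequentialCompositeSampler(name="ObjectSampler")

# One sub-sampler per object, each with its own y-band on the table.
for name, y_range in zip(("blockA", "blockB"), ([0.1, 0.2], [-0.2, -0.1])):
    placement_initializer.append_sampler(
        sampler=UniformRandomSampler(
            name=f"{name}Sampler",
            x_range=[-0.1, 0.1],
            y_range=y_range,
            rotation=None,
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            z_offset=0.02,
        ))

# Reset, then register each (hypothetical) object with its matching sub-sampler.
placement_initializer.reset()
for name in ("blockA", "blockB"):
    placement_initializer.add_objects_to_sampler(
        sampler_name=f"{name}Sampler",
        mujoco_objects=BoxObject(name=name, size=[0.02, 0.02, 0.02]))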
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Verify the correct robot has been loaded assert isinstance(self.robots[0], SingleArm), \ "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0])) # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=(0, 0, 0.8), ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest tex_attrib = { "type": "cube", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } redwood = CustomMaterial( texture="WoodRed", tex_name="redwood", mat_name="redwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) greenwood = CustomMaterial( texture="WoodGreen", tex_name="greenwood", mat_name="greenwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) cubeA = BoxObject( name="cubeA", size_min=[0.02, 0.02, 0.02], size_max=[0.02, 0.02, 0.02], rgba=[1, 0, 0, 1], material=redwood, ) cubeB = BoxObject( name="cubeB", size_min=[0.025, 0.025, 0.025], size_max=[0.025, 0.025, 0.025], rgba=[0, 1, 0, 1], material=greenwood, ) self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)]) # task includes arena, robot, and objects of interest self.model = ManipulationTask( self.mujoco_arena, [robot.robot_model for robot in self.robots], self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects()
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Verify the correct robot has been loaded assert isinstance(self.robots[0], SingleArm), \ "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0])) # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["bins"] self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = BinsArena(bin1_pos=self.bin1_pos, table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # store some arena attributes self.bin_size = self.mujoco_arena.table_full_size # define mujoco objects self.ob_inits = [MilkObject, BreadObject, CerealObject, CanObject] self.vis_inits = [ MilkVisualObject, BreadVisualObject, CerealVisualObject, CanVisualObject, ] self.item_names = ["Milk", "Bread", "Cereal", "Can"] self.item_names_org = list(self.item_names) self.obj_to_use = (self.item_names[0] + "{}").format(0) lst = [] for j in range(len(self.vis_inits)): visual_ob_name = ("Visual" + self.item_names[j] + "0") visual_ob = self.vis_inits[j]( name=visual_ob_name, joints=[], # no free joint for visual objects ) lst.append((visual_ob_name, visual_ob)) self.visual_objects = OrderedDict(lst) lst = [] for i in range(len(self.ob_inits)): ob_name = (self.item_names[i] + "0") ob = self.ob_inits[i]( name=ob_name, joints=[dict(type="free", damping="0.0005") ], # damp the free joint for each object ) lst.append((ob_name, ob)) self.mujoco_objects = OrderedDict(lst) self.n_objects = len(self.mujoco_objects) # task includes arena, robot, and objects of interest self._get_placement_initializer() self.model = ManipulationTask( mujoco_arena=self.mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.mujoco_objects, visual_objects=self.visual_objects, initializer=self.placement_initializer, ) # set positions of objects self.model.place_objects()
class Stack(RobotEnv): """ This class corresponds to the stacking task for a single robot arm. Args: robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms) Note: Must be a single single-arm robot! controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a custom controller. Else, uses the default controller for this specific task. Should either be single dict if same controller is to be used for all robots or else it should be a list of the same length as "robots" param gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory. Default is "default", which is the default grippers(s) associated with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model overrides the default gripper. Should either be single str if same gripper type is to be used for all robots or else it should be a list of the same length as "robots" param gripper_visualizations (bool or list of bool): True if using gripper visualization. Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all robots or else it should be a list of the same length as "robots" param initialization_noise (dict or list of dict): Dict containing the initialization noise parameters. The expected keys and corresponding value types are specified below: :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial joint positions. Setting this value to `None` or 0.0 results in no noise being applied. If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied, If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform" Should either be single dict if same noise value is to be used for all robots or else it should be a list of the same length as "robots" param :Note: Specifying "default" will automatically use the default noise settings. Specifying None will automatically create the required dict with "magnitude" set to 0.0. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes rendered image(s) use_object_obs (bool): if True, include object (cube) information in the observation. reward_scale (None or float): Scales the normalized reward function by the amount specified. If None, environment reward remains unnormalized reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None' will result in the default angle being applied, which is useful as it can be dragged / panned by the user using the mouse render_collision_mesh (bool): True if rendering collision meshes in camera. 
False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else, only calls sim.reset and resets all robosuite-internal variables camera_names (str or list of str): name of camera to be rendered. Should either be single str if same name is to be used for all cameras' rendering or else it should be a list of cameras to render. :Note: At least one camera must be specified if @use_camera_obs is True. :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each robot's camera list). camera_heights (int or list of int): height of camera frame. Should either be single int if same height is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_widths (int or list of int): width of camera frame. Should either be single int if same width is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single bool if same depth setting is to be used for all cameras or else it should be a list of the same length as "camera names" param. Raises: AssertionError: [Invalid number of robots specified] """ def __init__( self, robots, controller_configs=None, gripper_types="default", gripper_visualizations=False, initialization_noise="default", table_full_size=(0.8, 0.8, 0.05), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_scale=1.0, reward_shaping=False, placement_initializer=None, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_camera="frontview", render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, hard_reset=True, camera_names="agentview", camera_heights=256, camera_widths=256, camera_depths=False, ): """ """ # First, verify that only one robot is being inputted self._check_robot_configuration(robots) # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # reward configuration self.reward_scale = reward_scale self.reward_shaping = reward_shaping # whether to use ground-truth object states self.use_object_obs = use_object_obs # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: self.placement_initializer = UniformRandomSampler( x_range=[-0.08, 0.08], y_range=[-0.08, 0.08], ensure_object_boundary_in_range=False, rotation=None, ) super().__init__( robots=robots, controller_configs=controller_configs, gripper_types=gripper_types, gripper_visualizations=gripper_visualizations, initialization_noise=initialization_noise, use_camera_obs=use_camera_obs, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_camera=render_camera, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, 
ignore_done=ignore_done, hard_reset=hard_reset, camera_names=camera_names, camera_heights=camera_heights, camera_widths=camera_widths, camera_depths=camera_depths, ) def reward(self, action): """ Reward function for the task. Sparse un-normalized reward: - a discrete reward of 2.0 is provided if the red block is stacked on the green block Un-normalized components if using reward shaping: - Reaching: in [0, 0.25], to encourage the arm to reach the cube - Grasping: in {0, 0.25}, non-zero if arm is grasping the cube - Lifting: in {0, 1}, non-zero if arm has lifted the cube - Aligning: in [0, 0.5], encourages aligning one cube over the other - Stacking: in {0, 2}, non-zero if cube is stacked on other cube The reward is max over the following: - Reaching + Grasping - Lifting + Aligning - Stacking The sparse reward only consists of the stacking component. Note that the final reward is normalized and scaled by reward_scale / 2.0 as well so that the max score is equal to reward_scale Args: action (np array): [NOT USED] Returns: float: reward value """ r_reach, r_lift, r_stack = self.staged_rewards() if self.reward_shaping: reward = max(r_reach, r_lift, r_stack) else: reward = 2.0 if r_stack > 0 else 0.0 if self.reward_scale is not None: reward *= self.reward_scale / 2.0 return reward def staged_rewards(self): """ Helper function to calculate staged rewards based on current physical states. Returns: 3-tuple: - (float): reward for reaching and grasping - (float): reward for lifting and aligning - (float): reward for stacking """ # reaching is successful when the gripper site is close to # the center of the cube cubeA_pos = self.sim.data.body_xpos[self.cubeA_body_id] cubeB_pos = self.sim.data.body_xpos[self.cubeB_body_id] gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id] dist = np.linalg.norm(gripper_site_pos - cubeA_pos) r_reach = (1 - np.tanh(10.0 * dist)) * 0.25 # collision checking touch_left_finger = False touch_right_finger = False touch_cubeA_cubeB = False for i in range(self.sim.data.ncon): c = self.sim.data.contact[i] if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cubeA_geom_id: touch_left_finger = True if c.geom1 == self.cubeA_geom_id and c.geom2 in self.l_finger_geom_ids: touch_left_finger = True if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cubeA_geom_id: touch_right_finger = True if c.geom1 == self.cubeA_geom_id and c.geom2 in self.r_finger_geom_ids: touch_right_finger = True if c.geom1 == self.cubeA_geom_id and c.geom2 == self.cubeB_geom_id: touch_cubeA_cubeB = True if c.geom1 == self.cubeB_geom_id and c.geom2 == self.cubeA_geom_id: touch_cubeA_cubeB = True # additional grasping reward if touch_left_finger and touch_right_finger: r_reach += 0.25 # lifting is successful when the cube is above the table top # by a margin cubeA_height = cubeA_pos[2] table_height = self.mujoco_arena.table_offset[2] cubeA_lifted = cubeA_height > table_height + 0.04 r_lift = 1.0 if cubeA_lifted else 0.0 # Aligning is successful when cubeA is right above cubeB if cubeA_lifted: horiz_dist = np.linalg.norm( np.array(cubeA_pos[:2]) - np.array(cubeB_pos[:2])) r_lift += 0.5 * (1 - np.tanh(horiz_dist)) # stacking is successful when the block is lifted and # the gripper is not holding the object r_stack = 0 not_touching = not touch_left_finger and not touch_right_finger if not_touching and r_lift > 0 and touch_cubeA_cubeB: r_stack = 2.0 return r_reach, r_lift, r_stack def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Verify the 
correct robot has been loaded assert isinstance(self.robots[0], SingleArm), \ "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0])) # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=(0, 0, 0.8), ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest tex_attrib = { "type": "cube", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } redwood = CustomMaterial( texture="WoodRed", tex_name="redwood", mat_name="redwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) greenwood = CustomMaterial( texture="WoodGreen", tex_name="greenwood", mat_name="greenwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) cubeA = BoxObject( name="cubeA", size_min=[0.02, 0.02, 0.02], size_max=[0.02, 0.02, 0.02], rgba=[1, 0, 0, 1], material=redwood, ) cubeB = BoxObject( name="cubeB", size_min=[0.025, 0.025, 0.025], size_max=[0.025, 0.025, 0.025], rgba=[0, 1, 0, 1], material=greenwood, ) self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)]) # task includes arena, robot, and objects of interest self.model = ManipulationTask( self.mujoco_arena, [robot.robot_model for robot in self.robots], self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # Additional object references from this env self.cubeA_body_id = self.sim.model.body_name2id("cubeA") self.cubeB_body_id = self.sim.model.body_name2id("cubeB") # information of objects self.object_names = list(self.mujoco_objects.keys()) self.object_site_ids = [ self.sim.model.site_name2id(ob_name) for ob_name in self.object_names ] # id of grippers for contact checking self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.robots[0].gripper.important_geoms["left_finger"] ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.robots[0].gripper.important_geoms["right_finger"] ] self.cubeA_geom_id = self.sim.model.geom_name2id("cubeA") self.cubeB_geom_id = self.sim.model.geom_name2id("cubeB") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects obj_pos, obj_quat = self.model.place_objects() # Loop through all objects and reset their positions for i, (obj_name, _) in enumerate(self.mujoco_objects.items()): self.sim.data.set_joint_qpos( obj_name + "_jnt0", np.concatenate( [np.array(obj_pos[i]), np.array(obj_quat[i])])) def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: `'robot-state'`: contains robot-centric information. `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information. 
`'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation. `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True. Contains a rendered depth map from the simulation Returns: OrderedDict: Observations from the environment """ di = super()._get_observation() # low-level object information if self.use_object_obs: # Get robot prefix pr = self.robots[0].robot_model.naming_prefix # position and rotation of the first cube cubeA_pos = np.array(self.sim.data.body_xpos[self.cubeA_body_id]) cubeA_quat = convert_quat(np.array( self.sim.data.body_xquat[self.cubeA_body_id]), to="xyzw") di["cubeA_pos"] = cubeA_pos di["cubeA_quat"] = cubeA_quat # position and rotation of the second cube cubeB_pos = np.array(self.sim.data.body_xpos[self.cubeB_body_id]) cubeB_quat = convert_quat(np.array( self.sim.data.body_xquat[self.cubeB_body_id]), to="xyzw") di["cubeB_pos"] = cubeB_pos di["cubeB_quat"] = cubeB_quat # relative positions between gripper and cubes gripper_site_pos = np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id]) di[pr + "gripper_to_cubeA"] = gripper_site_pos - cubeA_pos di[pr + "gripper_to_cubeB"] = gripper_site_pos - cubeB_pos di["cubeA_to_cubeB"] = cubeA_pos - cubeB_pos di["object-state"] = np.concatenate([ cubeA_pos, cubeA_quat, cubeB_pos, cubeB_quat, di[pr + "gripper_to_cubeA"], di[pr + "gripper_to_cubeB"], di["cubeA_to_cubeB"], ]) return di def _check_success(self): """ Check if blocks are stacked correctly. Returns: bool: True if blocks are correctly stacked """ _, _, r_stack = self.staged_rewards() return r_stack > 0 def _visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.robots[0].gripper_visualization: # find closest object square_dist = lambda x: np.sum( np.square(x - self.sim.data.get_site_xpos(self.robots[ 0].gripper.visualization_sites["grip_site"]))) dists = np.array(list(map(square_dist, self.sim.data.site_xpos))) dists[ self.robots[0]. eef_site_id] = np.inf # make sure we don't pick the same site dists[self.robots[0].eef_cylinder_id] = np.inf ob_dists = dists[ self.object_site_ids] # filter out object sites we care about min_dist = np.min(ob_dists) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(min_dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba def _check_robot_configuration(self, robots): """ Sanity check to make sure the inputted robots and configuration is acceptable Args: robots (str or list of str): Robots to instantiate within this env """ if type(robots) is list: assert len( robots ) == 1, "Error: Only one robot should be inputted for this task!"
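The max-over-stages structure described in Stack.reward and staged_rewards is summarized below as a standalone numpy sketch. The distance and height inputs are invented for illustration, and the helper functions are not part of the environment; they only restate the shaping terms already defined above.

# Standalone sketch of the staged shaping terms in Stack.staged_rewards.
import numpy as np

def reach_term(gripper_to_cubeA_dist, grasping):
    """Reaching in [0, 0.25], plus a flat 0.25 grasp bonus."""
    r = 0.25 * (1 - np.tanh(10.0 * gripper_to_cubeA_dist))
    return r + (0.25 if grasping else 0.0)

def lift_align_term(cubeA_height, table_height, horiz_dist_AB):
    """Lifting in {0, 1}, plus alignment in [0, 0.5] once lifted."""
    lifted = cubeA_height > table_height + 0.04
    r = 1.0 if lifted else 0.0
    if lifted:
        r += 0.5 * (1 - np.tanh(horiz_dist_AB))
    return r

# The dense reward is the max over stages, so any progress in a later stage
# dominates a perfect score in an earlier one (0.5 reach+grasp < 1.0 lift < 2.0 stack).
r_reach = reach_term(0.05, grasping=True)     # ~0.38
r_lift = lift_align_term(0.90, 0.85, 0.02)    # ~1.49
r_stack = 0.0                                 # cube not yet stacked
print(max(r_reach, r_lift, r_stack))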
class TwoArmHandover(RobotEnv): """ This class corresponds to the handover task for two robot arms. Args: robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms) Note: Must be either 2 single single-arm robots or 1 bimanual robot! env_configuration (str): Specifies how to position the robots within the environment. Can be either: :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x side of the table :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed robots next to each other on the -x side of the table :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed robots opposed from each others on the opposite +/-y sides of the table (Default option) controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a custom controller. Else, uses the default controller for this specific task. Should either be single dict if same controller is to be used for all robots or else it should be a list of the same length as "robots" param gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory. Default is "default", which is the default grippers(s) associated with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model overrides the default gripper. Should either be single str if same gripper type is to be used for all robots or else it should be a list of the same length as "robots" param gripper_visualizations (bool or list of bool): True if using gripper visualization. Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all robots or else it should be a list of the same length as "robots" param initialization_noise (dict or list of dict): Dict containing the initialization noise parameters. The expected keys and corresponding value types are specified below: :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial joint positions. Setting this value to `None` or 0.0 results in no noise being applied. If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied, If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform" Should either be single dict if same noise value is to be used for all robots or else it should be a list of the same length as "robots" param :Note: Specifying "default" will automatically use the default noise settings. Specifying None will automatically create the required dict with "magnitude" set to 0.0. prehensile (bool): If true, handover object starts on the table. Else, the object starts in Arm0's gripper table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes rendered image(s) use_object_obs (bool): if True, include object (cube) information in the observation. reward_scale (None or float): Scales the normalized reward function by the amount specified. If None, environment reward remains unnormalized reward_shaping (bool): if True, use dense rewards. 
placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None' will result in the default angle being applied, which is useful as it can be dragged / panned by the user using the mouse render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else, only calls sim.reset and resets all robosuite-internal variables camera_names (str or list of str): name of camera to be rendered. Should either be single str if same name is to be used for all cameras' rendering or else it should be a list of cameras to render. :Note: At least one camera must be specified if @use_camera_obs is True. :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each robot's camera list). camera_heights (int or list of int): height of camera frame. Should either be single int if same height is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_widths (int or list of int): width of camera frame. Should either be single int if same width is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single bool if same depth setting is to be used for all cameras or else it should be a list of the same length as "camera names" param. 
Raises: ValueError: [Invalid number of robots specified] ValueError: [Invalid env configuration] ValueError: [Invalid robots for specified env configuration] """ def __init__( self, robots, env_configuration="single-arm-opposed", controller_configs=None, gripper_types="default", gripper_visualizations=False, initialization_noise="default", prehensile=True, table_full_size=(0.8, 1.2, 0.05), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_scale=1.0, reward_shaping=False, placement_initializer=None, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_camera="frontview", render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, hard_reset=True, camera_names="agentview", camera_heights=256, camera_widths=256, camera_depths=False, ): # First, verify that correct number of robots are being inputted self.env_configuration = env_configuration self._check_robot_configuration(robots) # Task settings self.prehensile = prehensile # settings for table top self.table_full_size = table_full_size self.table_true_size = list(table_full_size) self.table_true_size[ 1] *= 0.25 # true size will only be partially wide self.table_friction = table_friction self.table_offset = [0, self.table_full_size[1] * (-3 / 8), 0.8] # reward configuration self.reward_scale = reward_scale self.reward_shaping = reward_shaping self.height_threshold = 0.1 # threshold above the table surface which the hammer is considered lifted # whether to use ground-truth object states self.use_object_obs = use_object_obs # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: # Set rotation about y-axis if hammer starts on table else rotate about z if it starts in gripper rotation_axis = 'y' if self.prehensile else 'z' self.placement_initializer = UniformRandomSampler( x_range=[-0.1, 0.1], y_range=[-0.05, 0.05], ensure_object_boundary_in_range=False, rotation=None, rotation_axis=rotation_axis, ) super().__init__( robots=robots, controller_configs=controller_configs, gripper_types=gripper_types, gripper_visualizations=gripper_visualizations, initialization_noise=initialization_noise, use_camera_obs=use_camera_obs, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_camera=render_camera, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, hard_reset=hard_reset, camera_names=camera_names, camera_heights=camera_heights, camera_widths=camera_widths, camera_depths=camera_depths, ) def reward(self, action=None): """ Reward function for the task. Sparse un-normalized reward: - a discrete reward of 2.0 is provided when only Arm 1 is gripping the handle and has the handle lifted above a certain threshold Un-normalized max-wise components if using reward shaping: - Arm0 Reaching: (1) in [0, 0.25] proportional to the distance between Arm 0 and the handle - Arm0 Grasping: (2) in {0, 0.5}, nonzero if Arm 0 is gripping the hammer (any part). 
- Arm0 Lifting: (3) in {0, 1.0}, nonzero if Arm 0 lifts the handle from the table past a certain threshold - Arm0 Hovering: (4) in {0, [1.0, 1.25]}, nonzero only if Arm0 is actively lifting the hammer, and is proportional to the distance between the handle and Arm 1 conditioned on the handle being lifted from the table and being grasped by Arm 0 - Mutual Grasping: (5) in {0, 1.5}, nonzero if both Arm 0 and Arm 1 are gripping the hammer (Arm 1 must be gripping the handle) while lifted above the table - Handover: (6) in {0, 2.0}, nonzero when only Arm 1 is gripping the handle and has the handle lifted above the table Note that the final reward is normalized and scaled by reward_scale / 2.0 as well so that the max score is equal to reward_scale Args: action (np array): [NOT USED] Returns: float: reward value """ # Initialize reward reward = 0 # use a shaping reward if specified if self.reward_shaping: # Grab relevant parameters arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height = self._get_task_info( ) # First, we'll consider the cases if the hammer is lifted above the threshold (step 3 - 6) if hammer_height - table_height > self.height_threshold: # Split cases depending on whether arm1 is currently grasping the handle or not if arm1_grasp_handle: # Check if arm0 is grasping if arm0_grasp_any: # This is step 5 reward = 1.5 else: # This is step 6 (completed task!) reward = 2.0 # This is the case where only arm0 is grasping (step 2-3) else: reward = 1.0 # Add in up to 0.25 based on distance between handle and arm1 dist = np.linalg.norm(self._gripper_1_to_handle) reaching_reward = 0.25 * (1 - np.tanh(1.0 * dist)) reward += reaching_reward # Else, the hammer is still on the ground ): else: # Split cases depending on whether arm0 is currently grasping the handle or not if arm0_grasp_any: # This is step 2 reward = 0.5 else: # This is step 1, we want to encourage arm0 to reach for the handle dist = np.linalg.norm(self._gripper_0_to_handle) reaching_reward = 0.25 * (1 - np.tanh(1.0 * dist)) reward = reaching_reward # Else this is the sparse reward setting else: # Provide reward if only Arm 1 is grasping the hammer and the handle lifted above the pre-defined threshold if self._check_success(): reward = 2.0 if self.reward_scale is not None: reward *= self.reward_scale / 2.0 return reward def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose(s) accordingly if self.env_configuration == "bimanual": xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) else: if self.env_configuration == "single-arm-opposed": # Set up robots facing towards each other by rotating them from their default position for robot, rotation, offset in zip(self.robots, (np.pi / 2, -np.pi / 2), (-0.25, 0.25)): xpos = robot.robot_model.base_xpos_offset["table"]( self.table_full_size[0]) rot = np.array((0, 0, rotation)) xpos = T.euler2mat(rot) @ np.array(xpos) xpos += np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) robot.robot_model.set_base_ori(rot) else: # "single-arm-parallel" configuration setting # Set up robots parallel to each other but offset from the center for robot, offset in zip(self.robots, (-0.6, 0.6)): xpos = robot.robot_model.base_xpos_offset["table"]( self.table_full_size[0]) xpos = np.array(xpos) + np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = 
TableArena(table_full_size=self.table_true_size, table_friction=self.table_friction, table_offset=self.table_offset) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest self.hammer = HammerObject(name="hammer") self.mujoco_objects = OrderedDict([("hammer", self.hammer)]) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=self.mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.mujoco_objects, visual_objects=None, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # Hammer object references from this env self.hammer_body_id = self.sim.model.body_name2id("hammer") self.hammer_handle_geom_id = self.sim.model.geom_name2id( "hammer_handle") self.hammer_head_geom_id = self.sim.model.geom_name2id("hammer_head") self.hammer_face_geom_id = self.sim.model.geom_name2id("hammer_face") self.hammer_claw_geom_id = self.sim.model.geom_name2id("hammer_claw") # General env references self.table_top_id = self.sim.model.site_name2id("table_top") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects obj_pos, obj_quat = self.model.place_objects() # Loop through all objects and reset their positions for i, (obj_name, _) in enumerate(self.mujoco_objects.items()): # If prehensile, set the object normally if self.prehensile: self.sim.data.set_joint_qpos( obj_name + "_jnt0", np.concatenate( [np.array(obj_pos[i]), np.array(obj_quat[i])])) # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping # the object initially else: eef_rot_quat = T.mat2quat( T.euler2mat( [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0])) obj_quat[i] = T.quat_multiply(obj_quat[i], eef_rot_quat) for j in range(100): # Set object in hand self.sim.data.set_joint_qpos( obj_name + "_jnt0", np.concatenate( [self._eef0_xpos, np.array(obj_quat[i])])) # Close gripper (action = 1) and prevent arm from moving if self.env_configuration == 'bimanual': # Execute no-op action with gravity compensation torques = np.concatenate([ self.robots[0].controller["right"]. torque_compensation, self.robots[0]. 
controller["left"].torque_compensation ]) self.sim.data.ctrl[self.robots[ 0]._ref_joint_torq_actuator_indexes] = torques # Execute gripper action self.robots[0].grip_action([1], "right") else: # Execute no-op action with gravity compensation self.sim.data.ctrl[self.robots[0]._ref_joint_torq_actuator_indexes] =\ self.robots[0].controller.torque_compensation self.sim.data.ctrl[self.robots[1]._ref_joint_torq_actuator_indexes] = \ self.robots[1].controller.torque_compensation # Execute gripper action self.robots[0].grip_action([1]) # Take forward step self.sim.step() def _get_task_info(self): """ Helper function that grabs the current relevant locations of objects of interest within the environment Returns: 4-tuple: - (bool) True if Arm0 is grasping any part of the hammer - (bool) True if Arm1 is grasping the hammer handle - (float) Height of the hammer body - (float) Height of the table surface """ # Get height of hammer and table and define height threshold hammer_angle_offset = (self.hammer.handle_length / 2 + 2 * self.hammer.head_halfsize) * np.sin( self._hammer_angle) hammer_height = self.sim.data.geom_xpos[self.hammer_handle_geom_id][2]\ - self.hammer.get_top_offset()[2]\ - hammer_angle_offset table_height = self.sim.data.site_xpos[self.table_top_id][2] # Check if any Arm's gripper is grasping the hammer handle # Single bimanual robot setting if self.env_configuration == "bimanual": _contacts_0_lf = len( list( self.find_contacts( self.robots[0].gripper["left"]. important_geoms["left_finger"], self.hammer.all_geoms))) > 0 _contacts_0_rf = len( list( self.find_contacts( self.robots[0].gripper["left"]. important_geoms["right_finger"], self.hammer.all_geoms))) > 0 _contacts_1_lf = len( list( self.find_contacts( self.robots[0].gripper["right"]. important_geoms["left_finger"], self.hammer.handle_geoms))) > 0 _contacts_1_rf = len( list( self.find_contacts( self.robots[0].gripper["right"]. important_geoms["right_finger"], self.hammer.handle_geoms))) > 0 # Multi single arm setting else: _contacts_0_lf = len( list( self.find_contacts( self.robots[0].gripper.important_geoms["left_finger"], self.hammer.all_geoms))) > 0 _contacts_0_rf = len( list( self.find_contacts( self.robots[0].gripper.important_geoms["right_finger"], self.hammer.all_geoms))) > 0 _contacts_1_lf = len( list( self.find_contacts( self.robots[1].gripper.important_geoms["left_finger"], self.hammer.handle_geoms))) > 0 _contacts_1_rf = len( list( self.find_contacts( self.robots[1].gripper.important_geoms["right_finger"], self.hammer.handle_geoms))) > 0 arm0_grasp_any = True if _contacts_0_lf and _contacts_0_rf else False arm1_grasp_handle = True if _contacts_1_lf and _contacts_1_rf else False # Return all relevant values return arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: `'robot-state'`: contains robot-centric information. `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information. `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation. `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True. 
Contains a rendered depth map from the simulation Returns: OrderedDict: Observations from the environment """ di = super()._get_observation() # low-level object information if self.use_object_obs: # Get robot prefix if self.env_configuration == "bimanual": pr0 = self.robots[0].robot_model.naming_prefix + "right_" pr1 = self.robots[0].robot_model.naming_prefix + "left_" else: pr0 = self.robots[0].robot_model.naming_prefix pr1 = self.robots[1].robot_model.naming_prefix # position and rotation of hammer di["hammer_pos"] = np.array(self._hammer_pos) di["hammer_quat"] = np.array(self._hammer_quat) di["handle_xpos"] = np.array(self._handle_xpos) di[pr0 + "eef_xpos"] = np.array(self._eef0_xpos) di[pr1 + "eef_xpos"] = np.array(self._eef1_xpos) di[pr0 + "gripper_to_handle"] = np.array(self._gripper_0_to_handle) di[pr1 + "gripper_to_handle"] = np.array(self._gripper_1_to_handle) di["object-state"] = np.concatenate([ di["hammer_pos"], di["hammer_quat"], di["handle_xpos"], di[pr0 + "eef_xpos"], di[pr1 + "eef_xpos"], di[pr0 + "gripper_to_handle"], di[pr1 + "gripper_to_handle"], ]) return di def _check_success(self): """ Check if hammer is successfully handed off Returns: bool: True if handover has been completed """ # Grab relevant params arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height = self._get_task_info( ) return \ True if \ arm1_grasp_handle and \ not arm0_grasp_any and \ hammer_height - table_height > self.height_threshold \ else False def _check_robot_configuration(self, robots): """ Sanity check to make sure the inputted robots and configuration is acceptable Args: robots (str or list of str): Robots to instantiate within this env """ robots = robots if type(robots) == list or type(robots) == tuple else [ robots ] if self.env_configuration == "single-arm-opposed" or self.env_configuration == "single-arm-parallel": # Specifically two robots should be inputted! is_bimanual = False if type(robots) is not list or len(robots) != 2: raise ValueError( "Error: Exactly two single-armed robots should be inputted " "for this task configuration!") elif self.env_configuration == "bimanual": is_bimanual = True # Specifically one robot should be inputted! if type(robots) is list and len(robots) != 1: raise ValueError( "Error: Exactly one bimanual robot should be inputted " "for this task configuration!") else: # This is an unknown env configuration, print error raise ValueError( "Error: Unknown environment configuration received. Only 'bimanual'," "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}" .format(self.env_configuration)) # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual) for robot in robots: if check_type(robot, RobotType.bimanual) != is_bimanual: raise ValueError( "Error: For {} configuration, expected bimanual check to return {}; " "instead, got {}.".format( self.env_configuration, is_bimanual, check_type(robot, RobotType.bimanual))) @property def _handle_xpos(self): """ Grab the position of the hammer handle. Returns: np.array: (x,y,z) position of handle """ return self.sim.data.geom_xpos[self.hammer_handle_geom_id] @property def _head_xpos(self): """ Grab the position of the hammer head. Returns: np.array: (x,y,z) position of head """ return self.sim.data.geom_xpos[self.hammer_head_geom_id] @property def _face_xpos(self): """ Grab the position of the hammer face. 
Returns: np.array: (x,y,z) position of face """ return self.sim.data.geom_xpos[self.hammer_face_geom_id] @property def _claw_xpos(self): """ Grab the position of the hammer claw. Returns: np.array: (x,y,z) position of claw """ return self.sim.data.geom_xpos[self.hammer_claw_geom_id] @property def _hammer_pos(self): """ Grab the position of the hammer body. Returns: np.array: (x,y,z) position of body """ return np.array(self.sim.data.body_xpos[self.hammer_body_id]) @property def _hammer_quat(self): """ Grab the orientation of the hammer body. Returns: np.array: (x,y,z,w) quaternion of the hammer body """ return T.convert_quat(self.sim.data.body_xquat[self.hammer_body_id], to="xyzw") @property def _hammer_angle(self): """ Calculate the angle of hammer with the ground, relative to it resting horizontally Returns: float: angle in radians """ mat = T.quat2mat(self._hammer_quat) z_unit = [0, 0, 1] z_rotated = np.matmul(mat, z_unit) return np.pi / 2 - np.arccos(np.dot(z_unit, z_rotated)) @property def _world_quat(self): """ Grab the world orientation Returns: np.array: (x,y,z,w) world quaternion """ return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw") @property def _eef0_xpos(self): """ Grab the position of Robot 0's end effector. Returns: np.array: (x,y,z) position of EEF0 """ if self.env_configuration == "bimanual": return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id["right"]]) else: return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id]) @property def _eef1_xpos(self): """ Grab the position of Robot 1's end effector. Returns: np.array: (x,y,z) position of EEF1 """ if self.env_configuration == "bimanual": return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id["left"]]) else: return np.array( self.sim.data.site_xpos[self.robots[1].eef_site_id]) @property def _eef0_xmat(self): """ End Effector 0 orientation as a rotation matrix Note that this draws the orientation from the "ee" site, NOT the gripper site, since the gripper orientations are inconsistent! Returns: np.array: (3,3) orientation matrix for EEF0 """ pf = self.robots[0].robot_model.naming_prefix if self.env_configuration == "bimanual": return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "right_ee")]).reshape(3, 3) else: return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "ee")]).reshape(3, 3) @property def _eef1_xmat(self): """ End Effector 1 orientation as a rotation matrix Note that this draws the orientation from the "right_/left_hand" body, NOT the gripper site, since the gripper orientations are inconsistent! Returns: np.array: (3,3) orientation matrix for EEF1 """ if self.env_configuration == "bimanual": pf = self.robots[0].robot_model.naming_prefix return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "left_ee")]).reshape(3, 3) else: pf = self.robots[1].robot_model.naming_prefix return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "ee")]).reshape(3, 3) @property def _gripper_0_to_handle(self): """ Calculate vector from the left gripper to the hammer handle. Returns: np.array: (dx,dy,dz) distance vector between handle and EEF0 """ return self._handle_xpos - self._eef0_xpos @property def _gripper_1_to_handle(self): """ Calculate vector from the right gripper to the hammer handle. Returns: np.array: (dx,dy,dz) distance vector between handle and EEF1 """ return self._handle_xpos - self._eef1_xpos
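# A minimal, standalone check of the _hammer_angle geometry above, using only NumPy
# (no robosuite or MuJoCo needed). The formula pi/2 - arccos(z . R z) returns
# pi/2 - phi when the object's local z-axis is tilted by phi away from the world
# z-axis; per the property's docstring, 0 then corresponds to the hammer resting
# flat and pi/2 to it pointing straight up. The rotation below is an illustrative
# assumption, not a pose read from the simulator.
import numpy as np

def hammer_angle_from_rotation(rot_mat):
    # Same computation as the _hammer_angle property, applied to an explicit rotation matrix.
    z_unit = np.array([0.0, 0.0, 1.0])
    z_rotated = rot_mat @ z_unit
    return np.pi / 2 - np.arccos(np.clip(np.dot(z_unit, z_rotated), -1.0, 1.0))

phi = np.pi / 3  # tilt the local z-axis 60 degrees away from world z, about the x-axis
rot_x = np.array([
    [1.0, 0.0, 0.0],
    [0.0, np.cos(phi), -np.sin(phi)],
    [0.0, np.sin(phi), np.cos(phi)],
])
assert np.isclose(hammer_angle_from_rotation(rot_x), np.pi / 2 - phi)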
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=self.table_offset, ) # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # Load hole object # register object with the corresponding option (objectClass, name, xrange, yrange) if self.task_configs['board'] == 'GMC_assembly': self.register_object(my_object.GMC_Assembly_Object, 'plate', xrange=[0, 0], yrange=[0, 0]) if self.task_configs['board'] == 'GMC_plate': self.register_object(my_object.GMC_Plate_Object, 'plate', xrange=[0, 0], yrange=[0, 0]) if self.task_configs['board'] == 'Square_hole_16mm': self.register_object(my_object.square_hole_16mm, 'plate', xrange=[0, 0], yrange=[0, 0]) # Load peg object if self.task_configs['peg'] == '16mm': self.register_object(my_object.Round_peg_16mm_Object, 'peg', xrange=[-0.1, -0.13], yrange=[0.3, 0.33]) elif self.task_configs['peg'] == '12mm': self.register_object(my_object.Round_peg_12mm_Object, 'peg', xrange=[-0.1, -0.13], yrange=[0.3, 0.33]) elif self.task_configs['peg'] == '9mm': raise NotImplementedError elif self.task_configs['peg'] == 'cylinder_16mm': from robosuite.models.objects.primitive import CylinderObject self.peg = CylinderObject('peg', size=(0.007, 0.025)) self.objects_of_interest.append(self.peg) self.objectsName_of_interest.append('peg') self.objectsXrange_of_interest.append([-0.1, -0.13]) self.objectsYrange_of_interest.append([0.3, 0.33]) # Create Sequential Sampler. The order is same as the order of register. # Create individual samplers per object self.placement_initializer = SequentialCompositeSampler( name="ObjectSampler") for obj_name, default_xy_range in zip( self.objectsName_of_interest, zip(self.objectsXrange_of_interest, self.objectsYrange_of_interest)): self.placement_initializer.append_sampler( sampler=UniformRandomSampler( name=f"{obj_name}Sampler", x_range=default_xy_range[0], y_range=default_xy_range[1], rotation=None, rotation_axis='z', ensure_object_boundary_in_range=True, ensure_valid_placement=True, reference_pos=self.table_offset, z_offset=0.01, )) # Add objects to the sampler for obj_to_put, obj_name in zip(self.objects_of_interest, self.objectsName_of_interest): self.placement_initializer.add_objects_to_sampler( sampler_name=f"{obj_name}Sampler", mujoco_objects=obj_to_put) if self.task_configs['board'] == 'hole': self.plate = PlateWithHoleObject(name='plate') plate_obj = self.plate.get_obj() plate_obj.set("quat", "0 0 0 1") plate_obj.set("pos", "0 0 {}".format(self.table_offset[2])) self.objects_of_interest.append(self.plate) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.objects_of_interest, )
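# The _load_model above registers each object of interest, then builds one
# UniformRandomSampler per registered object and chains them inside a
# SequentialCompositeSampler. A minimal sketch of that pattern with a single
# hypothetical BoxObject ("demo_box" and its placement ranges are made up here);
# the import paths assume a recent robosuite layout and may differ between versions.
from robosuite.models.objects import BoxObject
from robosuite.utils.placement_samplers import SequentialCompositeSampler, UniformRandomSampler

box = BoxObject(name="demo_box", size=[0.02, 0.02, 0.02], rgba=[1, 0, 0, 1])

placement_initializer = SequentialCompositeSampler(name="ObjectSampler")
placement_initializer.append_sampler(
    sampler=UniformRandomSampler(
        name="demo_boxSampler",
        x_range=[-0.13, -0.10],            # per-object range, in the style used above
        y_range=[0.30, 0.33],
        rotation=None,
        rotation_axis='z',
        ensure_object_boundary_in_range=True,
        ensure_valid_placement=True,
        reference_pos=(0, 0, 0.8),         # assumed table-top reference, analogous to self.table_offset
        z_offset=0.01,
    ))
placement_initializer.add_objects_to_sampler(sampler_name="demo_boxSampler",
                                             mujoco_objects=box)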
class TwoArmLift(RobotEnv): """ This class corresponds to the lifting task for two robot arms. Args: robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env (e.g.: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms) Note: Must be either 2 single-arm robots or 1 bimanual robot! env_configuration (str): Specifies how to position the robots within the environment. Can be either: :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x side of the table :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single-arm robots next to each other on the -x side of the table :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single-arm robots opposed from each other on the opposite +/-y sides of the table (Default option) controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a custom controller. Else, uses the default controller for this specific task. Should either be single dict if same controller is to be used for all robots or else it should be a list of the same length as "robots" param gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory. Default is "default", which is the default gripper(s) associated with the robot(s) in the 'robots' specification. None removes the gripper, and any other (valid) model overrides the default gripper. Should either be single str if same gripper type is to be used for all robots or else it should be a list of the same length as "robots" param gripper_visualizations (bool or list of bool): True if using gripper visualization. Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all robots or else it should be a list of the same length as "robots" param initialization_noise (dict or list of dict): Dict containing the initialization noise parameters. The expected keys and corresponding value types are specified below: :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial joint positions. Setting this value to `None` or 0.0 results in no noise being applied. If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied. If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform" Should either be single dict if same noise value is to be used for all robots or else it should be a list of the same length as "robots" param :Note: Specifying "default" will automatically use the default noise settings. Specifying None will automatically create the required dict with "magnitude" set to 0.0. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes rendered image(s) use_object_obs (bool): if True, include object (pot) information in the observation. reward_scale (None or float): Scales the normalized reward function by the amount specified. If None, environment reward remains unnormalized reward_shaping (bool): if True, use dense rewards.
placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None' will result in the default angle being applied, which is useful as it can be dragged / panned by the user using the mouse render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else, only calls sim.reset and resets all robosuite-internal variables camera_names (str or list of str): name of camera to be rendered. Should either be single str if same name is to be used for all cameras' rendering or else it should be a list of cameras to render. :Note: At least one camera must be specified if @use_camera_obs is True. :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each robot's camera list). camera_heights (int or list of int): height of camera frame. Should either be single int if same height is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_widths (int or list of int): width of camera frame. Should either be single int if same width is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single bool if same depth setting is to be used for all cameras or else it should be a list of the same length as "camera names" param. 
Raises: ValueError: [Invalid number of robots specified] ValueError: [Invalid env configuration] ValueError: [Invalid robots for specified env configuration] """ def __init__( self, robots, env_configuration="single-arm-opposed", controller_configs=None, gripper_types="default", gripper_visualizations=False, initialization_noise="default", table_full_size=(0.8, 0.8, 0.05), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_scale=1.0, reward_shaping=False, placement_initializer=None, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_camera="frontview", render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, hard_reset=True, camera_names="agentview", camera_heights=256, camera_widths=256, camera_depths=False, ): # First, verify that correct number of robots are being inputted self.env_configuration = env_configuration self._check_robot_configuration(robots) # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # reward configuration self.reward_scale = reward_scale self.reward_shaping = reward_shaping # whether to use ground-truth object states self.use_object_obs = use_object_obs # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: self.placement_initializer = UniformRandomSampler( x_range=[-0.03, 0.03], y_range=[-0.03, 0.03], ensure_object_boundary_in_range=False, rotation=(-np.pi / 3, np.pi / 3), ) super().__init__( robots=robots, controller_configs=controller_configs, gripper_types=gripper_types, gripper_visualizations=gripper_visualizations, initialization_noise=initialization_noise, use_camera_obs=use_camera_obs, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_camera=render_camera, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, hard_reset=hard_reset, camera_names=camera_names, camera_heights=camera_heights, camera_widths=camera_widths, camera_depths=camera_depths, ) def reward(self, action=None): """ Reward function for the task. Sparse un-normalized reward: - a discrete reward of 3.0 is provided if the pot is lifted and is parallel within 30 deg to the table Un-normalized summed components if using reward shaping: - Reaching: in [0, 0.5], per-arm component that is proportional to the distance between each arm and its respective pot handle, and exactly 0.5 when grasping the handle - Note that the agent only gets the lifting reward when flipping no more than 30 degrees. 
- Grasping: in {0, 0.25}, binary per-arm component awarded if the gripper is grasping its correct handle - Lifting: in [0, 1.5], proportional to the pot's height above the table, and capped at a certain threshold Note that the final reward is normalized and scaled by reward_scale / 3.0 as well so that the max score is equal to reward_scale Args: action (np array): [NOT USED] Returns: float: reward value """ reward = 0 # check if the pot is tilted more than 30 degrees mat = T.quat2mat(self._pot_quat) z_unit = [0, 0, 1] z_rotated = np.matmul(mat, z_unit) cos_z = np.dot(z_unit, z_rotated) cos_30 = np.cos(np.pi / 6) direction_coef = 1 if cos_z >= cos_30 else 0 # check for goal completion: cube is higher than the table top above a margin if self._check_success(): reward = 3.0 * direction_coef # use a shaping reward elif self.reward_shaping: # lifting reward pot_bottom_height = self.sim.data.site_xpos[ self.pot_center_id][2] - self.pot.get_top_offset()[2] table_height = self.sim.data.site_xpos[self.table_top_id][2] elevation = pot_bottom_height - table_height r_lift = min(max(elevation - 0.05, 0), 0.15) reward += 10. * direction_coef * r_lift _gripper_0_to_handle = self._gripper_0_to_handle _gripper_1_to_handle = self._gripper_1_to_handle # gh stands for gripper-handle # When grippers are far away, tell them to be closer # Single bimanual robot setting if self.env_configuration == "bimanual": _contacts_0_lf = len( list( self.find_contacts( self.robots[0].gripper["left"]. important_geoms["left_finger"], self.pot.handle_2_geoms()))) > 0 _contacts_0_rf = len( list( self.find_contacts( self.robots[0].gripper["left"]. important_geoms["right_finger"], self.pot.handle_2_geoms()))) > 0 _contacts_1_lf = len( list( self.find_contacts( self.robots[0].gripper["right"]. important_geoms["left_finger"], self.pot.handle_1_geoms()))) > 0 _contacts_1_rf = len( list( self.find_contacts( self.robots[0].gripper["right"]. important_geoms["right_finger"], self.pot.handle_1_geoms()))) > 0 # Multi single arm setting else: _contacts_0_lf = len( list( self.find_contacts( self.robots[0].gripper. important_geoms["left_finger"], self.pot.handle_2_geoms()))) > 0 _contacts_0_rf = len( list( self.find_contacts( self.robots[0].gripper. important_geoms["right_finger"], self.pot.handle_2_geoms()))) > 0 _contacts_1_lf = len( list( self.find_contacts( self.robots[1].gripper. important_geoms["left_finger"], self.pot.handle_1_geoms()))) > 0 _contacts_1_rf = len( list( self.find_contacts( self.robots[1].gripper. 
important_geoms["right_finger"], self.pot.handle_1_geoms()))) > 0 _g0h_dist = np.linalg.norm(_gripper_0_to_handle) _g1h_dist = np.linalg.norm(_gripper_1_to_handle) # Grasping reward if _contacts_0_lf and _contacts_0_rf: reward += 0.25 # Reaching reward reward += 0.5 * (1 - np.tanh(10.0 * _g0h_dist)) # Grasping reward if _contacts_1_lf and _contacts_1_rf: reward += 0.25 # Reaching reward reward += 0.5 * (1 - np.tanh(10.0 * _g1h_dist)) if self.reward_scale is not None: reward *= self.reward_scale / 3.0 return reward def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose(s) accordingly if self.env_configuration == "bimanual": xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) else: if self.env_configuration == "single-arm-opposed": # Set up robots facing towards each other by rotating them from their default position for robot, rotation in zip(self.robots, (np.pi / 2, -np.pi / 2)): xpos = robot.robot_model.base_xpos_offset["table"]( self.table_full_size[0]) rot = np.array((0, 0, rotation)) xpos = T.euler2mat(rot) @ np.array(xpos) robot.robot_model.set_base_xpos(xpos) robot.robot_model.set_base_ori(rot) else: # "single-arm-parallel" configuration setting # Set up robots parallel to each other but offset from the center for robot, offset in zip(self.robots, (-0.25, 0.25)): xpos = robot.robot_model.base_xpos_offset["table"]( self.table_full_size[0]) xpos = np.array(xpos) + np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=(0, 0, 0.8), ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest self.pot = PotWithHandlesObject(name="pot") self.mujoco_objects = OrderedDict([("pot", self.pot)]) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=self.mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.mujoco_objects, visual_objects=None, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # Additional object references from this env self.pot_body_id = self.sim.model.body_name2id("pot") self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1") self.handle_0_site_id = self.sim.model.site_name2id("pot_handle_2") self.table_top_id = self.sim.model.site_name2id("table_top") self.pot_center_id = self.sim.model.site_name2id("pot_center") def _reset_internal(self): """ Resets simulation internal configurations. 
""" super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects obj_pos, obj_quat = self.model.place_objects() # Loop through all objects and reset their positions for i, (obj_name, _) in enumerate(self.mujoco_objects.items()): self.sim.data.set_joint_qpos( obj_name + "_jnt0", np.concatenate( [np.array(obj_pos[i]), np.array(obj_quat[i])])) def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: `'robot-state'`: contains robot-centric information. `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information. `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation. `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True. Contains a rendered depth map from the simulation Returns: OrderedDict: Observations from the environment """ di = super()._get_observation() # low-level object information if self.use_object_obs: # Get robot prefix if self.env_configuration == "bimanual": pr0 = self.robots[0].robot_model.naming_prefix + "left_" pr1 = self.robots[0].robot_model.naming_prefix + "right_" else: pr0 = self.robots[0].robot_model.naming_prefix pr1 = self.robots[1].robot_model.naming_prefix # position and rotation of object cube_pos = np.array(self.sim.data.body_xpos[self.pot_body_id]) cube_quat = T.convert_quat( self.sim.data.body_xquat[self.pot_body_id], to="xyzw") di["cube_pos"] = cube_pos di["cube_quat"] = cube_quat di[pr0 + "eef_xpos"] = self._eef0_xpos di[pr1 + "eef_xpos"] = self._eef1_xpos di["handle_0_xpos"] = np.array(self._handle_0_xpos) di["handle_1_xpos"] = np.array(self._handle_1_xpos) di[pr0 + "gripper_to_handle"] = np.array(self._gripper_0_to_handle) di[pr1 + "gripper_to_handle"] = np.array(self._gripper_1_to_handle) di["object-state"] = np.concatenate([ di["cube_pos"], di["cube_quat"], di[pr0 + "eef_xpos"], di[pr1 + "eef_xpos"], di["handle_0_xpos"], di["handle_1_xpos"], di[pr0 + "gripper_to_handle"], di[pr1 + "gripper_to_handle"], ]) return di def _check_success(self): """ Check if pot is successfully lifted Returns: bool: True if pot is lifted """ pot_bottom_height = self.sim.data.site_xpos[ self.pot_center_id][2] - self.pot.get_top_offset()[2] table_height = self.sim.data.site_xpos[self.table_top_id][2] # cube is higher than the table top above a margin return pot_bottom_height > table_height + 0.10 def _check_robot_configuration(self, robots): """ Sanity check to make sure the inputted robots and configuration is acceptable Args: robots (str or list of str): Robots to instantiate within this env """ robots = robots if type(robots) == list or type(robots) == tuple else [ robots ] if self.env_configuration == "single-arm-opposed" or self.env_configuration == "single-arm-parallel": # Specifically two robots should be inputted! is_bimanual = False if type(robots) is not list or len(robots) != 2: raise ValueError( "Error: Exactly two single-armed robots should be inputted " "for this task configuration!") elif self.env_configuration == "bimanual": is_bimanual = True # Specifically one robot should be inputted! 
if type(robots) is list and len(robots) != 1: raise ValueError( "Error: Exactly one bimanual robot should be inputted " "for this task configuration!") else: # This is an unknown env configuration, print error raise ValueError( "Error: Unknown environment configuration received. Only 'bimanual', " "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}" .format(self.env_configuration)) # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual) for robot in robots: if check_type(robot, RobotType.bimanual) != is_bimanual: raise ValueError( "Error: For {} configuration, expected bimanual check to return {}; " "instead, got {}.".format( self.env_configuration, is_bimanual, check_type(robot, RobotType.bimanual))) @property def _handle_0_xpos(self): """ Grab the position of the left (blue) pot handle. Returns: np.array: (x,y,z) position of handle """ return self.sim.data.site_xpos[self.handle_0_site_id] @property def _handle_1_xpos(self): """ Grab the position of the right (green) pot handle. Returns: np.array: (x,y,z) position of handle """ return self.sim.data.site_xpos[self.handle_1_site_id] @property def _pot_quat(self): """ Grab the orientation of the pot body. Returns: np.array: (x,y,z,w) quaternion of the pot body """ return T.convert_quat(self.sim.data.body_xquat[self.pot_body_id], to="xyzw") @property def _world_quat(self): """ Grab the world orientation Returns: np.array: (x,y,z,w) world quaternion """ return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw") @property def _eef0_xpos(self): """ Grab the position of Robot 0's end effector. Returns: np.array: (x,y,z) position of EEF0 """ if self.env_configuration == "bimanual": return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id["left"]]) else: return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id]) @property def _eef1_xpos(self): """ Grab the position of Robot 1's end effector. Returns: np.array: (x,y,z) position of EEF1 """ if self.env_configuration == "bimanual": return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id["right"]]) else: return np.array( self.sim.data.site_xpos[self.robots[1].eef_site_id]) @property def _gripper_0_to_handle(self): """ Calculate vector from the left gripper to the left pot handle. Returns: np.array: (dx,dy,dz) distance vector between handle and EEF0 """ return self._handle_0_xpos - self._eef0_xpos @property def _gripper_1_to_handle(self): """ Calculate vector from the right gripper to the right pot handle. Returns: np.array: (dx,dy,dz) distance vector between handle and EEF1 """ return self._handle_1_xpos - self._eef1_xpos
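# A hedged usage sketch for the TwoArmLift class above: construct it with two
# single-arm robots in the default "single-arm-opposed" configuration, reset, and
# step a few no-op actions. The robot names, action_spec, and step/reset interface
# follow standard robosuite usage and are assumptions about the surrounding setup,
# not part of this file.
import numpy as np

if __name__ == "__main__":
    env = TwoArmLift(
        robots=["Panda", "Panda"],              # exactly two single-arm robots for this configuration
        env_configuration="single-arm-opposed",
        has_renderer=False,
        has_offscreen_renderer=False,
        use_camera_obs=False,                   # rely on low-level object observations instead
        use_object_obs=True,
        reward_shaping=True,
    )
    obs = env.reset()
    low, high = env.action_spec                 # per-dimension action bounds from the base env
    for _ in range(10):
        action = np.zeros_like(low)             # no-op action, just to exercise the loop
        obs, reward, done, info = env.step(action)
    print("pot-related keys:", [k for k in obs if "handle" in k or "cube" in k])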
class TwoArmPegInHole(TwoArmEnv): """ This class corresponds to the peg-in-hole task for two robot arms. Args: robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env (e.g.: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms) Note: Must be either 2 single-arm robots or 1 bimanual robot! env_configuration (str): Specifies how to position the robots within the environment. Can be either: :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x side of the table :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single-arm robots next to each other on the -x side of the table :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single-arm robots opposed from each other on the opposite +/-y sides of the table. Note that "default" corresponds to either "bimanual" if a bimanual robot is used or "single-arm-opposed" if two single-arm robots are used. controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a custom controller. Else, uses the default controller for this specific task. Should either be single dict if same controller is to be used for all robots or else it should be a list of the same length as "robots" param gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory. For this environment, setting a value other than the default (None) will raise an AssertionError, as this environment is not meant to be used with any gripper at all. initialization_noise (dict or list of dict): Dict containing the initialization noise parameters. The expected keys and corresponding value types are specified below: :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial joint positions. Setting this value to `None` or 0.0 results in no noise being applied. If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied. If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform" Should either be single dict if same noise value is to be used for all robots or else it should be a list of the same length as "robots" param :Note: Specifying "default" will automatically use the default noise settings. Specifying None will automatically create the required dict with "magnitude" set to 0.0. use_camera_obs (bool or list of bool): if True, every observation for a specific robot includes a rendered image. Should either be single bool if camera obs value is to be used for all robots or else it should be a list of the same length as "robots" param use_object_obs (bool): if True, include object (peg and hole) information in the observation. reward_scale (None or float): Scales the normalized reward function by the amount specified. If None, environment reward remains unnormalized reward_shaping (bool): if True, use dense rewards. peg_radius (2-tuple): low and high limits of the (uniformly sampled) radius of the peg peg_length (float): length of the peg has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering render_camera (str): Name of camera to render if `has_renderer` is True.
Setting this value to 'None' will result in the default angle being applied, which is useful as it can be dragged / panned by the user using the mouse render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. render_gpu_device_id (int): corresponds to the GPU device id to use for offscreen rendering. Defaults to -1, in which case the device will be inferred from environment variables (GPUS or CUDA_VISIBLE_DEVICES). control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else, only calls sim.reset and resets all robosuite-internal variables camera_names (str or list of str): name of camera to be rendered. Should either be single str if same name is to be used for all cameras' rendering or else it should be a list of cameras to render. :Note: At least one camera must be specified if @use_camera_obs is True. :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each robot's camera list). camera_heights (int or list of int): height of camera frame. Should either be single int if same height is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_widths (int or list of int): width of camera frame. Should either be single int if same width is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single bool if same depth setting is to be used for all cameras or else it should be a list of the same length as "camera names" param. Raises: AssertionError: [Gripper specified] ValueError: [Invalid number of robots specified] ValueError: [Invalid env configuration] ValueError: [Invalid robots for specified env configuration] """ def __init__( self, robots, env_configuration="single-arm-opposed", controller_configs=None, gripper_types=None, initialization_noise="default", use_camera_obs=True, use_object_obs=True, reward_scale=1.0, reward_shaping=False, peg_radius=(0.015, 0.03), peg_length=0.13, has_renderer=False, has_offscreen_renderer=True, render_camera="frontview", render_collision_mesh=False, render_visual_mesh=True, render_gpu_device_id=-1, control_freq=20, horizon=1000, ignore_done=False, hard_reset=True, camera_names="agentview", camera_heights=256, camera_widths=256, camera_depths=False, ): # Assert that the gripper type is None assert gripper_types is None, "Tried to specify gripper other than None in TwoArmPegInHole environment!" 
# reward configuration self.reward_scale = reward_scale self.reward_shaping = reward_shaping # whether to use ground-truth object states self.use_object_obs = use_object_obs # Save peg specs self.peg_radius = peg_radius self.peg_length = peg_length super().__init__( robots=robots, env_configuration=env_configuration, controller_configs=controller_configs, mount_types="default", gripper_types=gripper_types, initialization_noise=initialization_noise, use_camera_obs=use_camera_obs, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_camera=render_camera, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, render_gpu_device_id=render_gpu_device_id, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, hard_reset=hard_reset, camera_names=camera_names, camera_heights=camera_heights, camera_widths=camera_widths, camera_depths=camera_depths, ) def reward(self, action=None): """ Reward function for the task. Sparse un-normalized reward: - a discrete reward of 5.0 is provided if the peg is inside the plate's hole - Note that we enforce that it's inside at an appropriate angle (cos(theta) > 0.95). Un-normalized summed components if using reward shaping: - Reaching: in [0, 1], to encourage the arms to approach each other - Perpendicular Distance: in [0,1], to encourage the arms to approach each other - Parallel Distance: in [0,1], to encourage the arms to approach each other - Alignment: in [0, 1], to encourage having the right orientation between the peg and hole. - Placement: in {0, 1}, nonzero if the peg is in the hole with a relatively correct alignment Note that the final reward is normalized and scaled by reward_scale / 5.0 as well so that the max score is equal to reward_scale """ reward = 0 # Right location and angle if self._check_success(): reward = 1.0 # use a shaping reward if self.reward_shaping: # Grab relevant values t, d, cos = self._compute_orientation() # reaching reward hole_pos = self.sim.data.body_xpos[self.hole_body_id] gripper_site_pos = self.sim.data.body_xpos[self.peg_body_id] dist = np.linalg.norm(gripper_site_pos - hole_pos) reaching_reward = 1 - np.tanh(1.0 * dist) reward += reaching_reward # Orientation reward reward += 1 - np.tanh(d) reward += 1 - np.tanh(np.abs(t)) reward += cos # if we're not reward shaping, scale sparse reward so that the max reward is identical to its dense version else: reward *= 5.0 if self.reward_scale is not None: reward *= self.reward_scale / 5.0 return reward def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose(s) accordingly if self.env_configuration == "bimanual": xpos = self.robots[0].robot_model.base_xpos_offset["empty"] self.robots[0].robot_model.set_base_xpos(xpos) else: if self.env_configuration == "single-arm-opposed": # Set up robots facing towards each other by rotating them from their default position for robot, rotation in zip(self.robots, (np.pi / 2, -np.pi / 2)): xpos = robot.robot_model.base_xpos_offset["empty"] rot = np.array((0, 0, rotation)) xpos = T.euler2mat(rot) @ np.array(xpos) robot.robot_model.set_base_xpos(xpos) robot.robot_model.set_base_ori(rot) else: # "single-arm-parallel" configuration setting # Set up robots parallel to each other but offset from the center for robot, offset in zip(self.robots, (-0.25, 0.25)): xpos = robot.robot_model.base_xpos_offset["empty"] xpos = np.array(xpos) + np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) # Add arena and robot mujoco_arena 
= EmptyArena() # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # Modify default agentview camera mujoco_arena.set_camera(camera_name="agentview", pos=[ 1.0666432116509934, 1.4903257668114777e-08, 2.0563394967349096 ], quat=[ 0.6530979871749878, 0.27104058861732483, 0.27104055881500244, 0.6530978679656982 ]) # initialize objects of interest self.hole = PlateWithHoleObject(name="hole") tex_attrib = { "type": "cube", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } greenwood = CustomMaterial( texture="WoodGreen", tex_name="greenwood", mat_name="greenwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) self.peg = CylinderObject( name="peg", size_min=(self.peg_radius[0], self.peg_length), size_max=(self.peg_radius[1], self.peg_length), material=greenwood, rgba=[0, 1, 0, 1], joints=None, ) # Load hole object hole_obj = self.hole.get_obj() hole_obj.set("quat", "0 0 0.707 0.707") hole_obj.set("pos", "0.11 0 0.17") # Load peg object peg_obj = self.peg.get_obj() peg_obj.set("pos", array_to_string((0, 0, self.peg_length))) # Append appropriate objects to arms if self.env_configuration == "bimanual": r_eef, l_eef = [ self.robots[0].robot_model.eef_name[arm] for arm in self.robots[0].arms ] r_model, l_model = [ self.robots[0].robot_model, self.robots[0].robot_model ] else: r_eef, l_eef = [ robot.robot_model.eef_name for robot in self.robots ] r_model, l_model = [ self.robots[0].robot_model, self.robots[1].robot_model ] r_body = find_elements(root=r_model.worldbody, tags="body", attribs={"name": r_eef}, return_first=True) l_body = find_elements(root=l_model.worldbody, tags="body", attribs={"name": l_eef}, return_first=True) r_body.append(peg_obj) l_body.append(hole_obj) # task includes arena, robot, and objects of interest # We don't add peg and hole directly since they were already appended to the robots self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], ) # Make sure to add relevant assets from peg and hole objects self.model.merge_assets(self.hole) self.model.merge_assets(self.peg) def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # Additional object references from this env self.hole_body_id = self.sim.model.body_name2id(self.hole.root_body) self.peg_body_id = self.sim.model.body_name2id(self.peg.root_body) def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: `'robot-state'`: contains robot-centric information. `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information. `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation. `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True. 
Contains a rendered depth map from the simulation Returns: OrderedDict: Observations from the environment """ di = super()._get_observation() # low-level object information if self.use_object_obs: # Get robot prefix if self.env_configuration == "bimanual": pr0 = self.robots[0].robot_model.naming_prefix + "left_" pr1 = self.robots[0].robot_model.naming_prefix + "right_" else: pr0 = self.robots[0].robot_model.naming_prefix pr1 = self.robots[1].robot_model.naming_prefix # position and rotation of peg and hole hole_pos = np.array(self.sim.data.body_xpos[self.hole_body_id]) hole_quat = T.convert_quat( self.sim.data.body_xquat[self.hole_body_id], to="xyzw") di["hole_pos"] = hole_pos di["hole_quat"] = hole_quat peg_pos = np.array(self.sim.data.body_xpos[self.peg_body_id]) peg_quat = T.convert_quat( self.sim.data.body_xquat[self.peg_body_id], to="xyzw") di["peg_to_hole"] = peg_pos - hole_pos di["peg_quat"] = peg_quat # Relative orientation parameters t, d, cos = self._compute_orientation() di["angle"] = cos di["t"] = t di["d"] = d di["object-state"] = np.concatenate([ di["hole_pos"], di["hole_quat"], di["peg_to_hole"], di["peg_quat"], [di["angle"]], [di["t"]], [di["d"]], ]) return di def _check_success(self): """ Check if peg is successfully aligned and placed within the hole Returns: bool: True if peg is placed in hole correctly """ t, d, cos = self._compute_orientation() return d < 0.06 and -0.12 <= t <= 0.14 and cos > 0.95 def _compute_orientation(self): """ Helper function to return the relative positions between the hole and the peg. In particular, the intersection of the line defined by the peg and the plane defined by the hole is computed; the parallel distance, perpendicular distance, and angle are returned. Returns: 3-tuple: - (float): parallel distance - (float): perpendicular distance - (float): angle """ peg_mat = self.sim.data.body_xmat[self.peg_body_id] peg_mat.shape = (3, 3) peg_pos = self.sim.data.body_xpos[self.peg_body_id] hole_pos = self.sim.data.body_xpos[self.hole_body_id] hole_mat = self.sim.data.body_xmat[self.hole_body_id] hole_mat.shape = (3, 3) v = peg_mat @ np.array([0, 0, 1]) v = v / np.linalg.norm(v) center = hole_pos + hole_mat @ np.array([0.1, 0, 0]) t = (center - peg_pos) @ v / (np.linalg.norm(v)**2) d = np.linalg.norm(np.cross(v, peg_pos - center)) / np.linalg.norm(v) hole_normal = hole_mat @ np.array([0, 0, 1]) return ( t, d, abs( np.dot(hole_normal, v) / np.linalg.norm(hole_normal) / np.linalg.norm(v)), ) def _peg_pose_in_hole_frame(self): """ A helper function that takes in a named data field and returns the pose of that object in the base frame. Returns: np.array: (4,4) matrix corresponding to the pose of the peg in the hole frame """ # World frame peg_pos_in_world = self.sim.data.get_body_xpos(self.peg.root_body) peg_rot_in_world = self.sim.data.get_body_xmat( self.peg.root_body).reshape((3, 3)) peg_pose_in_world = T.make_pose(peg_pos_in_world, peg_rot_in_world) # World frame hole_pos_in_world = self.sim.data.get_body_xpos(self.hole.root_body) hole_rot_in_world = self.sim.data.get_body_xmat( self.hole.root_body).reshape((3, 3)) hole_pose_in_world = T.make_pose(hole_pos_in_world, hole_rot_in_world) world_pose_in_hole = T.pose_inv(hole_pose_in_world) peg_pose_in_hole = T.pose_in_A_to_pose_in_B(peg_pose_in_world, world_pose_in_hole) return peg_pose_in_hole
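# Standalone geometry check of _compute_orientation and _check_success above, using
# only NumPy: t is the signed distance along the peg axis from the peg position to
# the hole center, d is the perpendicular distance of the hole center from the peg
# axis, and cos measures alignment between the peg axis and the hole normal. The
# poses below are made-up numbers chosen to satisfy the d < 0.06, -0.12 <= t <= 0.14,
# cos > 0.95 success test.
import numpy as np

def compute_orientation(peg_pos, peg_mat, hole_pos, hole_mat):
    # Mirrors the formulas in _compute_orientation above, on explicit inputs.
    v = peg_mat @ np.array([0.0, 0.0, 1.0])
    v = v / np.linalg.norm(v)
    center = hole_pos + hole_mat @ np.array([0.1, 0.0, 0.0])
    t = (center - peg_pos) @ v / (np.linalg.norm(v) ** 2)
    d = np.linalg.norm(np.cross(v, peg_pos - center)) / np.linalg.norm(v)
    hole_normal = hole_mat @ np.array([0.0, 0.0, 1.0])
    cos = abs(np.dot(hole_normal, v) / (np.linalg.norm(hole_normal) * np.linalg.norm(v)))
    return t, d, cos

eye = np.eye(3)
t, d, cos = compute_orientation(
    peg_pos=np.array([0.11, 0.0, 0.15]),    # peg sitting just below the hole center
    peg_mat=eye,                            # peg axis along +z, same direction as the hole normal
    hole_pos=np.array([0.01, 0.0, 0.17]),   # chosen so center = hole_pos + R @ [0.1, 0, 0] = [0.11, 0, 0.17]
    hole_mat=eye,
)
assert d < 0.06 and -0.12 <= t <= 0.14 and cos > 0.95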
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace #mujoco_arena = TableArena( # table_full_size=self.table_full_size, # table_friction=self.table_friction, # table_offset=self.table_offset, #) mujoco_arena = EmptyArena() # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest tex_attrib = { "type": "ball", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } redwood = CustomMaterial( texture="WoodRed", tex_name="redwood", mat_name="redwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) self.ball = BallObject( name="ball", size_min=[0.018], # [0.015, 0.015, 0.015], size_max=[0.024], # [0.018, 0.018, 0.018]) rgba=[1, 0, 0, 1], density=1, joints=[ {"name":"ball_x", "type":"slide", "damping":100, "axis":"1 0 0", }, {"name":"ball_y", "type":"slide", "damping":100, "axis":"0 1 0"}, {"name":"ball_z", "type":"slide", "axis":"0 0 1", "damping":100, } ] ) # get current end effector position #max_pos = [-.75, .75] #np.clip(self.max_object_distance + eef_pos[:3], -1, 1) #min_pos = [-.75, .75] #np.clip(-self.max_object_distance + eef_pos[:3], -1, 1) ## Create placement initializer # TODO I think we need to create every time since joint position will be different if self.placement_initializer is not None: self.placement_initializer.reset() self.placement_initializer.add_objects(self.ball) else: self.placement_initializer = UniformRandomSampler( name="ObjectSampler", mujoco_objects=self.ball, x_range=[-.75, .75], y_range=[-.75, .75], rotation=None, ensure_object_boundary_in_range=False, ensure_valid_placement=True, reference_pos=self.table_offset, ) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.ball, )
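# The ball above is mounted on three named slide joints ("ball_x", "ball_y",
# "ball_z") instead of a single free joint, so its pose is three scalar qpos values
# rather than a 7-dim pos+quat vector. A hedged sketch of repositioning it after the
# model has been compiled, following the set_joint_qpos pattern used in the
# _reset_internal methods above; `env` is an assumed instance of this environment,
# and env.ball.joints is assumed to expose the compiled joint names in declaration
# order (x, y, z).
import numpy as np

def move_ball(env, target_xyz):
    # Write one scalar per slide joint, then forward the simulation so that
    # dependent quantities (body positions, site positions) are refreshed.
    for joint_name, value in zip(env.ball.joints, target_xyz):
        env.sim.data.set_joint_qpos(joint_name, value)
    env.sim.forward()

# Example with a made-up target: nudge the ball 5 cm along +x from the origin.
# move_ball(env, np.array([0.05, 0.0, 0.0]))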
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose(s) accordingly if self.env_configuration == "bimanual": xpos = self.robots[0].robot_model.base_xpos_offset["empty"] self.robots[0].robot_model.set_base_xpos(xpos) else: if self.env_configuration == "single-arm-opposed": # Set up robots facing towards each other by rotating them from their default position for robot, rotation in zip(self.robots, (np.pi / 2, -np.pi / 2)): xpos = robot.robot_model.base_xpos_offset["empty"] rot = np.array((0, 0, rotation)) xpos = T.euler2mat(rot) @ np.array(xpos) robot.robot_model.set_base_xpos(xpos) robot.robot_model.set_base_ori(rot) else: # "single-arm-parallel" configuration setting # Set up robots parallel to each other but offset from the center for robot, offset in zip(self.robots, (-0.25, 0.25)): xpos = robot.robot_model.base_xpos_offset["empty"] xpos = np.array(xpos) + np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) # Add arena and robot mujoco_arena = EmptyArena() # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # Modify default agentview camera mujoco_arena.set_camera(camera_name="agentview", pos=[ 1.0666432116509934, 1.4903257668114777e-08, 2.0563394967349096 ], quat=[ 0.6530979871749878, 0.27104058861732483, 0.27104055881500244, 0.6530978679656982 ]) # initialize objects of interest self.hole = PlateWithHoleObject(name="hole") tex_attrib = { "type": "cube", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } greenwood = CustomMaterial( texture="WoodGreen", tex_name="greenwood", mat_name="greenwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) self.peg = CylinderObject( name="peg", size_min=(self.peg_radius[0], self.peg_length), size_max=(self.peg_radius[1], self.peg_length), material=greenwood, rgba=[0, 1, 0, 1], joints=None, ) # Load hole object hole_obj = self.hole.get_obj() hole_obj.set("quat", "0 0 0.707 0.707") hole_obj.set("pos", "0.11 0 0.17") # Load peg object peg_obj = self.peg.get_obj() peg_obj.set("pos", array_to_string((0, 0, self.peg_length))) # Append appropriate objects to arms if self.env_configuration == "bimanual": r_eef, l_eef = [ self.robots[0].robot_model.eef_name[arm] for arm in self.robots[0].arms ] r_model, l_model = [ self.robots[0].robot_model, self.robots[0].robot_model ] else: r_eef, l_eef = [ robot.robot_model.eef_name for robot in self.robots ] r_model, l_model = [ self.robots[0].robot_model, self.robots[1].robot_model ] r_body = find_elements(root=r_model.worldbody, tags="body", attribs={"name": r_eef}, return_first=True) l_body = find_elements(root=l_model.worldbody, tags="body", attribs={"name": l_eef}, return_first=True) r_body.append(peg_obj) l_body.append(hole_obj) # task includes arena, robot, and objects of interest # We don't add peg and hole directly since they were already appended to the robots self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], ) # Make sure to add relevant assets from peg and hole objects self.model.merge_assets(self.hole) self.model.merge_assets(self.peg)
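# The _load_model above attaches the peg and hole to the robots by locating each
# end-effector <body> in the robot's MJCF tree with find_elements and appending the
# object's XML under it. A minimal standalone sketch of that lookup-and-append step
# on a hand-written ElementTree; the body/site names below are made up, and the
# import path for find_elements (robosuite.utils.mjcf_utils) is an assumption that
# may differ between robosuite versions.
import xml.etree.ElementTree as ET
from robosuite.utils.mjcf_utils import find_elements

worldbody = ET.fromstring(
    "<worldbody>"
    "<body name='base'>"
    "<body name='robot0_right_hand'><site name='robot0_ee'/></body>"
    "</body>"
    "</worldbody>"
)

# Same call signature as in _load_model above: search by tag and attribute, return the first hit.
eef_body = find_elements(root=worldbody, tags="body",
                         attribs={"name": "robot0_right_hand"}, return_first=True)

# Append a placeholder object body, exactly as r_body.append(peg_obj) does above.
peg_stub = ET.Element("body", attrib={"name": "peg_main", "pos": "0 0 0.13"})
eef_body.append(peg_stub)
assert worldbody.find(".//body[@name='peg_main']") is not None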
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose(s) accordingly if self.env_configuration == "bimanual": xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) else: if self.env_configuration == "single-arm-opposed": # Set up robots facing towards each other by rotating them from their default position for robot, rotation in zip(self.robots, (np.pi/2, -np.pi/2)): xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0]) rot = np.array((0, 0, rotation)) xpos = T.euler2mat(rot) @ np.array(xpos) robot.robot_model.set_base_xpos(xpos) robot.robot_model.set_base_ori(rot) else: # "single-arm-parallel" configuration setting # Set up robots parallel to each other but offset from the center for robot, offset in zip(self.robots, (-0.25, 0.25)): xpos = robot.robot_model.base_xpos_offset["table"](self.table_full_size[0]) xpos = np.array(xpos) + np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) # load model for table top workspace mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=self.table_offset, ) # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest self.pot = PotWithHandlesObject(name="pot") # Create placement initializer if self.placement_initializer is not None: self.placement_initializer.reset() self.placement_initializer.add_objects(self.pot) else: self.placement_initializer = UniformRandomSampler( name="ObjectSampler", mujoco_objects=self.pot, x_range=[-0.03, 0.03], y_range=[-0.03, 0.03], ensure_object_boundary_in_range=False, ensure_valid_placement=True, reference_pos=self.table_offset, rotation=(np.pi + -np.pi / 3, np.pi + np.pi / 3), ) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.pot, )
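# The sampler above draws the pot's rotation from (pi - pi/3, pi + pi/3); since
# rotation_axis is omitted it is assumed to default to 'z' (the other samplers in
# this file pass rotation_axis='z' explicitly). In other words, the pot is flipped
# roughly 180 degrees about the vertical axis with +/- 60 degrees of variation. A
# pure-NumPy sketch of what one such sample looks like as a quaternion:
import numpy as np

low, high = np.pi - np.pi / 3, np.pi + np.pi / 3
angle = np.random.uniform(low, high)
# Axis-angle rotation about z -> quaternion in (w, x, y, z) order, MuJoCo's convention.
quat_wxyz = np.array([np.cos(angle / 2), 0.0, 0.0, np.sin(angle / 2)])
print(f"sampled z-rotation: {np.degrees(angle):.1f} deg, quat (wxyz): {quat_wxyz}")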
class NutAssembly(RobotEnv): """ This class corresponds to the nut assembly task for a single robot arm. Args: robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms) Note: Must be a single single-arm robot! controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a custom controller. Else, uses the default controller for this specific task. Should either be single dict if same controller is to be used for all robots or else it should be a list of the same length as "robots" param gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory. Default is "default", which is the default grippers(s) associated with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model overrides the default gripper. Should either be single str if same gripper type is to be used for all robots or else it should be a list of the same length as "robots" param gripper_visualizations (bool or list of bool): True if using gripper visualization. Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all robots or else it should be a list of the same length as "robots" param initialization_noise (dict or list of dict): Dict containing the initialization noise parameters. The expected keys and corresponding value types are specified below: :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial joint positions. Setting this value to `None` or 0.0 results in no noise being applied. If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied, If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform" Should either be single dict if same noise value is to be used for all robots or else it should be a list of the same length as "robots" param :Note: Specifying "default" will automatically use the default noise settings. Specifying None will automatically create the required dict with "magnitude" set to 0.0. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes rendered image(s) use_object_obs (bool): if True, include object (cube) information in the observation. reward_scale (None or float): Scales the normalized reward function by the amount specified. If None, environment reward remains unnormalized reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. single_object_mode (int): specifies which version of the task to do. Note that the observations change accordingly. :`0`: corresponds to the full task with both types of nuts. :`1`: corresponds to an easier task with only one type of nut initialized on the table with every reset. The type is randomized on every reset. :`2`: corresponds to an easier task with only one type of nut initialized on the table with every reset. The type is kept constant and will not change between resets. nut_type (string): if provided, should be either "round" or "square". 
            Determines which type of nut (round or square) will be spawned on every environment reset.
            Only used if @single_object_mode is 2.

        use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging.

        has_renderer (bool): If True, render the simulation state in a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering.

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to None
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse.

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive per second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call; else,
            only calls sim.reset and resets all robosuite-internal variables.

        camera_names (str or list of str): name of camera(s) to be rendered. Should either be a single str if
            the same name is to be used for all cameras' rendering or else a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from
                each robot's camera list.

        camera_heights (int or list of int): height of camera frame. Should either be a single int if the same
            height is to be used for all cameras' frames or else a list of the same length as the
            "camera_names" param.

        camera_widths (int or list of int): width of camera frame. Should either be a single int if the same
            width is to be used for all cameras' frames or else a list of the same length as the
            "camera_names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be a
            single bool if the same depth setting is to be used for all cameras or else a list of the same
            length as the "camera_names" param.
    Raises:
        AssertionError: [Invalid nut type specified]
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1, 0.005, 0.0001),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        single_object_mode=0,
        nut_type=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # task settings
        self.single_object_mode = single_object_mode
        self.nut_to_id = {"square": 0, "round": 1}
        if nut_type is not None:
            assert nut_type in self.nut_to_id.keys(), \
                "invalid @nut_type argument - choose one of {}".format(list(self.nut_to_id.keys()))
            self.nut_id = self.nut_to_id[nut_type]  # use for convenient indexing
        self.obj_to_use = None

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # treat sampling of each type of nut differently, since we require
            # different sampling ranges for each
            self.placement_initializer = SequentialCompositeSampler()
            self.placement_initializer.sample_on_top(
                "SquareNut0",
                surface_name="table",
                x_range=[-0.115, -0.11],
                y_range=[0.11, 0.225],
                rotation=None,
                rotation_axis='z',
                z_offset=0.02,
                ensure_object_boundary_in_range=False,
            )
            self.placement_initializer.sample_on_top(
                "RoundNut0",
                surface_name="table",
                x_range=[-0.115, -0.11],
                y_range=[-0.225, -0.11],
                rotation=None,
                rotation_axis='z',
                z_offset=0.02,
                ensure_object_boundary_in_range=False,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )
    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 1.0 per nut if it is placed around its correct peg

        Un-normalized components if using reward shaping, where the maximum is returned if not solved:

            - Reaching: in [0, 0.1], proportional to the distance between the gripper and the closest nut
            - Grasping: in {0, 0.35}, nonzero if the gripper is grasping a nut
            - Lifting: in {0, [0.35, 0.5]}, nonzero only if a nut is grasped; proportional to lifting height
            - Hovering: in {0, [0.5, 0.7]}, nonzero only if a nut is lifted; proportional to the distance
              from the nut to its peg

        Note that a successfully completed task (nut around peg) returns 1.0 per nut regardless of whether
        the environment is using sparse or shaped rewards.

        Note that the final reward is also normalized and scaled by reward_scale / 2.0 (or by reward_scale if
        only a single nut is being used) so that the maximum score equals reward_scale.

        Args:
            action (np.array): [NOT USED]

        Returns:
            float: reward value
        """
        # compute sparse rewards
        self._check_success()
        reward = np.sum(self.objects_on_pegs)

        # add in shaped rewards
        if self.reward_shaping:
            staged_rewards = self.staged_rewards()
            reward += max(staged_rewards)
        if self.reward_scale is not None:
            reward *= self.reward_scale
            if self.single_object_mode == 0:
                reward /= 2.0
        return reward

    def staged_rewards(self):
        """
        Calculates staged rewards based on current physical states.
        Stages consist of reaching, grasping, lifting, and hovering.

        Returns:
            4-tuple:

                - (float) reaching reward
                - (float) grasping reward
                - (float) lifting reward
                - (float) hovering reward
        """
        reach_mult = 0.1
        grasp_mult = 0.35
        lift_mult = 0.5
        hover_mult = 0.7

        # filter out objects that are already on the correct pegs
        names_to_reach = []
        objs_to_reach = []
        geoms_to_grasp = []
        geoms_by_array = []
        for i in range(len(self.ob_inits)):
            if self.objects_on_pegs[i]:
                continue
            obj_str = str(self.item_names[i]) + "0"
            names_to_reach.append(obj_str)
            objs_to_reach.append(self.obj_body_id[obj_str])
            geoms_to_grasp.extend(self.obj_geom_id[obj_str])
            geoms_by_array.append(self.obj_geom_id[obj_str])

        ### reaching reward governed by distance to closest object ###
        r_reach = 0.
        if len(objs_to_reach):
            # reaching reward via minimum distance to the handles of the objects (the last geom of each nut)
            geom_ids = [elem[-1] for elem in geoms_by_array]
            target_geom_pos = self.sim.data.geom_xpos[geom_ids]
            gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]
            dists = np.linalg.norm(target_geom_pos - gripper_site_pos.reshape(1, -1), axis=1)
            r_reach = (1 - np.tanh(10.0 * min(dists))) * reach_mult

        ### grasping reward for touching any objects of interest ###
        touch_left_finger = False
        touch_right_finger = False
        for i in range(self.sim.data.ncon):
            c = self.sim.data.contact[i]
            if c.geom1 in geoms_to_grasp:
                if c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            elif c.geom2 in geoms_to_grasp:
                if c.geom1 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids:
                    touch_right_finger = True
        has_grasp = touch_left_finger and touch_right_finger
        r_grasp = int(has_grasp) * grasp_mult

        ### lifting reward for picking up an object ###
        r_lift = 0.
        table_pos = np.array(self.sim.data.body_xpos[self.table_body_id])
        if len(objs_to_reach) and r_grasp > 0.:
            z_target = table_pos[2] + 0.2
            object_z_locs = self.sim.data.body_xpos[objs_to_reach][:, 2]
            z_dists = np.maximum(z_target - object_z_locs, 0.)
            r_lift = grasp_mult + (1 - np.tanh(15.0 * min(z_dists))) * (lift_mult - grasp_mult)

        ### hover reward for getting object above peg ###
        r_hover = 0.
        if len(objs_to_reach):
            r_hovers = np.zeros(len(objs_to_reach))
            for i in range(len(objs_to_reach)):
                if names_to_reach[i].startswith(self.item_names[0]):
                    peg_pos = np.array(self.sim.data.body_xpos[self.peg1_body_id])[:2]
                elif names_to_reach[i].startswith(self.item_names[1]):
                    peg_pos = np.array(self.sim.data.body_xpos[self.peg2_body_id])[:2]
                else:
                    raise Exception("Got invalid object to reach: {}".format(names_to_reach[i]))
                ob_xy = self.sim.data.body_xpos[objs_to_reach[i]][:2]
                dist = np.linalg.norm(peg_pos - ob_xy)
                r_hovers[i] = r_lift + (1 - np.tanh(10.0 * dist)) * (hover_mult - lift_mult)
            r_hover = np.max(r_hovers)

        return r_reach, r_grasp, r_lift, r_hover
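
    # Worked example (added for illustration; the numbers are hypothetical, not taken from
    # the simulator): if neither nut is on its peg, the gripper hovers 5 cm from the closest
    # nut handle, and nothing is grasped, then
    #     r_reach = (1 - tanh(10 * 0.05)) * 0.1 ≈ 0.054,   r_grasp = r_lift = r_hover = 0,
    # so reward() adds max(...) ≈ 0.054 to the sparse term. Once a nut is grasped, the shaped
    # bonus jumps to at least grasp_mult = 0.35 and rises toward hover_mult = 0.7 as the nut
    # is lifted and brought directly over its peg.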
    def on_peg(self, obj_pos, peg_id):
        # a nut counts as "on" its peg if it is close to the peg in the xy-plane
        # and low enough along z (i.e. it has been dropped over the peg)
        if peg_id == 0:
            peg_pos = np.array(self.sim.data.body_xpos[self.peg1_body_id])
        else:
            peg_pos = np.array(self.sim.data.body_xpos[self.peg2_body_id])

        res = False
        if (
            abs(obj_pos[0] - peg_pos[0]) < 0.03
            and abs(obj_pos[1] - peg_pos[1]) < 0.03
            and obj_pos[2] < self.mujoco_arena.table_offset[2] + 0.05
        ):
            res = True
        return res

    def clear_objects(self, obj):
        """
        Clears objects without the name @obj out of the task space. This is useful
        for supporting task modes with single types of objects, as in
        @self.single_object_mode without changing the model definition.

        Args:
            obj (str): Name of object to keep in the task space
        """
        for obj_name, obj_mjcf in self.mujoco_objects.items():
            if obj_name == obj:
                continue
            else:
                # teleport the unused object far away from the workspace
                sim_state = self.sim.get_state()
                sim_state.qpos[self.sim.model.get_joint_qpos_addr(obj_name + "_jnt0")[0]] = 10
                self.sim.set_state(sim_state)
                self.sim.forward()

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = PegsArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.82),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # define mujoco objects
        self.ob_inits = [SquareNutObject, RoundNutObject]
        self.item_names = ["SquareNut", "RoundNut"]
        self.item_names_org = list(self.item_names)
        self.obj_to_use = (self.item_names[1] + "{}").format(0)
        self.ngeoms = [5, 9]

        lst = []
        for i in range(len(self.ob_inits)):
            ob_name = self.item_names[i] + "0"
            ob = self.ob_inits[i](
                name=ob_name,
                joints=[dict(type="free", damping="0.0005")],  # damp the free joint for each object
            )
            lst.append((ob_name, ob))

        self.mujoco_objects = OrderedDict(lst)
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )

        # set positions of objects
        self.model.place_objects()
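
    # Note (added for clarity): _load_model above names each nut body "<ItemName>0"
    # (e.g. "SquareNut0", "RoundNut0"), and _get_reference below expects that nut's
    # collision geoms to be named "<ItemName>0-<j>", with self.ngeoms = [5, 9] geoms
    # for the square and round nut respectively. The free joint added to each nut is
    # likewise referenced elsewhere as "<ItemName>0_jnt0" (see clear_objects and
    # _reset_internal).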
    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.obj_body_id = {}
        self.obj_geom_id = {}

        self.table_body_id = self.sim.model.body_name2id("table")
        self.peg1_body_id = self.sim.model.body_name2id("peg1")
        self.peg2_body_id = self.sim.model.body_name2id("peg2")

        for i in range(len(self.ob_inits)):
            obj_str = str(self.item_names[i]) + "0"
            self.obj_body_id[obj_str] = self.sim.model.body_name2id(obj_str)
            geom_ids = []
            for j in range(self.ngeoms[i]):
                geom_ids.append(self.sim.model.geom_name2id(obj_str + "-{}".format(j)))
            self.obj_geom_id[obj_str] = geom_ids

        # information of objects
        self.object_names = list(self.mujoco_objects.keys())
        self.object_site_ids = [
            self.sim.model.site_name2id(ob_name) for ob_name in self.object_names
        ]

        # id of gripper finger geoms, used for contact checking
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["left_finger"]
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["right_finger"]
        ]

        # all geoms considered during collision checking (self.sim.data.contact stores geom1 / geom2 pairs)
        self.collision_check_geom_names = self.sim.model._geom_name2id.keys()
        self.collision_check_geom_ids = [
            self.sim.model._geom_name2id[k] for k in self.collision_check_geom_names
        ]

        # keep track of which objects are on their corresponding pegs
        self.objects_on_pegs = np.zeros(len(self.ob_inits))

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate([np.array(obj_pos[i]), np.array(obj_quat[i])]),
                )

        # information of objects
        self.object_names = list(self.mujoco_objects.keys())
        self.object_site_ids = [
            self.sim.model.site_name2id(ob_name) for ob_name in self.object_names
        ]

        # Move objects out of the scene depending on the mode
        if self.single_object_mode == 1:
            self.obj_to_use = (random.choice(self.item_names) + "{}").format(0)
            self.clear_objects(self.obj_to_use)
        elif self.single_object_mode == 2:
            self.obj_to_use = (self.item_names[self.nut_id] + "{}").format(0)
            self.clear_objects(self.obj_to_use)
    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains
                object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered
                frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
                Contains a rendered depth map from the simulation.

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            pr = self.robots[0].robot_model.naming_prefix

            # remember the keys to collect into object info
            object_state_keys = []

            # for conversion to relative gripper frame
            gripper_pose = T.pose2mat((di[pr + "eef_pos"], di[pr + "eef_quat"]))
            world_pose_in_gripper = T.pose_inv(gripper_pose)

            for i in range(len(self.item_names_org)):
                if self.single_object_mode == 2 and self.nut_id != i:
                    # skip observations for the nut type that is not in use
                    continue

                obj_str = str(self.item_names_org[i]) + "0"
                obj_pos = np.array(self.sim.data.body_xpos[self.obj_body_id[obj_str]])
                obj_quat = T.convert_quat(
                    self.sim.data.body_xquat[self.obj_body_id[obj_str]], to="xyzw"
                )
                di["{}_pos".format(obj_str)] = obj_pos
                di["{}_quat".format(obj_str)] = obj_quat

                # pose of the nut expressed in the gripper frame
                object_pose = T.pose2mat((obj_pos, obj_quat))
                rel_pose = T.pose_in_A_to_pose_in_B(object_pose, world_pose_in_gripper)
                rel_pos, rel_quat = T.mat2pose(rel_pose)
                di["{}_to_{}eef_pos".format(obj_str, pr)] = rel_pos
                di["{}_to_{}eef_quat".format(obj_str, pr)] = rel_quat

                object_state_keys.append("{}_pos".format(obj_str))
                object_state_keys.append("{}_quat".format(obj_str))
                object_state_keys.append("{}_to_{}eef_pos".format(obj_str, pr))
                object_state_keys.append("{}_to_{}eef_quat".format(obj_str, pr))

            if self.single_object_mode == 1:
                # zero out the observations of all other objects
                for obj_str, obj_mjcf in self.mujoco_objects.items():
                    if obj_str == self.obj_to_use:
                        continue
                    else:
                        di["{}_pos".format(obj_str)] *= 0.0
                        di["{}_quat".format(obj_str)] *= 0.0
                        di["{}_to_{}eef_pos".format(obj_str, pr)] *= 0.0
                        di["{}_to_{}eef_quat".format(obj_str, pr)] *= 0.0

            di["object-state"] = np.concatenate([di[k] for k in object_state_keys])

        return di

    def _check_success(self):
        """
        Check if all nuts have been successfully placed around their corresponding pegs.

        Returns:
            bool: True if all nuts are placed correctly
        """
        # remember objects that are on the correct pegs
        gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]
        for i in range(len(self.ob_inits)):
            obj_str = str(self.item_names[i]) + "0"
            obj_pos = self.sim.data.body_xpos[self.obj_body_id[obj_str]]
            dist = np.linalg.norm(gripper_site_pos - obj_pos)
            r_reach = 1 - np.tanh(10.0 * dist)
            self.objects_on_pegs[i] = int(self.on_peg(obj_pos, i) and r_reach < 0.6)

        if self.single_object_mode > 0:
            return np.sum(self.objects_on_pegs) > 0  # need only one object on its peg

        # returns True if all objects are on correct pegs
        return np.sum(self.objects_on_pegs) == len(self.ob_inits)
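
    # Note (added for clarity): a nut counts as successfully placed only if on_peg()
    # holds (within 3 cm of the peg in x and y, and below table height + 5 cm) *and*
    # the gripper has released it: the r_reach < 0.6 condition above corresponds to
    # the gripper site being more than roughly 4 cm away, since
    # 1 - tanh(10 * d) < 0.6 whenever d > atanh(0.4) / 10 ≈ 0.042 m.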
    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on the distance to the closest nut
        if self.robots[0].gripper_visualization:
            # find closest object
            square_dist = lambda x: np.sum(
                np.square(
                    x - self.sim.data.get_site_xpos(
                        self.robots[0].gripper.visualization_sites["grip_site"]
                    )
                )
            )
            dists = np.array(list(map(square_dist, self.sim.data.site_xpos)))
            dists[self.robots[0].eef_site_id] = np.inf  # make sure we don't pick the same site
            dists[self.robots[0].eef_cylinder_id] = np.inf
            ob_dists = dists[self.object_site_ids]  # only keep the object sites we care about
            min_dist = np.min(ob_dists)
            ob_id = np.argmin(ob_dists)

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(min_dist / max_dist, 1.)) ** 15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration are acceptable.

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(robots) == 1, "Error: Only one robot should be inputted for this task!"
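

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): how an environment
# like this is typically constructed and stepped through robosuite's registry. It
# assumes the standard `robosuite.make` entry point, a registered "NutAssembly"
# environment name, and the `env.action_spec` convention; exact argument names may
# differ between robosuite versions.
def _nut_assembly_usage_example():
    import numpy as np
    import robosuite as suite

    env = suite.make(
        "NutAssembly",
        robots="Sawyer",          # any single-arm robot
        has_renderer=False,
        use_camera_obs=False,
        reward_shaping=True,      # enable the dense staged rewards defined above
        single_object_mode=2,     # only spawn one nut type ...
        nut_type="round",         # ... and keep it fixed across resets
    )
    obs = env.reset()
    low, high = env.action_spec
    for _ in range(10):
        action = np.random.uniform(low, high)   # random actions, for illustration only
        obs, reward, done, info = env.step(action)
# ---------------------------------------------------------------------------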