class PandaLift(PandaEnv): """ This class corresponds to the lifting task for the Panda robot arm. """ def __init__( self, gripper_type="PandaGripper", table_full_size=(0.8, 0.8, 0.8), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_shaping=False, placement_initializer=None, gripper_visualization=False, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, ): """ Args: gripper_type (str): type of gripper, used to instantiate gripper models from gripper factory. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes a rendered image. use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. gripper_visualization (bool): True if using gripper visualization. Useful for teleoperation. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. """ # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: self.placement_initializer = UniformRandomSampler( x_range=[-0.03, 0.03], y_range=[-0.03, 0.03], ensure_object_boundary_in_range=False, z_rotation=True, ) super().__init__( gripper_type=gripper_type, gripper_visualization=gripper_visualization, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, use_camera_obs=use_camera_obs, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, camera_depth=camera_depth, ) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The panda robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest cube = BoxObject( size_min=[0.020, 0.020, 0.020], # [0.015, 0.015, 0.015], size_max=[0.022, 0.022, 0.022], # [0.018, 0.018, 0.018]) rgba=[1, 0, 0, 1], ) self.mujoco_objects = OrderedDict([("cube", cube)]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cube_body_id = self.sim.model.body_name2id("cube") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] self.cube_geom_id = self.sim.model.geom_name2id("cube") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # reset positions of objects self.model.place_objects() # reset joint positions init_pos = self.mujoco_robot.init_qpos init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) def reward(self, action=None): """ Reward function for the task. The dense reward has three components. Reaching: in [0, 1], to encourage the arm to reach the cube Grasping: in {0, 0.25}, non-zero if arm is grasping the cube Lifting: in {0, 1}, non-zero if arm has lifted the cube The sparse reward only consists of the lifting component. Args: action (np array): unused for this task Returns: reward (float): the reward """ reward = 0. # sparse completion reward if self._check_success(): reward = 1.0 # use a shaping reward if self.reward_shaping: # reaching reward cube_pos = self.sim.data.body_xpos[self.cube_body_id] gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] dist = np.linalg.norm(gripper_site_pos - cube_pos) reaching_reward = 1 - np.tanh(10.0 * dist) reward += reaching_reward # grasping reward touch_left_finger = False touch_right_finger = False for i in range(self.sim.data.ncon): c = self.sim.data.contact[i] if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id: touch_left_finger = True if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids: touch_left_finger = True if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id: touch_right_finger = True if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids: touch_right_finger = True if touch_left_finger and touch_right_finger: reward += 0.25 return reward def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: # position and rotation of object cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id]) cube_quat = convert_quat(np.array( self.sim.data.body_xquat[self.cube_body_id]), to="xyzw") di["cube_pos"] = cube_pos di["cube_quat"] = cube_quat gripper_site_pos = np.array( self.sim.data.site_xpos[self.eef_site_id]) di["gripper_to_cube"] = gripper_site_pos - cube_pos di["object-state"] = np.concatenate( [cube_pos, cube_quat, di["gripper_to_cube"]]) return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[:self.sim.data.ncon]: if (self.sim.model.geom_id2name( contact.geom1) in self.gripper.contact_geoms() or self.sim.model.geom_id2name( contact.geom2) in self.gripper.contact_geoms()): collision = True break return collision def _check_success(self): """ Returns True if task has been completed. """ cube_height = self.sim.data.body_xpos[self.cube_body_id][2] table_height = self.table_full_size[2] # cube is higher than the table top above a margin return cube_height > table_height + 0.04 def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.gripper_visualization: # get distance to cube cube_site_id = self.sim.model.site_name2id("cube") dist = np.sum( np.square(self.sim.data.site_xpos[cube_site_id] - self.sim.data.get_site_xpos("grip_site"))) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba
class PandaPush(change_dof(PandaEnv, 7, 8)): # don't need to control a gripper """ This class corresponds to the pushing task for the Panda robot arm. """ parameters_spec = { **PandaEnv.parameters_spec, 'table_size_0': [0.7, 0.9], 'table_size_1': [0.7, 0.9], 'table_size_2': [0.7, 0.9], #'table_friction_0': [0.4, 1.6], 'table_friction_1': [0.0025, 0.0075], 'table_friction_2': [0.00005, 0.00015], 'boxobject_size_0': [0.018, 0.022], 'boxobject_size_1': [0.018, 0.022], 'boxobject_size_2': [0.018, 0.022], 'boxobject_friction_0': [0.05, 0.15], #'boxobject_friction_1': [0.0025, 0.0075], # fixed this to zero 'boxobject_friction_2': [0.00005, 0.00015], 'boxobject_density_1000': [0.06, 0.14], } def reset_props(self, table_size_0=0.8, table_size_1=0.8, table_size_2=0.8, table_friction_0=0., table_friction_1=0.005, table_friction_2=0.0001, boxobject_size_0=0.020, boxobject_size_1=0.020, boxobject_size_2=0.020, boxobject_friction_0=0.1, boxobject_friction_1=0.0, boxobject_friction_2=0.0001, boxobject_density_1000=0.1, **kwargs): self.table_full_size = (table_size_0, table_size_1, table_size_2) self.table_friction = (table_friction_0, table_friction_1, table_friction_2) self.boxobject_size = (boxobject_size_0, boxobject_size_1, boxobject_size_2) self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1, boxobject_friction_2) self.boxobject_density = boxobject_density_1000 * 1000. super().reset_props(**kwargs) def __init__(self, use_object_obs=True, reward_shaping=True, placement_initializer=None, object_obs_process=True, **kwargs): """ Args: use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. object_obs_process (bool): if True, process the object observation to get a task_state. Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing. """ # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: # self.placement_initializer = UniformRandomSampler( # x_range=[-0.1, 0.1], # y_range=[-0.1, 0.1], # ensure_object_boundary_in_range=False, # z_rotation=None, # ) self.placement_initializer = UniformRandomSamplerObjectSpecific( x_ranges=[[-0.03, -0.02], [0.09, 0.1]], y_ranges=[[-0.05, -0.04], [-0.05, -0.04]], ensure_object_boundary_in_range=False, z_rotation=None, ) # for first initialization self.table_full_size = (0.8, 0.8, 0.8) self.table_friction = (0., 0.005, 0.0001) self.boxobject_size = (0.02, 0.02, 0.02) self.boxobject_friction = (0.1, 0.005, 0.0001) self.boxobject_density = 100. self.object_obs_process = object_obs_process super().__init__(gripper_visualization=True, **kwargs) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The panda robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation. cube = FullyFrictionalBoxObject( size=self.boxobject_size, friction=self.boxobject_friction, density=self.boxobject_density, rgba=[1, 0, 0, 1], ) self.mujoco_cube = cube goal = CylinderObject( size=[0.03, 0.001], rgba=[0, 1, 0, 1], ) self.mujoco_goal = goal self.mujoco_objects = OrderedDict([("cube", cube), ("goal", goal)]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, visual_objects=['goal'], ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cube_body_id = self.sim.model.body_name2id("cube") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] self.cube_geom_id = self.sim.model.geom_name2id("cube") # gripper ids self.goal_body_id = self.sim.model.body_name2id('goal') self.goal_site_id = self.sim.model.site_name2id('goal') def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() self.sim.forward() # reset positions of objects self.model.place_objects() # reset joint positions init_pos = self.mujoco_robot.init_qpos init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) # shut the gripper self.sim.data.qpos[ self._ref_joint_gripper_actuator_indexes] = np.array([0., -0.]) # set other reference attributes eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape( (3, 3)) self.world_rot_in_eef = copy.deepcopy( eef_rot_in_world.T ) # TODO inspect on this: should we set a golden reference other than a initial position? # reward function from sawyer_push def reward(self, action=None): """ Reward function for the task. The dense reward has three components. Reaching: in [-inf, 0], to encourage the arm to reach the object Goal Distance: in [-inf, 0] the distance between the pushed object and the goal Safety reward in [-inf, 0], -1 for every joint that is at its limit. The sparse reward only receives a {0,1} upon reaching the goal Args: action (np array): The action taken in that timestep Returns: reward (float): the reward previously in robosuite-extra, when dense reward is used, the return value will be a dictionary. but we removed that feature. """ reward = 0. # sparse completion reward if not self.reward_shaping and self._check_success(): reward = 1.0 # use a dense reward if self.reward_shaping: object_pos = self.sim.data.body_xpos[self.cube_body_id] # max joint angles reward joint_limits = self._joint_ranges current_joint_pos = self._joint_positions hitting_limits_reward = -int( any([(x < joint_limits[i, 0] + 0.05 or x > joint_limits[i, 1] - 0.05) for i, x in enumerate(current_joint_pos)])) reward += hitting_limits_reward # reaching reward gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] dist = np.linalg.norm(gripper_site_pos - object_pos) reaching_reward = -0.15 * dist reward += reaching_reward # print(gripper_site_pos, object_pos, reaching_reward) # Success Reward success = self._check_success() if (success): reward += 0.1 # goal distance reward goal_pos = self.sim.data.site_xpos[self.goal_site_id] dist = np.linalg.norm(goal_pos - object_pos) goal_distance_reward = -dist reward += goal_distance_reward # punish when there is a line of object--gripper--goal angle_g_o_g = angle_between(gripper_site_pos - object_pos, goal_pos - object_pos) if not success and angle_g_o_g < np.pi / 2.: reward += -0.03 - 0.02 * (np.pi / 2. - angle_g_o_g) # print('grippersitepos', gripper_site_pos, # 'objpos', object_pos, # 'jointangles', hitting_limits_reward, # 'reaching', reaching_reward, # 'success', success, # 'goaldist', goal_distance_reward) unstable = reward < -2.5 # # Return all three types of rewards # reward = {"reward": reward, "reaching_distance": -10 * reaching_reward, # "goal_distance": - goal_distance_reward, # "hitting_limits_reward": hitting_limits_reward, # "unstable":unstable} return reward def _check_success(self): """ Returns True if task has been completed. """ object_pos = self.sim.data.body_xpos[self.cube_body_id] goal_pos = self.sim.data.site_xpos[self.goal_site_id] dist = np.linalg.norm(goal_pos - object_pos) goal_horizontal_radius = self.model.mujoco_objects[ 'goal'].get_horizontal_radius() # object centre is within the goal radius return dist < goal_horizontal_radius def step(self, action): """ explicitly shut the gripper """ joined_action = np.append(action, [1.]) return super().step(joined_action) def world2eef(self, world): return self.world_rot_in_eef.dot(world) def put_raw_object_obs(self, di): # Extract position and velocity of the eef eef_pos_in_world = self.sim.data.get_body_xpos("right_hand") eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand") # print('eef_pos_in_world', eef_pos_in_world) # Get the position, velocity, rotation and rotational velocity of the object in the world frame object_pos_in_world = self.sim.data.body_xpos[self.cube_body_id] object_xvelp_in_world = self.sim.data.get_body_xvelp('cube') object_rot_in_world = self.sim.data.get_body_xmat('cube') # Get the z-angle with respect to the reference position and do sin-cosine encoding # world_rotation_in_reference = np.array([[0., 1., 0., ], [-1., 0., 0., ], [0., 0., 1., ]]) # object_rotation_in_ref = world_rotation_in_reference.dot(object_rot_in_world) # object_euler_in_ref = T.mat2euler(object_rotation_in_ref) # z_angle = object_euler_in_ref[2] object_quat = convert_quat(self.sim.data.body_xquat[self.cube_body_id], to='xyzw') # Get the goal position in the world goal_site_pos_in_world = np.array( self.sim.data.site_xpos[self.goal_site_id]) # Record observations into a dictionary di['goal_pos_in_world'] = goal_site_pos_in_world di['eef_pos_in_world'] = eef_pos_in_world di['eef_vel_in_world'] = eef_xvelp_in_world di['object_pos_in_world'] = object_pos_in_world di['object_vel_in_world'] = object_xvelp_in_world # di["z_angle"] = np.array([z_angle]) di['object_quat'] = object_quat def process_object_obs(self, di): # z_angle = di['z_angle'] # sine_cosine = np.array([np.sin(8*z_angle), np.cos(8*z_angle)]).reshape((2,)) eef_to_object_in_world = di['object_pos_in_world'] - di[ 'eef_pos_in_world'] # eef_to_object_in_eef = self.world2eef(eef_to_object_in_world) object_to_goal_in_world = di['goal_pos_in_world'] - di[ 'object_pos_in_world'] # object_to_goal_in_eef = self.world2eef(object_to_goal_in_world) # object_xvelp_in_eef = self.world2eef(di['object_vel_in_world']) # eef_xvelp_in_eef = self.world2eef(di['eef_vel_in_world']) task_state = np.concatenate([ eef_to_object_in_world, object_to_goal_in_world, di['eef_vel_in_world'], di['object_vel_in_world'], di['object_quat'] ]) di['task_state'] = task_state def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: gripper_to_object : The x-y component of the gripper to object distance object_to_goal : The x-y component of the object-to-goal distance object_z_rot : the roation of the object around an axis sticking out the table object_xvelp: x-y linear velocity of the object gripper_xvelp: x-y linear velocity of the gripper task-state : a concatenation of all the above. """ # di = super()._get_observation() # joint angles & vel, which we don't need. di = OrderedDict() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: self.put_raw_object_obs(di) if self.object_obs_process: self.process_object_obs(di) return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[:self.sim.data.ncon]: if (self.sim.model.geom_id2name( contact.geom1) in self.gripper.contact_geoms() or self.sim.model.geom_id2name( contact.geom2) in self.gripper.contact_geoms()): collision = True break return collision def _check_contact_with(self, object): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[:self.sim.data.ncon]: if ((self.sim.model.geom_id2name( contact.geom1) in self.gripper.contact_geoms() and contact.geom2 == self.sim.model.geom_name2id(object)) or (self.sim.model.geom_id2name( contact.geom2) in self.gripper.contact_geoms() and contact.geom1 == self.sim.model.geom_name2id(object))): collision = True break return collision def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.gripper_visualization: # get distance to cube cube_site_id = self.sim.model.site_name2id("cube") dist = np.sum( np.square(self.sim.data.site_xpos[cube_site_id] - self.sim.data.get_site_xpos("grip_site"))) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba
class PandaStack(PandaEnv): """ This class corresponds to the stacking task for the Panda robot arm. """ def __init__( self, gripper_type="PandaGripper", table_full_size=(0.8, 0.8, 0.8), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_shaping=False, placement_initializer=None, gripper_visualization=False, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, ): """ Args: gripper_type (str): type of gripper, used to instantiate gripper models from gripper factory. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes a rendered image. use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. gripper_visualization (bool): True if using gripper visualization. Useful for teleoperation. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. """ # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to show visual aid about where is the gripper self.gripper_visualization = gripper_visualization # whether to use ground-truth object states self.use_object_obs = use_object_obs # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: self.placement_initializer = UniformRandomSampler( x_range=[-0.08, 0.08], y_range=[-0.08, 0.08], ensure_object_boundary_in_range=False, z_rotation=True, ) super().__init__( gripper_type=gripper_type, gripper_visualization=gripper_visualization, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, use_camera_obs=use_camera_obs, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, camera_depth=camera_depth, ) # reward configuration self.reward_shaping = reward_shaping # information of objects # self.object_names = [o['object_name'] for o in self.object_metadata] self.object_names = list(self.mujoco_objects.keys()) self.object_site_ids = [ self.sim.model.site_name2id(ob_name) for ob_name in self.object_names ] # id of grippers for contact checking self.finger_names = self.gripper.contact_geoms() # self.sim.data.contact # list, geom1, geom2 self.collision_check_geom_names = self.sim.model._geom_name2id.keys() self.collision_check_geom_ids = [ self.sim.model._geom_name2id[k] for k in self.collision_check_geom_names ] def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The panda robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest cubeA = BoxObject(size_min=[0.02, 0.02, 0.02], size_max=[0.02, 0.02, 0.02], rgba=[1, 0, 0, 1]) cubeB = BoxObject( size_min=[0.025, 0.025, 0.025], size_max=[0.025, 0.025, 0.025], rgba=[0, 1, 0, 1], ) self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)]) self.n_objects = len(self.mujoco_objects) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cubeA_body_id = self.sim.model.body_name2id("cubeA") self.cubeB_body_id = self.sim.model.body_name2id("cubeB") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] self.cubeA_geom_id = self.sim.model.geom_name2id("cubeA") self.cubeB_geom_id = self.sim.model.geom_name2id("cubeB") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # reset positions of objects self.model.place_objects() # reset joint positions init_pos = self.mujoco_robot.init_qpos init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) def reward(self, action): """ Reward function for the task. The dense reward has five components. Reaching: in [0, 1], to encourage the arm to reach the cube Grasping: in {0, 0.25}, non-zero if arm is grasping the cube Lifting: in {0, 1}, non-zero if arm has lifted the cube Aligning: in [0, 0.5], encourages aligning one cube over the other Stacking: in {0, 2}, non-zero if cube is stacked on other cube The sparse reward only consists of the stacking component. However, the sparse reward is either 0 or 1. Args: action (np array): unused for this task Returns: reward (float): the reward """ r_reach, r_lift, r_stack = self.staged_rewards() if self.reward_shaping: reward = max(r_reach, r_lift, r_stack) else: reward = 1.0 if r_stack > 0 else 0.0 return reward def staged_rewards(self): """ Helper function to return staged rewards based on current physical states. Returns: r_reach (float): reward for reaching and grasping r_lift (float): reward for lifting and aligning r_stack (float): reward for stacking """ # reaching is successful when the gripper site is close to # the center of the cube cubeA_pos = self.sim.data.body_xpos[self.cubeA_body_id] cubeB_pos = self.sim.data.body_xpos[self.cubeB_body_id] gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] dist = np.linalg.norm(gripper_site_pos - cubeA_pos) r_reach = (1 - np.tanh(10.0 * dist)) * 0.25 # collision checking touch_left_finger = False touch_right_finger = False touch_cubeA_cubeB = False for i in range(self.sim.data.ncon): c = self.sim.data.contact[i] if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cubeA_geom_id: touch_left_finger = True if c.geom1 == self.cubeA_geom_id and c.geom2 in self.l_finger_geom_ids: touch_left_finger = True if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cubeA_geom_id: touch_right_finger = True if c.geom1 == self.cubeA_geom_id and c.geom2 in self.r_finger_geom_ids: touch_right_finger = True if c.geom1 == self.cubeA_geom_id and c.geom2 == self.cubeB_geom_id: touch_cubeA_cubeB = True if c.geom1 == self.cubeB_geom_id and c.geom2 == self.cubeA_geom_id: touch_cubeA_cubeB = True # additional grasping reward if touch_left_finger and touch_right_finger: r_reach += 0.25 # lifting is successful when the cube is above the table top # by a margin cubeA_height = cubeA_pos[2] table_height = self.table_full_size[2] cubeA_lifted = cubeA_height > table_height + 0.04 r_lift = 1.0 if cubeA_lifted else 0.0 # Aligning is successful when cubeA is right above cubeB if cubeA_lifted: horiz_dist = np.linalg.norm( np.array(cubeA_pos[:2]) - np.array(cubeB_pos[:2])) r_lift += 0.5 * (1 - np.tanh(horiz_dist)) # stacking is successful when the block is lifted and # the gripper is not holding the object r_stack = 0 not_touching = not touch_left_finger and not touch_right_finger if not_touching and r_lift > 0 and touch_cubeA_cubeB: r_stack = 2.0 return (r_reach, r_lift, r_stack) def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: # position and rotation of the first cube cubeA_pos = np.array(self.sim.data.body_xpos[self.cubeA_body_id]) cubeA_quat = convert_quat(np.array( self.sim.data.body_xquat[self.cubeA_body_id]), to="xyzw") di["cubeA_pos"] = cubeA_pos di["cubeA_quat"] = cubeA_quat # position and rotation of the second cube cubeB_pos = np.array(self.sim.data.body_xpos[self.cubeB_body_id]) cubeB_quat = convert_quat(np.array( self.sim.data.body_xquat[self.cubeB_body_id]), to="xyzw") di["cubeB_pos"] = cubeB_pos di["cubeB_quat"] = cubeB_quat # relative positions between gripper and cubes gripper_site_pos = np.array( self.sim.data.site_xpos[self.eef_site_id]) di["gripper_to_cubeA"] = gripper_site_pos - cubeA_pos di["gripper_to_cubeB"] = gripper_site_pos - cubeB_pos di["cubeA_to_cubeB"] = cubeA_pos - cubeB_pos di["object-state"] = np.concatenate([ cubeA_pos, cubeA_quat, cubeB_pos, cubeB_quat, di["gripper_to_cubeA"], di["gripper_to_cubeB"], di["cubeA_to_cubeB"], ]) return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[:self.sim.data.ncon]: if (self.sim.model.geom_id2name(contact.geom1) in self.finger_names or self.sim.model.geom_id2name( contact.geom2) in self.finger_names): collision = True break return collision def _check_success(self): """ Returns True if task has been completed. """ _, _, r_stack = self.staged_rewards() return r_stack > 0 def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to nearest object if self.gripper_visualization: # find closest object square_dist = lambda x: np.sum( np.square(x - self.sim.data.get_site_xpos("grip_site"))) dists = np.array(list(map(square_dist, self.sim.data.site_xpos))) dists[ self. eef_site_id] = np.inf # make sure we don't pick the same site dists[self.eef_cylinder_id] = np.inf ob_dists = dists[ self.object_site_ids] # filter out object sites we care about min_dist = np.min(ob_dists) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(min_dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba
class SawyerReach(SawyerEnv): """ This class corresponds to the a primitive policy for the reach task on the Sawyer robot arm. Uniform random sample on x, y, or z axis in range of 20 to 60 cm for x and y, 0 to 40 cm for z (center of table) """ def __init__( self, gripper_type="TwoFingerGripper", lower_goal_range=[-0.1, -0.1, -0.1], upper_goal_range=[0.1, 0.1, 0.2], seed=False, random_arm_init=None, # please pass in [low, high] table_full_size=(0.8, 0.8, 0.6), table_friction=(1., 5e-3, 1e-4), use_camera_obs=False, use_object_obs=True, reward_shaping=True, placement_initializer=None, gripper_visualization=False, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, ): """ Args: prim_axis (str): which axis is being explored gripper_type (str): type of gripper, used to instantiate gripper models from gripper factory. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes a rendered image. use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. gripper_visualization (bool): True if using gripper visualization. Useful for teleoperation. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. """ self.upper_goal_range = upper_goal_range self.lower_goal_range = lower_goal_range self.random_arm_init = random_arm_init self.seed = seed self.distance_threshold = 0.015 # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer self.placement_initializer = placement_initializer # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal # dim: 3, 2, 3, 3 # low = -np.ones(11) * np.inf # high = np.ones(11) * np.inf # self.observation_space = spaces.Box(low=low, high=high) self.controller = SawyerIKController( bullet_data_path=os.path.join(robosuite.models.assets_root, "bullet_data"), robot_jpos_getter=self._robot_jpos_getter, ) super().__init__( gripper_type=gripper_type, gripper_visualization=gripper_visualization, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, use_camera_obs=use_camera_obs, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, camera_depth=camera_depth, ) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The sawyer robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.16 + self.table_full_size[0] / 2, 0, 0]) self.mujoco_objects = OrderedDict([]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # self.cube_body_id = self.sim.model.body_name2id("cube") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] # self.cube_geom_id = self.sim.model.geom_name2id("cube") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # reset positions of objects self.model.place_objects() if self.random_arm_init is not None: if self.seed: constant_quat = np.array( [-0.01704371, -0.99972409, 0.00199679, -0.01603944]) target_position = np.array([ 0.5 + np.random.uniform(0.0, 0.0), np.random.uniform(0.0, 0.0), # self.table_full_size[2] + 0.15211762]) 0.8 + 0.20211762 ]) else: # random initialization of arm constant_quat = np.array( [-0.01704371, -0.99972409, 0.00199679, -0.01603944]) target_position = np.array([ 0.5 + np.random.uniform(self.random_arm_init[0], self.random_arm_init[1]), np.random.uniform(self.random_arm_init[0], self.random_arm_init[1]), self.table_full_size[2] + 0.20211762 ]) self.controller.sync_ik_robot(self._robot_jpos_getter(), simulate=True) joint_list = self.controller.inverse_kinematics( target_position, constant_quat) init_pos = np.array(joint_list) else: # default robosuite init init_pos = np.array( [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628]) init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) def reset(self): self._destroy_viewer() self._reset_internal() self.sim.forward() # reset goal (marker) gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id]) distance = np.random.uniform(self.lower_goal_range, self.upper_goal_range) while np.linalg.norm(distance) <= (self.distance_threshold * 1.75): distance = np.random.uniform(self.lower_goal_range, self.upper_goal_range) if self.seed: distance = np.array((0.05, -0.1, 0.1)) self.goal = gripper_site_pos + distance return self._get_observation() def _robot_jpos_getter(self): return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161]) def reward(self, action=None): """ Reward function for the task. The dense reward has one component. Reaching: in [0, 1], to encourage the arm to reach the marker The sparse reward only consists of the lifting component. Args: action (np array): unused for this task Returns: reward (float): the reward """ # velocity_pen = np.linalg(np.array( # [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes] # )) marker_pos = self.goal gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] return self.compute_reward(gripper_site_pos, marker_pos, None) # for goalenv wrapper def compute_reward(self, achieved_goal, desired_goal, info=None): d = np.linalg.norm(achieved_goal - desired_goal) if self.reward_shaping: # dense reward = 1 - np.tanh(10 * d) if d <= self.distance_threshold: reward = 10.0 else: # sparse (-1 or 0) #reward = -np.float32(d > distance_threshold) if d > self.distance_threshold: reward = -1.0 else: reward = 60.0 return reward # for goalenv wrapper # TODO check the desired_goal/achieved_goal for indexing def get_goalenv_dict(self, obs_dict): # using only object-state and robot-state ob_lst = [] di = {} for key in obs_dict: if key in ["robot-state", "object-state"]: ob_lst.append(obs_dict[key]) di['observation'] = np.concatenate(ob_lst) di['desired_goal'] = obs_dict['object-state'][0:3] di['achieved_goal'] = obs_dict['robot-state'][23:26] return di def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: gripper_site_pos = np.array( self.sim.data.site_xpos[self.eef_site_id]) di["gripper_to_marker"] = gripper_site_pos - self.goal di["object-state"] = np.concatenate( [di["gripper_to_marker"], self.goal]) return di def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.gripper_visualization: # get distance to cube cube_site_id = self.sim.model.site_name2id("cube") dist = np.sum( np.square(self.sim.data.site_xpos[cube_site_id] - self.sim.data.get_site_xpos("grip_site"))) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba
class BaxterLift(BaxterEnv): """ This class corresponds to the bimanual lifting task for the Baxter robot. """ def __init__(self, gripper_type_right="TwoFingerGripper", gripper_type_left="LeftTwoFingerGripper", table_full_size=(0.8, 0.8, 0.8), table_friction=(1., 5e-3, 1e-4), use_object_obs=True, reward_shaping=True, **kwargs): """ Args: gripper_type_right (str): type of gripper used on the right hand. gripper_type_lefft (str): type of gripper used on the right hand. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_object_obs (bool): if True, include object (pot) information in the observation. reward_shaping (bool): if True, use dense rewards. Inherits the Baxter environment; refer to other parameters described there. """ # initialize the pot self.pot = PotWithHandlesObject() self.mujoco_objects = OrderedDict([("pot", self.pot)]) # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping self.object_initializer = UniformRandomSampler( x_range=(-0.15, -0.04), y_range=(-0.015, 0.015), z_rotation=(-0.15 * np.pi, 0.15 * np.pi), ensure_object_boundary_in_range=False, ) super().__init__(gripper_left=gripper_type_left, gripper_right=gripper_type_right, **kwargs) def _load_model(self): """ Loads the arena and pot object. """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The sawyer robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.45 + self.table_full_size[0] / 2, 0, 0]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, self.object_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flattened array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cube_body_id = self.sim.model.body_name2id("pot") self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1") self.handle_2_site_id = self.sim.model.site_name2id("pot_handle_2") self.table_top_id = self.sim.model.site_name2id("table_top") self.pot_center_id = self.sim.model.site_name2id("pot_center") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() self.model.place_objects() def reward(self, action): """ Reward function for the task. 1. the agent only gets the lifting reward when flipping no more than 30 degrees. 2. the lifting reward is smoothed and ranged from 0 to 2, capped at 2.0. the initial lifting reward is 0 when the pot is on the table; the agent gets the maximum 2.0 reward when the pot’s height is above a threshold. 3. the reaching reward is 0.5 when the left gripper touches the left handle, or when the right gripper touches the right handle before the gripper geom touches the handle geom, and once it touches we use 0.5 """ reward = 0 cube_height = self.sim.data.site_xpos[ self.pot_center_id][2] - self.pot.get_top_offset()[2] table_height = self.sim.data.site_xpos[self.table_top_id][2] # check if the pot is tilted more than 30 degrees mat = T.quat2mat(self._pot_quat) z_unit = [0, 0, 1] z_rotated = np.matmul(mat, z_unit) cos_z = np.dot(z_unit, z_rotated) cos_30 = np.cos(np.pi / 6) direction_coef = 1 if cos_z >= cos_30 else 0 # cube is higher than the table top above a margin if cube_height > table_height + 0.15: reward = 1.0 * direction_coef # use a shaping reward if self.reward_shaping: reward = 0 # lifting reward elevation = cube_height - table_height r_lift = min(max(elevation - 0.05, 0), 0.2) reward += 10. * direction_coef * r_lift l_gripper_to_handle = self._l_gripper_to_handle r_gripper_to_handle = self._r_gripper_to_handle # gh stands for gripper-handle # When grippers are far away, tell them to be closer l_contacts = list( self.find_contacts(self.gripper_left.contact_geoms(), self.pot.handle_1_geoms())) r_contacts = list( self.find_contacts(self.gripper_right.contact_geoms(), self.pot.handle_2_geoms())) l_gh_dist = np.linalg.norm(l_gripper_to_handle) r_gh_dist = np.linalg.norm(r_gripper_to_handle) if len(l_contacts) > 0: reward += 0.5 else: reward += 0.5 * (1 - np.tanh(l_gh_dist)) if len(r_contacts) > 0: reward += 0.5 else: reward += 0.5 * (1 - np.tanh(r_gh_dist)) return reward @property def _handle_1_xpos(self): """Returns the position of the first handle.""" return self.sim.data.site_xpos[self.handle_1_site_id] @property def _handle_2_xpos(self): """Returns the position of the second handle.""" return self.sim.data.site_xpos[self.handle_2_site_id] @property def _pot_quat(self): """Returns the orientation of the pot.""" return T.convert_quat(self.sim.data.body_xquat[self.cube_body_id], to="xyzw") @property def _world_quat(self): """World quaternion.""" return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw") @property def _l_gripper_to_handle(self): """Returns vector from the left gripper to the handle.""" return self._handle_1_xpos - self._l_eef_xpos @property def _r_gripper_to_handle(self): """Returns vector from the right gripper to the handle.""" return self._handle_2_xpos - self._r_eef_xpos def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: # position and rotation of object cube_pos = self.sim.data.body_xpos[self.cube_body_id] cube_quat = T.convert_quat( self.sim.data.body_xquat[self.cube_body_id], to="xyzw") di["cube_pos"] = cube_pos di["cube_quat"] = cube_quat di["l_eef_xpos"] = self._l_eef_xpos di["r_eef_xpos"] = self._r_eef_xpos di["handle_1_xpos"] = self._handle_1_xpos di["handle_2_xpos"] = self._handle_2_xpos di["l_gripper_to_handle"] = self._l_gripper_to_handle di["r_gripper_to_handle"] = self._r_gripper_to_handle di["object-state"] = np.concatenate([ di["cube_pos"], di["cube_quat"], di["l_eef_xpos"], di["r_eef_xpos"], di["handle_1_xpos"], di["handle_2_xpos"], di["l_gripper_to_handle"], di["r_gripper_to_handle"], ]) return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False contact_geoms = (self.gripper_right.contact_geoms() + self.gripper_left.contact_geoms()) for contact in self.sim.data.contact[:self.sim.data.ncon]: if (self.sim.model.geom_id2name(contact.geom1) in contact_geoms or self.sim.model.geom_id2name( contact.geom2) in contact_geoms): collision = True break return collision def _check_success(self): """ Returns True if task is successfully completed """ # cube is higher than the table top above a margin cube_height = self.sim.data.body_xpos[self.cube_body_id][2] table_height = self.table_full_size[2] return cube_height > table_height + 0.10
class SawyerPrimitivePick(SawyerEnv): """ This class corresponds to the lifting task for the Sawyer robot arm. """ def __init__( self, instructive=0.0, decay=0.0, random_arm_init=None, gripper_type="TwoFingerGripper", table_full_size=(0.8, 0.8, 0.6), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_shaping=False, placement_initializer=None, gripper_visualization=False, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, ): """ Args: gripper_type (str): type of gripper, used to instantiate gripper models from gripper factory. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes a rendered image. use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. gripper_visualization (bool): True if using gripper visualization. Useful for teleoperation. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. """ self.random_arm_init = random_arm_init self.instructive = instructive self.instructive_counter = 0 self.eval_mode = False self.decay = decay # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: self.placement_initializer = UniformRandomSampler( x_range=[-0.00, 0.00], y_range=[-0.00, 0.00], ensure_object_boundary_in_range=False, z_rotation=None, ) # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal # dim: 3, 2, 3, 3 # low = -np.ones(11) * np.inf # high = np.ones(11) * np.inf # self.observation_space = spaces.Box(low=low, high=high) self.controller = SawyerIKController( bullet_data_path=os.path.join(robosuite.models.assets_root, "bullet_data"), robot_jpos_getter=self._robot_jpos_getter, ) super().__init__( gripper_type=gripper_type, gripper_visualization=gripper_visualization, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, use_camera_obs=use_camera_obs, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, camera_depth=camera_depth, ) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The sawyer robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest cube = BoxObject( # size_min=[0.020, 0.020, 0.020], # [0.015, 0.015, 0.015], # size_max=[0.022, 0.022, 0.022], # [0.018, 0.018, 0.018]) size_min=[0.015, 0.015, 0.015], size_max=[0.015, 0.015, 0.015], rgba=[1, 0, 0, 1], ) self.mujoco_objects = OrderedDict([("cube", cube)]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cube_body_id = self.sim.model.body_name2id("cube") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] self.cube_geom_id = self.sim.model.geom_name2id("cube") def _reset_internal(self): """ Resets simulation internal configurations. """ super(SawyerEnv, self)._reset_internal() self.model.place_objects() if self.random_arm_init: # random initialization of arm constant_quat = np.array( [-0.01704371, -0.99972409, 0.00199679, -0.01603944]) target_position = np.array([ 0.5 + np.random.uniform(self.random_arm_init[0], self.random_arm_init[1]), np.random.uniform(self.random_arm_init[0], self.random_arm_init[1]), self.table_full_size[2] + 0.15211762 ]) self.controller.sync_ik_robot(self._robot_jpos_getter(), simulate=True) joint_list = self.controller.inverse_kinematics( target_position, constant_quat) init_pos = np.array(joint_list) else: # default robosuite init init_pos = np.array( [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628]) init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = init_pos self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array( [0.0115, -0.0115]) # Open self.sim.forward() self.sim.data.qpos[10:12] = self.sim.data.site_xpos[ self.eef_site_id][:2] # decay rate (1 / (1 + decay_param * #resets)) chance = self.instructive * ( 1 / (1 + self.decay * self.instructive_counter)) if np.random.uniform() < chance: # and not self.eval_mode: self.sim.data.qpos[13] = self.sim.data.site_xpos[ self.eef_site_id][2] self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array( [-0.0, -0.0]) #np.array([-0.21021952, -0.00024167]) # gripped self.instructive_counter = self.instructive_counter + 1 cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id]) self.goal = cube_pos + np.array((0, 0, 0.075)) def _robot_jpos_getter(self): return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161]) def reward(self, action=None): """ Reward function for the task. The dense reward has three components. Reaching: in [0, 1], to encourage the arm to reach the cube Grasping: in {0, 0.25}, non-zero if arm is grasping the cube Lifting: in {0, 1}, non-zero if arm has lifted the cube The sparse reward only consists of the lifting component. Args: action (np array): unused for this task Returns: reward (float): the reward """ cube_height = self.sim.data.body_xpos[self.cube_body_id] return self.compute_reward(cube_height, self.goal) # for goalenv wrapper def compute_reward(self, achieved_goal, desired_goal, info=None): # -1 if cube is below, 0 if cube is above reward = -1.0 if achieved_goal[2] > desired_goal[2]: reward = 100.0 return reward # for goalenv wrapper def get_goalenv_dict(self, obs_dict): # using only object-state and robot-state ob_lst = [] di = {} for key in obs_dict: if key in ["robot-state", "object-state"]: ob_lst.append(obs_dict[key]) di['observation'] = np.concatenate(ob_lst) di['desired_goal'] = self.goal di['achieved_goal'] = obs_dict['object-state'][0:3] return di def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: # position and rotation of object cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id]) cube_quat = convert_quat(np.array( self.sim.data.body_xquat[self.cube_body_id]), to="xyzw") di["cube_pos"] = cube_pos di["cube_quat"] = cube_quat gripper_site_pos = np.array( self.sim.data.site_xpos[self.eef_site_id]) di["gripper_to_cube"] = gripper_site_pos - cube_pos # di["object-state"] = np.concatenate( # [cube_pos, cube_quat, di["gripper_to_cube"]] # ) di["object-state"] = np.concatenate( [di["gripper_to_cube"], cube_pos]) return di def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.gripper_visualization: # get distance to cube cube_site_id = self.sim.model.site_name2id("cube") dist = np.sum( np.square(self.sim.data.site_xpos[cube_site_id] - self.sim.data.get_site_xpos("grip_site"))) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba
class PandaOpenDoor(change_dof(PandaEnv, 8, 8)): # keep the dimension to control the gripper; better not remove change_dof """ This class corresponds to the pushing task for the Panda robot arm. """ parameters_spec = { **PandaEnv.parameters_spec, 'table_size_0': [0.7, 0.9], 'table_size_1': [0.7, 0.9], 'table_size_2': [0.7, 0.9], #'table_friction_0': [0.4, 1.6], 'table_friction_1': [0.0025, 0.0075], 'table_friction_2': [0.00005, 0.00015], # 'boxobject_size_0': [0.018, 0.022], # 'boxobject_size_1': [0.018, 0.022], # 'boxobject_size_2': [0.018, 0.022], # 'boxobject_friction_0': [0.04, 1.6], #'boxobject_friction_1': [0.0025, 0.0075], # fixed this to zero # 'boxobject_friction_2': [0.00005, 0.00015], # 'boxobject_density_1000': [0.6, 1.4], } def reset_props(self, table_size_0=0.8, table_size_1=0.8, table_size_2=0.9, table_friction_0=0., table_friction_1=0.005, table_friction_2=0.0001, # boxobject_size_0=0.020, boxobject_size_1=0.020, boxobject_size_2=0.020, # boxobject_friction_0=0.1, boxobject_friction_1=0.0, boxobject_friction_2=0.0001, # boxobject_density_1000=0.1, **kwargs): self.table_full_size = (table_size_0, table_size_1, table_size_2) self.table_friction = (table_friction_0, table_friction_1, table_friction_2) # self.boxobject_size = (boxobject_size_0, boxobject_size_1, boxobject_size_2) # self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1, boxobject_friction_2) # self.boxobject_density = boxobject_density_1000 * 1000. super().reset_props(**kwargs) def __init__(self, use_object_obs=True, reward_shaping=True, placement_initializer=None, object_obs_process=True, **kwargs): """ Args: use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. object_obs_process (bool): if True, process the object observation to get a task_state. Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing. """ # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: # self.placement_initializer = UniformRandomSampler( # x_range=[-0.1, 0.1], # y_range=[-0.1, 0.1], # ensure_object_boundary_in_range=False, # z_rotation=None, # ) self.placement_initializer = UniformRandomSamplerObjectSpecific( x_ranges=[[-0.03, -0.02], [0.09, 0.1]], y_ranges=[[-0.05, -0.04], [-0.05, -0.04]], ensure_object_boundary_in_range=False, z_rotation=None, ) # for first initialization self.table_full_size = (0.8, 0.8, 0.8) self.table_friction = (0., 0.005, 0.0001) # self.boxobject_size = (0.02, 0.02, 0.02) # self.boxobject_friction = (0.1, 0.005, 0.0001) # self.boxobject_density = 100. self.object_obs_process = object_obs_process super().__init__(gripper_visualization=True, **kwargs) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableCabinetArena( table_full_size=self.table_full_size, table_friction=self.table_friction ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The panda robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest # cube = FullyFrictionalBoxObject( # size=self.boxobject_size, # friction=self.boxobject_friction, # density=self.boxobject_density, # rgba=[1, 0, 0, 1], # ) # self.mujoco_cube = cube # goal = CylinderObject( # size=[0.03, 0.001], # rgba=[0, 1, 0, 1], # ) # self.mujoco_goal = goal # self.mujoco_objects = OrderedDict([("cube", cube), ("goal", goal)]) self.mujoco_objects = None # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, visual_objects=[], ) if self.mujoco_objects is not None: self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # self.cube_body_id = self.sim.model.body_name2id("cube") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] # self.cube_geom_id = self.sim.model.geom_name2id("cube") # gripper ids # self.goal_body_id = self.sim.model.body_name2id('goal') # self.goal_site_id = self.sim.model.site_name2id('goal') def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() self.sim.forward() # reset positions of objects if self.mujoco_objects is not None: self.model.place_objects() # reset joint positions # init_pos = self.mujoco_robot.init_qpos # print(init_pos) # init_pos += np.random.randn(init_pos.shape[0]) * 0.02 # self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) # self.sim.data.qpos[self._ref_joint_pos_indexes] = [0.02085236, 0.20386552, 0.00569112, -2.60645364, 2.8973697, 3.53509316, 2.89737955] # a initial gesture: facing downwards # self.sim.data.qpos[self._ref_joint_pos_indexes] = [ 0.10234113, -1.32314358, 0.18383693, -2.88485115, 1.64673455, 3.22235274, 2.3644487 ] # a good initial gesture: facing horizontally self.sim.data.qpos[self._ref_joint_pos_indexes] = [ 0.10259647, -0.77839656, 0.27246156, -2.35741103, 1.647504, 3.43102572, -0.85707793] # open the gripper # self.sim.data.qpos[self._ref_joint_gripper_actuator_indexes] = np.array([0.2, -0.2]) self.sim.data.ctrl[-2:] = np.array([0.04, -0.04]) # panda gripper finger joint range is -0.04~0.04 # set other reference attributes eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape((3, 3)) self.world_rot_in_eef = copy.deepcopy(eef_rot_in_world.T) # TODO inspect on this: should we set a golden reference other than a initial position? # reward function from sawyer_push def reward(self, action=None): """ Reward function for the task. The dense reward has three components. Reaching: in [-inf, 0], to encourage the arm to reach the object Goal Distance: in [-inf, 0] the distance between the pushed object and the goal Safety reward in [-inf, 0], -1 for every joint that is at its limit. The sparse reward only receives a {0,1} upon reaching the goal Args: action (np array): The action taken in that timestep Returns: reward (float): the reward previously in robosuite-extra, when dense reward is used, the return value will be a dictionary. but we removed that feature. """ reward = 0. self.door_open_angle = abs(self.sim.data.get_joint_qpos("hinge0")) reward_door_open = self.door_open_angle reward_dist = 0. reward_ori = 0. if self.door_open_angle < 0.03: # A distance reward: minimize the distance between the gripper and door konb when the door is almost closed reward_dist = -np.linalg.norm(self.get_finger2knob_dist_vec()) # An orientation reward: make the orientation of gripper horizontal (better for knob grasping) when the door is almost closed fingerEulerDesired = [np.pi, 0, np.pi/2] # horizontal gesture for gripper finger_ori = self.get_finger_ori() ori_diff = sin_cos_encoding(fingerEulerDesired) - sin_cos_encoding(finger_ori) # use sin_cos_encoding to avoid value jump in 2PI measure reward_ori = -np.linalg.norm(ori_diff) * 0.04 # worldHknob = self.sim.data.get_body_xquat("knob_link") # knobHee_desired = euler2quat(quat2euler([0.5, 0.5, 0.5, -0.5])) # worldHee_desired = quat_mul(worldHknob, knobHee_desired) # print(quat2euler(knobHee_desired), self.get_finger_ori()) reward = reward_door_open + reward_dist + reward_ori # A summary of reward values # Success Reward success = self._check_success() if (success): reward += 0.1 self.done = True # worldHknob = self.sim.data.get_body_xquat("knob_link") # knobHee_desired = euler2quat(quat2euler([0.5, 0.5, -0.5, 0.5])) # worldHee_desired = quat_mul(worldHknob, knobHee_desired) # print(quat2euler(worldHee_desired), quat2euler(worldHknob), self.get_finger_ori() ) # # sparse completion reward # if not self.reward_shaping and self._check_success(): # reward = 1.0 # # use a dense reward # if self.reward_shaping: # object_pos = self.sim.data.body_xpos[self.cube_body_id] # # max joint angles reward # joint_limits = self._joint_ranges # current_joint_pos = self._joint_positions # hitting_limits_reward = - int(any([(x < joint_limits[i, 0] + 0.05 or x > joint_limits[i, 1] - 0.05) for i, x in # enumerate(current_joint_pos)])) # reward += hitting_limits_reward # # reaching reward # gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] # dist = np.linalg.norm(gripper_site_pos - object_pos) # reaching_reward = -0.4 * dist # reward += reaching_reward # # print(gripper_site_pos, object_pos, reaching_reward) # # Success Reward # success = self._check_success() # if (success): # reward += 0.1 # # goal distance reward # goal_pos = self.sim.data.site_xpos5707963267948966bject--gripper--goal # angle_g_o_g = angle_between(gripper_site_pos - object_pos, # goal_pos - object_pos) # if not success and angle_g_o_g < np.pi / 2.: # reward += -0.03 - 0.02 * (np.pi / 2. - angle_g_o_g) # # print('grippersitepos', gripper_site_pos, # # 'objpos', object_pos, # # 'jointangles', hitting_limits_reward, # # 'reaching', reaching_reward, # # 'success', success, # # 'goaldist', goal_distance_reward) # unstable = reward < -2.5 # # Return all three types of rewards # reward = {"reward": reward, "reaching_distance": -10 * reaching_reward, # "goal_distance": - goal_distance_reward, # "hitting_limits_reward": hitting_limits_reward, # "unstable":unstable} return reward def _check_success(self): """ Returns True if task has been completed. """ if self.door_open_angle >= 1.55: # 1.57 ~ PI/2 return True else: return False # object_pos = self.sim.data.body_xpos[self.cube_body_id] # goal_pos = self.sim.data.site_xpos[self.goal_site_id] # dist = np.linalg.norm(goal_pos - object_pos) # goal_horizontal_radius = self.model.mujoco_objects['goal'].get_horizontal_radius() # # object centre is within the goal radius # return dist < goal_horizontal_radius # return False def step(self, action): # """ explicitly shut the gripper """ # joined_action = np.append(action, [1.]) return super().step(action) def world2eef(self, world): return self.world_rot_in_eef.dot(world) # def put_raw_object_obs(self, di): # Extract position and velocity of the eef # eef_pos_in_world = self.sim.data.get_body_xpos("right_hand") # eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand") # print('eef_pos_in_world', eef_pos_in_world) # Get the position, velocity, rotation and rotational velocity of the object in the world frame # object_pos_in_world = self.sim.data.body_xpos[self.cube_body_id] # object_xvelp_in_world = self.sim.data.get_body_xvelp('cube') # object_rot_in_world = self.sim.data.get_body_xmat('cube') # Get the z-angle with respect to the reference position and do sin-cosine encoding # world_rotation_in_reference = np.array([[0., 1., 0., ], [-1., 0., 0., ], [0., 0., 1., ]]) # object_rotation_in_ref = world_rotation_in_reference.dot(object_rot_in_world) # object_euler_in_ref = T.mat2euler(object_rotation_in_ref) # z_angle = object_euler_in_ref[2] # object_quat = convert_quat(self.sim.data.body_xquat[self.cube_body_id], to='xyzw') # Get the goal position in the world # goal_site_pos_in_world = np.array(self.sim.data.site_xpos[self.goal_site_id]) # Record observations into a dictionary # di['goal_pos_in_world'] = goal_site_pos_in_world # di['eef_pos_in_world'] = eef_pos_in_world # di['eef_vel_in_world'] = eef_xvelp_in_world # di['object_pos_in_world'] = object_pos_in_world # di['object_vel_in_world'] = object_xvelp_in_world # di["z_angle"] = np.array([z_angle]) # di['object_quat'] = object_quat # def process_object_obs(self, di): # # z_angle = di['z_angle'] # # sine_cosine = np.array([np.sin(8*z_angle), np.cos(8*z_angle)]).reshape((2,)) # eef_to_object_in_world = di['object_pos_in_world'] - di['eef_pos_in_world'] # # eef_to_object_in_eef = self.world2eef(eef_to_object_in_world) # object_to_goal_in_world = di['goal_pos_in_world'] - di['object_pos_in_world'] # # object_to_goal_in_eef = self.world2eef(object_to_goal_in_world) # # object_xvelp_in_eef = self.world2eef(di['object_vel_in_world']) # # eef_xvelp_in_eef = self.world2eef(di['eef_vel_in_world']) # task_state = np.concatenate([ # eef_to_object_in_world, # object_to_goal_in_world, # di['eef_pos_in_world'], # di['eef_vel_in_world'], # di['object_vel_in_world'], # di['object_quat'] # ]) # di['task_state'] = task_state def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: gripper_to_object : The x-y component of the gripper to object distance object_to_goal : The x-y component of the object-to-goal distance object_z_rot : the roation of the object around an axis sticking out the table object_xvelp: x-y linear velocity of the object gripper_xvelp: x-y linear velocity of the gripper task-state : a concatenation of all the above. """ # di = super()._get_observation() # joint angles & vel, which we don't need. di = OrderedDict() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: eef_pos_in_world = self.sim.data.get_body_xpos("right_hand") eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand") di['eef_pos_in_world'] = eef_pos_in_world # dim=3 di['eef_vel_in_world'] = eef_xvelp_in_world # dim=3 di['finger_knob_dist'] = self.get_finger2knob_dist_vec() # dim=3 di['door_hinge_angle'] = [self.sim.data.get_joint_qpos("hinge0")] # dim=1 task_state = np.concatenate([ di['eef_pos_in_world'], di['eef_vel_in_world'], di['finger_knob_dist'], di['door_hinge_angle'], ]) di['task_state'] = task_state return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[: self.sim.data.ncon]: if ( self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms() or self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms() ): collision = True break return collision def _check_contact_with(self, object): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[: self.sim.data.ncon]: if ( (self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms() and contact.geom2 == self.sim.model.geom_name2id(object)) or (self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms() and contact.geom1 == self.sim.model.geom_name2id(object)) ): collision = True break return collision def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ if self.gripper_visualization: rgba = np.zeros(4) self.sim.model.site_rgba[self.eef_site_id] = rgba def get_finger_ori(self): finger_rel_quat = self.sim.data.get_body_xquat("rightfinger") hand_quat = self.sim.data.get_body_xquat("right_hand") finger_world_quat = quat_mul(finger_rel_quat, hand_quat) # TODO: which one at first? return quat2euler(finger_world_quat) def get_hand_pos(self): return self.sim.data.get_geom_xpos("hand_visual") def get_knob_pos(self): knob_pos = self.sim.data.get_geom_xpos("center_cabinet_knob") return knob_pos def get_finger2knob_dist_vec(self): return self.get_hand_pos() - self.get_knob_pos()
class PandaLift(PandaEnv): dof = 8 """ This class corresponds to the lifting task for the Panda robot arm. """ parameters_spec = { **PandaEnv.parameters_spec, 'table_size_0': [0.7, 0.9], 'table_size_1': [0.7, 0.9], 'table_size_2': [0.7, 0.9], 'table_friction_0': [0.4, 1.6], 'table_friction_1': [0.0025, 0.0075], 'table_friction_2': [0.00005, 0.00015], 'boxobject_friction_0': [0.4, 1.6], # 'boxobject_friction_1': [0.0025, 0.0075], # fixed this to zero 'boxobject_friction_2': [0.00005, 0.00015], 'boxobject_density_1000': [0.6, 1.4], } def reset_props(self, table_size_0=0.8, table_size_1=0.8, table_size_2=0.8, table_friction_0=1.0, table_friction_1=0.005, table_friction_2=0.0001, boxobject_size_0=0.020, boxobject_size_1=0.020, boxobject_size_2=0.020, boxobject_friction_0=1.0, boxobject_friction_1=0.0, boxobject_friction_2=0.0001, boxobject_density_1000=0.1, **kwargs): self.table_full_size = (table_size_0, table_size_1, table_size_2) self.table_friction = (table_friction_0, table_friction_1, table_friction_2) self.boxobject_size = (boxobject_size_0, boxobject_size_1, boxobject_size_2) self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1, boxobject_friction_2) self.boxobject_density = boxobject_density_1000 * 1000. super().reset_props(**kwargs) def __init__(self, use_object_obs=True, reward_shaping=True, placement_initializer=None, object_obs_process=True, **kwargs): """ Args: use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. object_obs_process (bool): if True, process the object observation to get a task_state. Setting this to False is useful when some transformation (eg. noise) need to be done to object observation raw data prior to the processing. """ # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: self.placement_initializer = UniformRandomSampler( x_range=[-0.03, 0.03], y_range=[-0.03, 0.03], ensure_object_boundary_in_range=False, z_rotation=None, ) # for first initialization self.table_full_size = (0.8, 0.8, 0.8) self.table_friction = (1.0, 0.005, 0.0001) self.boxobject_size = (0.02, 0.02, 0.02) self.boxobject_friction = (1.0, 0.005, 0.0001) self.boxobject_density = 100. self.object_obs_process = object_obs_process super().__init__(gripper_visualization=True, **kwargs) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_full_size, table_friction=self.table_friction) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The panda robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin( [0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest # in original robosuite, a simple domain randomization is included in BoxObject implementation, and called here. We choose to discard that implementation. cube = FullyFrictionalBoxObject( size=self.boxobject_size, friction=self.boxobject_friction, density=self.boxobject_density, rgba=[1, 0, 0, 1], ) self.mujoco_objects = OrderedDict([("cube", cube)]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cube_body_id = self.sim.model.body_name2id("cube") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] self.cube_geom_id = self.sim.model.geom_name2id("cube") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # reset positions of objects self.model.place_objects() # reset joint positions init_pos = self.mujoco_robot.init_qpos init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) def reward(self, action=None): """ Reward function for the task. The dense reward has three components. Reaching: in [0, 1], to encourage the arm to reach the cube Grasping: in {0, 0.25}, non-zero if arm is grasping the cube Lifting: in {0, 1}, non-zero if arm has lifted the cube The sparse reward only consists of the lifting component. Args: action (np array): unused for this task Returns: reward (float): the reward """ reward = 0. # sparse completion reward if self._check_success(): reward = 1.0 # use a shaping reward if self.reward_shaping: # reaching reward cube_pos = self.sim.data.body_xpos[self.cube_body_id] gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] dist = np.linalg.norm(gripper_site_pos - cube_pos) reaching_reward = 1 - np.tanh(10.0 * dist) reward += reaching_reward # grasping reward touch_left_finger = False touch_right_finger = False for i in range(self.sim.data.ncon): c = self.sim.data.contact[i] if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id: touch_left_finger = True if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids: touch_left_finger = True if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id: touch_right_finger = True if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids: touch_right_finger = True if touch_left_finger and touch_right_finger: reward += 0.25 return reward def put_raw_object_obs(self, di): di['cube_pos'] = np.array(self.sim.data.body_xpos[self.cube_body_id]) di['cube_quat'] = convert_quat(np.array( self.sim.data.body_xquat[self.cube_body_id]), to="xyzw") di['gripper_site_pos'] = np.array( self.sim.data.site_xpos[self.eef_site_id]) def process_object_obs(self, di): gripper_to_cube = di['gripper_site_pos'] - di['cube_pos'] task_state = np.concatenate( [di['cube_pos'], di['cube_quat'], gripper_to_cube]) di['task_state'] = task_state def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: self.put_raw_object_obs(di) if self.object_obs_process: self.process_object_obs(di) return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[:self.sim.data.ncon]: if (self.sim.model.geom_id2name( contact.geom1) in self.gripper.contact_geoms() or self.sim.model.geom_id2name( contact.geom2) in self.gripper.contact_geoms()): collision = True break return collision def _check_success(self): """ Returns True if task has been completed. """ cube_height = self.sim.data.body_xpos[self.cube_body_id][2] table_height = self.table_full_size[2] # cube is higher than the table top above a margin return cube_height > table_height + 0.04 def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.gripper_visualization: # get distance to cube cube_site_id = self.sim.model.site_name2id("cube") dist = np.sum( np.square(self.sim.data.site_xpos[cube_site_id] - self.sim.data.get_site_xpos("grip_site"))) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(dist / max_dist, 1.))**15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba
class SawyerGrasp(SawyerEnv): """ This class corresponds to the lifting task for the Sawyer robot arm. """ def __init__( self, gripper_type="TwoFingerGripper", table_full_size=(0.8, 0.8, 1.1), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_shaping=False, placement_initializer=None, gripper_visualization=False, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, target_object='cube', ): """ Args: gripper_type (str): type of gripper, used to instantiate gripper models from gripper factory. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes a rendered image. use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. gripper_visualization (bool): True if using gripper visualization. Useful for teleoperation. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. """ self.target_object = target_object # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: # self.placement_initializer = UniformRandomSampler( # x_range=[-0.03, 0.03], # y_range=[-0.03, 0.03], # ensure_object_boundary_in_range=False, # z_rotation=True, # ) if self.target_object == 'cube': self.placement_initializer = UniformRandomSampler( x_range=[0.08, 0.12], y_range=[0.14, 0.18], ensure_object_boundary_in_range=False, z_rotation=[-np.pi/4, 0], #None for random ) else: self.placement_initializer = UniformRandomSampler( x_range=BOX_TEST_OBJECTS[self.target_object]['x_range'], y_range=BOX_TEST_OBJECTS[self.target_object]['y_range'], ensure_object_boundary_in_range=False, z_rotation=BOX_TEST_OBJECTS[self.target_object]['angle'], #None for random ) super().__init__( gripper_type=gripper_type, gripper_visualization=gripper_visualization, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, use_camera_obs=use_camera_obs, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, camera_depth=camera_depth, ) def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) # load model for table top workspace self.mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The sawyer robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0]) # initialize objects of interest if self.target_object == 'cube': cube = BoxObject( size_min=[0.05, 0.010, 0.05], # [0.015, 0.015, 0.015], size_max=[0.15, 0.015, 0.08], # [0.018, 0.018, 0.018]) rgba=[1, 0, 0, 1], friction=20, density=50, ) else: cube = BoxObject( size=BOX_TEST_OBJECTS[self.target_object]['size'], rgba=[1, 0, 0, 1], friction=20, density=50, ) # if self.target_object == 'cylinder': # cube = CylinderObject( # size_min=[0.015, 0.01], # size_max=[0.025, 0.05], # rgba=[1, 0, 0, 1], # ) # elif self.target_object == 'ball': # cube = BallObject( # size_min=[0.01], # size_max=[0.02], # rgba=[1, 0, 0, 1], # ) self.object = cube # if self.target_object == 'can': # cube = CanObject() # elif self.target_object == 'bottle': # cube = BottleObject() # elif self.target_object == 'milk': # cube = MilkObject() # elif self.target_object == 'cereal': # cube = CerealObject() # elif self.target_object == 'lemon': # cube = LemonObject() self.mujoco_objects = OrderedDict([("cube", cube)]) # task includes arena, robot, and objects of interest self.model = TableTopTask( self.mujoco_arena, self.mujoco_robot, self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects() def _reset_internal(self): """ Sets initial pose of arm and grippers. """ super()._reset_internal() self.sim.data.qpos[self._ref_joint_pos_indexes] = np.zeros(self.dof) if self.has_gripper: self.sim.data.qpos[ self._ref_gripper_joint_pos_indexes ] = self.gripper.init_qpos def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() self.cube_body_id = self.sim.model.body_name2id("cube") self.table_body_id = self.sim.model.body_name2id("table") self.l_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms ] self.r_finger_geom_ids = [ self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms ] self.cube_geom_id = self.sim.model.geom_name2id("cube") def _get_target_grasp(self): pose = self.pose_in_base_from_name('cube') wpos = self._get_observation()['cube_pos'] height = wpos[2] - self.table_full_size[2] pos = pose[:3, 3] + np.array([-0.02, 0.01, GRIPPER_LENGTH - GRIP_CONTACT]) return pos, 0 # relative to table top and center def _get_grasp_gt(self): table_top_center, _ = self._get_table_top_center() pose = self.pose_in_base_from_name('cube') pos = pose[:3, 3] pos_gt = pos - table_top_center grasp_height = self.object.get_top_offset()[-1] if len(self.object.size) == 1: #sphere grasp_height = grasp_height / 2 pos_gt[2] = grasp_height rot_gt = pose[:3, :3] return pos_gt, rot_gt def _gen_grasp_gt(self): GRIPPER_LENGTH = 0.15 GRIPPER_Y_OFFSET = 0.025 table_top_center, _ = self._get_table_top_center() pose = self.pose_in_base_from_name('cube') pos = pose[:3, 3] pos_gt = pos - table_top_center grasp_height = self.object.get_top_offset()[-1] if len(self.object.size) == 1: #sphere grasp_height = grasp_height / 2 pos_gt[2] = grasp_height #+ GRIPPER_LENGTH # pos_gt[1] += GRIPPER_Y_OFFSET cube_length = self.object.size[0] # pos_gt[0] += np.random.uniform(low=-cube_length/2, high = cube_length/2) rot_gt = pose[:3, :3] return pos_gt, rot_gt def _get_table_top_center(self): pose = self.pose_in_base_from_name('table') pos = pose[:3, 3] rot = pose[:3, :3] pos[2] += self.table_full_size[2]/2 return pos, rot def _get_body_pos(self, name, base=True): if base: pose = self.pose_in_base_from_name(name) body_pos = pose[:3, 3] body_quat = pose[:3,:3] else: body_id = self.sim.model.body_name2id(name) body_pos = np.array(self.sim.data.body_xpos[body_id]) body_quat = convert_quat( np.array(self.sim.data.body_xquat[body_id]), to="xyzw") return body_pos, body_quat def _get_cam_pos(self, cam_id): cam_pos = np.array(self.sim.model.cam_pos[cam_id]) cam_quat = convert_quat(np.array(self.sim.model.cam_quat[cam_id]), to="xyzw") return cam_pos, cam_quat def _get_cam_K(self, cam_id): W = self.camera_width H = self.camera_height fovy = self.sim.model.cam_fovy[cam_id] f = 0.5 * W / math.tan(fovy * math.pi / 360) return f, W/2, H/2 #np.array([[f, 0, W / 2], [0, f, H / 2], [0, 0, 1]]) def _set_cam_pos(self, cam_id, cam_pos, cam_quat): self.sim.model.cam_pos[cam_id] = cam_pos self.sim.model.cam_quat[cam_id] = convert_quat(cam_quat, to="wxyz") def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # reset positions of objects self.model.place_objects() # reset joint positions init_pos = np.array([-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628]) init_pos += np.random.randn(init_pos.shape[0]) * 0.02 self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) def reward(self, action=None): """ Reward function for the task. The dense reward has three components. Reaching: in [0, 1], to encourage the arm to reach the cube Grasping: in {0, 0.25}, non-zero if arm is grasping the cube Lifting: in {0, 1}, non-zero if arm has lifted the cube The sparse reward only consists of the lifting component. Args: action (np array): unused for this task Returns: reward (float): the reward """ reward = 0. # sparse completion reward if self._check_success(): reward = 1.0 # use a shaping reward if self.reward_shaping: # reaching reward cube_pos = self.sim.data.body_xpos[self.cube_body_id] gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] dist = np.linalg.norm(gripper_site_pos - cube_pos) reaching_reward = 1 - np.tanh(10.0 * dist) reward += reaching_reward # grasping reward touch_left_finger = False touch_right_finger = False for i in range(self.sim.data.ncon): c = self.sim.data.contact[i] if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id: touch_left_finger = True if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids: touch_left_finger = True if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id: touch_right_finger = True if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids: touch_right_finger = True if touch_left_finger and touch_right_finger: reward += 0.25 return reward def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: robot-state: contains robot-centric information. object-state: requires @self.use_object_obs to be True. contains object-centric information. image: requires @self.use_camera_obs to be True. contains a rendered frame from the simulation. depth: requires @self.use_camera_obs and @self.camera_depth to be True. contains a rendered depth map from the simulation """ di = super()._get_observation() # camera observations if self.use_camera_obs: camera_obs = self.sim.render( camera_name=self.camera_name, width=self.camera_width, height=self.camera_height, depth=self.camera_depth, ) if self.camera_depth: di["image"], di["depth"] = camera_obs else: di["image"] = camera_obs # low-level object information if self.use_object_obs: # position and rotation of object cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id]) cube_quat = convert_quat( np.array(self.sim.data.body_xquat[self.cube_body_id]), to="xyzw" ) di["cube_pos"] = cube_pos di["cube_quat"] = cube_quat gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id]) di["gripper_to_cube"] = gripper_site_pos - cube_pos di["object-state"] = np.concatenate( [cube_pos, cube_quat, di["gripper_to_cube"]] ) return di def _check_contact(self): """ Returns True if gripper is in contact with an object. """ collision = False for contact in self.sim.data.contact[: self.sim.data.ncon]: if ( self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms() or self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms() ): collision = True break return collision def _check_success(self): """ Returns True if task has been completed. """ cube_height = self.object.get_top_offset()[-1]#self.sim.data.body_xpos[self.cube_body_id][2] table_height = self.table_full_size[2] # cube is higher than the table top above a margin # return cube_height > table_height + 0.04 return self.sim.data.body_xpos[self.cube_body_id][2] > (cube_height + table_height + 0.05) def _gripper_visualization(self): """ Do any needed visualization here. Overrides superclass implementations. """ # color the gripper site appropriately based on distance to cube if self.gripper_visualization: # get distance to cube cube_site_id = self.sim.model.site_name2id("cube") dist = np.sum( np.square( self.sim.data.site_xpos[cube_site_id] - self.sim.data.get_site_xpos("grip_site") ) ) # set RGBA for the EEF site here max_dist = 0.1 scaled = (1.0 - min(dist / max_dist, 1.)) ** 15 rgba = np.zeros(4) rgba[0] = 1 - scaled rgba[1] = scaled rgba[3] = 0.5 self.sim.model.site_rgba[self.eef_site_id] = rgba