def __init__(self): self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() # self.agent = Panda() self.agent = Sawyer() self.gripper = BaxterGripper() self.agent.set_control_loop_enabled(False) self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') self.agent_ee_tip = self.agent.get_tip() self.initial_joint_positions = self.agent.get_joint_positions()
class ReacherEnv(object): def __init__(self): self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() # self.agent = Panda() self.agent = Sawyer() self.gripper = BaxterGripper() self.agent.set_control_loop_enabled(False) self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') self.agent_ee_tip = self.agent.get_tip() self.initial_joint_positions = self.agent.get_joint_positions() def _get_state(self): # Return state containing arm joint angles/velocities & target position return (self.agent.get_joint_positions() + self.agent.get_joint_velocities() + self.target.get_position()) def reset(self): # Get a random position within a cuboid and set the target position pos = list(np.random.uniform(POS_MIN, POS_MAX)) self.target.set_position(pos) self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() return self._get_state() def step(self, action): self.agent.set_joint_target_velocities(action) # Execute action on arm self.pr.step() # Step the physics simulation ax, ay, az = self.agent_ee_tip.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target distance = (ax - tx)**2 + (ay - ty)**2 + (az - tz)**2 done = False if distance < 5: done = True self.gripper.actuate( 0, velocity=0.04 ) # if done, close the hand, 0 for close and 1 for open. self.pr.step() # Step the physics simulation reward = -np.sqrt(distance) return self._get_state(), reward, done def shutdown(self): self.pr.stop() self.pr.shutdown()
def __init__(self, visualize=True, mode="train", **params): super().__init__(visualize=visualize, mode=mode) # Scene selection scene_file_path = os.path.join( os.getcwd(), 'deep_simulation/scenes/UR10_gripper.ttt') # Simulator launch self.env = PyRep() self.env.launch(scene_file_path, headless=False) self.env.start() self.env.step() # Task related initialisations in Simulator self.vision_sensor = objects.vision_sensor.VisionSensor( "Vision_sensor") self.gripper = objects.dummy.Dummy("UR10_target") self.gripper_zero_pose = self.gripper.get_pose() self.goal = objects.dummy.Dummy("goal_target") self.goal_STL = objects.shape.Shape("goal") self.goal_STL_zero_pose = self.goal_STL.get_pose() self.grasped_STL = objects.shape.Shape("BaxterGripper") self.stacking_area = objects.shape.Shape("Plane") self.vision_sensor = objects.vision_sensor.VisionSensor( "Vision_sensor") self.baxter_gripper = BaxterGripper() self.UR10_arm = UR10() self.gripper_dummy = objects.dummy.Dummy("gripper_dummy") self.step_counter = 0 self.max_step_count = 100 self.target_pose = None self.initial_distance = None self.image_width, self.image_height = 320, 240 self.vision_sensor.set_resolution( (self.image_width, self.image_height)) self._history_len = 1 self._observation_space = Dict({ "cam_image": Box(0, 255, [self.image_height, self.image_width, 1], dtype=np.uint8) }) self._action_space = Box(-1, 1, (3, )) self._state_space = extend_space(self._observation_space, self._history_len)
def __init__(self, headless, control_mode='end_position', visual_control=False): ''' :visual_control: bool, controlled by visual state or not (vector state). ''' self.reward_offset = 10.0 self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.fall_down_offset = 0.1 # for judging the target object fall off the table self.metadata = [] # gym env format self.visual_control = visual_control self.control_mode = control_mode self.pr = PyRep() if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt') elif control_mode == 'joint_velocity': # the scene with all joints in force/torche mode for forward kinematics SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt') self.pr.launch(SCENE_FILE, headless=headless) self.pr.start() self.agent = Sawyer() self.gripper = BaxterGripper() self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here if control_mode == 'end_position': self.agent.set_control_loop_enabled(True) # if false, won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 8) # 7 DOF velocity control + 1 rotation of gripper else: raise NotImplementedError if self.visual_control == False: self.observation_space = np.zeros( 17 ) # position and velocity of 7 joints + position of the target else: self.observation_space = np.zeros(100) # dim of img! self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # object self.tip_target = Dummy( 'Sawyer_target') # the target point of the tip to move towards # self.table = Shape('diningTable') self.agent_ee_tip = self.agent.get_tip() self.tip_pos = self.agent_ee_tip.get_position() self.tip_quat = self.agent_ee_tip.get_quaternion( ) # tip rotation as quaternion, if not control the rotation # set a proper initial gesture/tip position if control_mode == 'end_position': # agent_position=self.agent.get_position() # initial_pos_offset = [0.4, 0.3, 0.2] # initial relative position of gripper to the whole arm # initial_pos = [(a + o) for (a, o) in zip(agent_position, initial_pos_offset)] initial_pos = [0.3, 0.1, 0.9] self.tip_target.set_position(initial_pos) # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously self.tip_target.set_orientation( [0, 3.1415, 1.5707], reset_dynamics=True) # make gripper face downwards self.pr.step() elif control_mode == 'joint_velocity': self.initial_joint_positions = [ 0.001815199851989746, -1.4224984645843506, 0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375 ] self.agent.set_joint_positions(self.initial_joint_positions) self.initial_joint_positions = self.agent.get_joint_positions() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position()
class ReacherEnv(object): def __init__(self, headless, control_mode='end_position', visual_control=False): ''' :visual_control: bool, controlled by visual state or not (vector state). ''' self.reward_offset = 10.0 self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.fall_down_offset = 0.1 # for judging the target object fall off the table self.metadata = [] # gym env format self.visual_control = visual_control self.control_mode = control_mode self.pr = PyRep() if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt') elif control_mode == 'joint_velocity': # the scene with all joints in force/torche mode for forward kinematics SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt') self.pr.launch(SCENE_FILE, headless=headless) self.pr.start() self.agent = Sawyer() self.gripper = BaxterGripper() self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here if control_mode == 'end_position': self.agent.set_control_loop_enabled(True) # if false, won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 8) # 7 DOF velocity control + 1 rotation of gripper else: raise NotImplementedError if self.visual_control == False: self.observation_space = np.zeros( 17 ) # position and velocity of 7 joints + position of the target else: self.observation_space = np.zeros(100) # dim of img! self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # object self.tip_target = Dummy( 'Sawyer_target') # the target point of the tip to move towards # self.table = Shape('diningTable') self.agent_ee_tip = self.agent.get_tip() self.tip_pos = self.agent_ee_tip.get_position() self.tip_quat = self.agent_ee_tip.get_quaternion( ) # tip rotation as quaternion, if not control the rotation # set a proper initial gesture/tip position if control_mode == 'end_position': # agent_position=self.agent.get_position() # initial_pos_offset = [0.4, 0.3, 0.2] # initial relative position of gripper to the whole arm # initial_pos = [(a + o) for (a, o) in zip(agent_position, initial_pos_offset)] initial_pos = [0.3, 0.1, 0.9] self.tip_target.set_position(initial_pos) # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously self.tip_target.set_orientation( [0, 3.1415, 1.5707], reset_dynamics=True) # make gripper face downwards self.pr.step() elif control_mode == 'joint_velocity': self.initial_joint_positions = [ 0.001815199851989746, -1.4224984645843506, 0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375 ] self.agent.set_joint_positions(self.initial_joint_positions) self.initial_joint_positions = self.agent.get_joint_positions() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position() def _get_state(self): # Return state containing arm joint angles/velocities & target position return np.array(self.agent.get_joint_positions() + self.agent.get_joint_velocities() + self.target.get_position()) def _is_holding(self): ''' Return is holding the target or not, return bool. ''' # Nothe that the collision check is not always accurate all the time, # for continuous conllision, maybe only the first 4-5 frames of collision can be detected pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False def _get_visual_state(self): # Return a numpy array of size (width, height, 3) return self.vision_sensor.capture_rgb( ) # A numpy array of size (width, height, 3) def _is_holding(self): # Return is holding the target or not, return bool # Nothe that the collision check is not always accurate all the time, # for continuous conllision, maybe only the first 4-5 frames of collision can be detected pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False # def _move(self, action): # ''' # Move the tip according to the action with inverse kinematics for 'end_position' control; # with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. # ''' # robot_moving_unit=0.01 # the amount of single step move of robot, not accurate # moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1 # adaptive number of moving steps, with minimal of 1 step. # # print(moving_loop_itr) # small_step = list(1./moving_loop_itr*np.array(action)) # break the action into small steps, as the robot cannot move to the target position within one frame # pos=self.agent_ee_tip.get_position() # ''' # there is a mismatch between the object set_orientation() and get_orientation(): # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). # ''' # ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get # assert len(small_step) == len(pos)+1 # 3 values for position, 1 value for rotation # for _ in range(moving_loop_itr): # for idx in range(len(pos)): # pos[idx] += small_step[idx] # self.tip_target.set_position(pos) # self.pr.step() # ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True''' # # ori_z+=small_step[3] # change the orientation along z-axis with a small step # # self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True) # make gripper face downwards # # self.pr.step() # ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously ''' # ori_z+=action[3] # self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True) # make gripper face downwards # self.pr.step() def _move(self, action): ''' Move the tip according to the action with inverse kinematics for 'end_position' control; with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. Mode 2: a close-loop control, using ik. ''' pos = self.gripper.get_position() bounding_offset = 0.1 step_factor = 0.2 # small step factor mulitplied on the gradient step calculated by inverse kinematics max_itr = 20 # maximum moving iterations max_error = 0.1 # upper bound of distance error for movement at each call rotation_norm = 5. # factor for normalization of rotation values # check if state+action will be within of the bounding box, if so, move normally; else no action. # x_min < x < x_max and y_min < y < y_max and z > z_min if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-bounding_offset: ''' there is a mismatch between the object set_orientation() and get_orientation(): the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). ''' ori_z = -self.agent_ee_tip.get_orientation()[ 2] # the minus is because the mismatch between the set and get target_pos = np.array(self.agent_ee_tip.get_position()) + np.array( action[:3]) diff = 1 itr = 0 # print('before: ', ori_z) while np.sum(np.abs(diff)) > max_error and itr < max_itr: itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos pos = cur_pos + step_factor * diff self.tip_target.set_position(pos.tolist()) self.pr.step() ''' one big step for z-rotation is enough ''' ori_z += rotation_norm * action[3] self.tip_target.set_orientation([0, np.pi, ori_z ]) # make gripper face downwards self.pr.step() else: # print("Potential Movement Out of the Bounding Box!") pass # no action if potentially out of the bounding box def reset(self): # Get a random position within a cuboid and set the target position max_itr = 10 pos = list(np.random.uniform(POS_MIN, POS_MAX)) self.target.set_position(pos) self.target.set_orientation([0, 0, 0]) self.pr.step() # changing the color or texture for domain randomization self.target.set_color( np.random.uniform(low=0, high=1, size=3).tolist() ) # set [r,g,b] 3 channel values of object color # set end position to be initialized if self.control_mode == 'end_position': # JointMode.IK self.agent.set_control_loop_enabled(True) self.tip_target.set_position( self.initial_tip_positions ) # cannot set joint positions directly due to in ik mode nor force/torch mode self.pr.step() # prevent stuck case itr = 0 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform( -0.2, 0.2, 4)) # take random actions for preventing the stuck cases self.pr.step() elif self.control_mode == 'joint_velocity': # JointMode.FORCE self.agent.set_joint_positions( self.initial_joint_positions ) # sometimes the gripper is stuck, cannot get back to initial self.pr.step() # self.table.set_collidable(True) self.gripper_left_pad.set_collidable( True ) # set the pad on the gripper to be collidable, so as to check collision self.target.set_collidable(True) if np.sum(self.gripper.get_open_amount()) < 1.5: self.gripper.actuate(1, velocity=0.5) # open the gripper self.pr.step() if self.visual_control: return self._get_visual_state() else: return self._get_state() def step(self, action): ''' Move the robot arm according to the action. If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim of gripper rotation. if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim of gripper rotation. ''' if self.control_mode == 'end_position': if action is None or action.shape[0] != 4: action = list(np.random.uniform(-0.1, 0.1, 4)) # random self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[0] != 7: print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 7)) # random self.agent.set_joint_target_velocities( action) # Execute action on arm self.pr.step() else: raise NotImplementedError ax, ay, az = self.gripper.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target distance = (ax - tx)**2 + (ay - ty)**2 + (az - tz)**2 done = False # print(self.proximity_sensor.is_detected(self.target)) current_vision = self.vision_sensor.capture_rgb( ) # capture a screenshot of the view with vision sensor plt.imshow(current_vision) plt.savefig('./img/vision.png') reward = 0 # close the gripper if close enough to the object and the object is detected with the proximity sensor if distance < 0.1 and self.proximity_sensor.is_detected( self.target) == True: # make sure the gripper is open before grasping self.gripper.actuate(1, velocity=0.5) self.pr.step() self.gripper.actuate( 0, velocity=0.5 ) # if done, close the hand, 0 for close and 1 for open. self.pr.step() # Step the physics simulation if self._is_holding(): # reward for hold here! reward += 10 done = True else: self.gripper.actuate(1, velocity=0.5) self.pr.step() elif np.sum( self.gripper.get_open_amount() ) < 1.5: # if gripper is closed due to collision or esle, open it; .get_open_amount() return list of gripper joint values self.gripper.actuate(1, velocity=0.5) self.pr.step() else: pass reward -= np.sqrt(distance) if tz < self.initial_target_positions[ 2] - self.fall_down_offset: # the object fall off the table done = True reward = -self.reward_offset if self.visual_control: return self._get_visual_state(), reward, done, {} else: return self._get_state(), reward, done, {} def shutdown(self): self.pr.stop() self.pr.shutdown()
def test_get_duplicate_gripper(self): g = BaxterGripper(1) self.assertIsInstance(g, BaxterGripper)
from os.path import dirname, join, abspath from pyrep import PyRep from pyrep.robots.arms.baxter import BaxterLeft, BaxterRight from pyrep.robots.end_effectors.baxter_gripper import BaxterGripper from pyrep.objects.dummy import Dummy from pyrep.objects.shape import Shape SCENE_FILE = join(dirname(abspath(__file__)), 'scene_baxter_pick_and_pass.ttt') pr = PyRep() pr.launch(SCENE_FILE, headless=False) pr.start() baxter_left = BaxterLeft() baxter_right = BaxterRight() baxter_gripper_left = BaxterGripper(0) baxter_gripper_right = BaxterGripper(1) cup = Shape('Cup') waypoints = [Dummy('waypoint%d' % i) for i in range(7)] print('Planning path for left arm to cup ...') path = baxter_left.get_path(position=waypoints[0].get_position(), quaternion=waypoints[0].get_quaternion()) path.visualize() # Let's see what the path looks like print('Executing plan ...') done = False while not done: done = path.step() pr.step() path.clear_visualization()
def __init__(self, headless, control_mode='joint_velocity'): ''' parameters: :headless: bool, if True, no visualization, else with visualization. :control mode: str, 'end_position' or 'joint_velocity'. ''' # set public variables self.headless = headless # if headless is True, no visualization self.reward_offset = 10.0 # reward of achieving the grasping object self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.penalty_offset = 1. # penalty value for undesired cases self.fall_down_offset = 0.1 # distance for judging the target object fall off the table self.metadata = [] # gym env argument self.control_mode = control_mode # the control mode of robotic arm: 'end_position' or 'joint_velocity' # launch and set up the scene, and set the proxy variables in represent of the counterparts in the scene self.pr = PyRep() # call the PyRep if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt' ) # scene with joints controlled by ik (inverse kinematics) elif control_mode == 'joint_velocity': # the scene with all joints in force/torch mode for forward kinematics SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt' ) # scene with joints controlled by forward kinematics self.pr.launch(SCENE_FILE, headless=headless ) # lunch the scene, headless means no visualization self.pr.start() # start the scene self.agent = Sawyer() # get the robot arm in the scene self.gripper = BaxterGripper() # get the gripper in the scene self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the left pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here self.table = Shape( 'diningTable') # the table in the scene for checking collision if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics self.agent.set_control_loop_enabled( True) # if false, inverse kinematics won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, using forward kinematics self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 7 ) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it. else: raise NotImplementedError self.observation_space = np.zeros( 17) # position and velocity of 7 joints + position of the target self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # get the target object self.agent_ee_tip = self.agent.get_tip( ) # a part of robot as the end of inverse kinematics chain for controlling self.tip_target = Dummy( 'Sawyer_target' ) # the target point of the tip (end of the robot arm) to move towards self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position # set a proper initial robot gesture or tip position if control_mode == 'end_position': initial_pos = [0.3, 0.1, 0.9] self.tip_target.set_position(initial_pos) # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously self.tip_target.set_orientation( [0, np.pi, np.pi / 2], reset_dynamics=True ) # first two dimensions along x and y axis make gripper face downwards elif control_mode == 'joint_velocity': self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \ 0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial gesture self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position()
class GraspEnv(object): ''' Sawyer robot grasping a cuboid ''' def __init__(self, headless, control_mode='joint_velocity'): ''' parameters: :headless: bool, if True, no visualization, else with visualization. :control mode: str, 'end_position' or 'joint_velocity'. ''' # set public variables self.headless = headless # if headless is True, no visualization self.reward_offset = 10.0 # reward of achieving the grasping object self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.penalty_offset = 1. # penalty value for undesired cases self.fall_down_offset = 0.1 # distance for judging the target object fall off the table self.metadata = [] # gym env argument self.control_mode = control_mode # the control mode of robotic arm: 'end_position' or 'joint_velocity' # launch and set up the scene, and set the proxy variables in represent of the counterparts in the scene self.pr = PyRep() # call the PyRep if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt' ) # scene with joints controlled by ik (inverse kinematics) elif control_mode == 'joint_velocity': # the scene with all joints in force/torch mode for forward kinematics SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt' ) # scene with joints controlled by forward kinematics self.pr.launch(SCENE_FILE, headless=headless ) # lunch the scene, headless means no visualization self.pr.start() # start the scene self.agent = Sawyer() # get the robot arm in the scene self.gripper = BaxterGripper() # get the gripper in the scene self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the left pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here self.table = Shape( 'diningTable') # the table in the scene for checking collision if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics self.agent.set_control_loop_enabled( True) # if false, inverse kinematics won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, using forward kinematics self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 7 ) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it. else: raise NotImplementedError self.observation_space = np.zeros( 17) # position and velocity of 7 joints + position of the target self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # get the target object self.agent_ee_tip = self.agent.get_tip( ) # a part of robot as the end of inverse kinematics chain for controlling self.tip_target = Dummy( 'Sawyer_target' ) # the target point of the tip (end of the robot arm) to move towards self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position # set a proper initial robot gesture or tip position if control_mode == 'end_position': initial_pos = [0.3, 0.1, 0.9] self.tip_target.set_position(initial_pos) # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously self.tip_target.set_orientation( [0, np.pi, np.pi / 2], reset_dynamics=True ) # first two dimensions along x and y axis make gripper face downwards elif control_mode == 'joint_velocity': self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \ 0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial gesture self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position() def _get_state(self): ''' Return state containing arm joint positions/velocities & target position. ''' return np.array(self.agent.get_joint_positions() + # list, dim=7 self.agent.get_joint_velocities() + # list, dim=7 self.target.get_position()) # list, dim=3 def _is_holding(self): ''' Return the state of holding the target or not, return bool. ''' # Note that the collision check is not always accurate all the time, # for continuous collision frames, maybe only the first 4-5 frames of collision can be detected. pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False def _move(self, action, bounding_offset=0.15, step_factor=0.2, max_itr=20, max_error=0.05, rotation_norm=5.): ''' Move the end effector on robot arm according to the action with inverse kinematics for 'end_position' control mode; Inverse kinematics mode control is achieved through setting the tip target instead of using .solve_ik(), because sometimes the .solve_ik() does not function correctly. Mode: a close-loop proportional control, using ik. parameters: :bounding_offset: offset of bounding box outside the valid target position range, as valid and safe range of action :step_factor: small step factor mulitplied on the difference of current and desired position, i.e. proportional factor :max_itr: maximum moving iterations :max_error: upper bound of distance error for movement at each call :rotation_norm: factor for normalization of rotation values, since the action are of the same scale for each dimension ''' pos = self.gripper.get_position() # check if state+action will be within of the bounding box, if so, move normally; otherwise the action is not conducted. # i.e. x_min < x < x_max and y_min < y < y_max and z > z_min if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset: # larger offset in z axis # there is a mismatch between the object set_orientation() and get_orientation(): # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). ori_z = -self.agent_ee_tip.get_orientation( )[2] # the minus is because the mismatch between the set_orientation() and get_orientation() target_pos = np.array(self.agent_ee_tip.get_position()) + np.array( action[:3]) diff = 1 # intialization itr = 0 while np.sum(np.abs(diff)) > max_error and itr < max_itr: itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos # difference of current and target position, close-loop control pos = cur_pos + step_factor * diff # step small step according to current difference, to prevent that ik cannot be solved self.tip_target.set_position(pos.tolist()) self.pr.step( ) # every time when setting target tip, need to call simulation step to achieve it # one big step for z-rotation is enough, but small error still exists due to the ik solver ori_z += rotation_norm * action[ 3] # normalize the rotation values, as usually same action range is used in policy for both rotation and position self.tip_target.set_orientation([ 0, np.pi, ori_z ]) # make gripper face downwards and rotate ori_z along z axis self.pr.step() else: print("Potential Movement Out of the Bounding Box!") pass # no action if potentially moving out of the bounding box def reinit(self): ''' Reinitialize the environment, e.g. when the gripper is broken during exploration. ''' self.shutdown() # shutdown the original env first self.__init__(self.headless) # initialize with the same headless mode def reset(self, random_target=False): ''' Get a random position within a cuboid and set the target position. ''' # set target object if random_target: # randomize pos = list(np.random.uniform( POS_MIN, POS_MAX)) # sample from uniform in valid range self.target.set_position(pos) # random position else: # non-randomize self.target.set_position( self.initial_target_positions) # fixed position self.target.set_orientation([0, 0, 0]) self.pr.step() # set end position to be initialized if self.control_mode == 'end_position': # JointMode.IK self.agent.set_control_loop_enabled(True) # ik mode self.tip_target.set_position( self.initial_tip_positions ) # cannot set joint positions directly due to in ik mode or force/torch mode self.pr.step() # prevent stuck cases. as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly, therefore taking # some random action when desired position is not reached. itr = 0 max_itr = 10 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform( -0.2, 0.2, 4)) # take random actions for preventing the stuck cases self.pr.step() elif self.control_mode == 'joint_velocity': # JointMode.FORCE self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() # set collidable, for collision detection self.gripper_left_pad.set_collidable( True ) # set the pad on the gripper to be collidable, so as to check collision self.target.set_collidable(True) # open the gripper if it's not fully open if np.sum(self.gripper.get_open_amount()) < 1.5: self.gripper.actuate(1, velocity=0.5) self.pr.step() return self._get_state() # return current state of the environment def step(self, action): ''' Move the robot arm according to the action. If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper; if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper; ''' # initialization done = False # episode finishes reward = 0 hold_flag = False # holding the object or not if self.control_mode == 'end_position': if action is None or action.shape[ 0] != 4: # check if action is valid print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 4)) # random self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[ 0] != 7: # check if action is valid print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 7)) # random self.agent.set_joint_target_velocities( action) # Execute action on arm self.pr.step() else: raise NotImplementedError ax, ay, az = self.gripper.get_position() if math.isnan( ax): # capture the broken gripper cases during exploration print('Gripper position is nan.') self.reinit() done = True tx, ty, tz = self.target.get_position() offset = 0.08 # augmented reward: offset of target position above the target object sqr_distance = (ax - tx)**2 + (ay - ty)**2 + ( az - (tz + offset) )**2 # squared distance between the gripper and the target object ''' for visual-based control only, large time consumption! ''' # current_vision = self.vision_sensor.capture_rgb() # capture a screenshot of the view with vision sensor # plt.imshow(current_vision) # plt.savefig('./img/vision.png') # close the gripper if close enough to the object and the object is detected with the proximity sensor if sqr_distance < 0.1 and self.proximity_sensor.is_detected( self.target) == True: # make sure the gripper is open before grasping self.gripper.actuate(1, velocity=0.5) self.pr.step() self.gripper.actuate( 0, velocity=0.5 ) # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame self.pr.step() # Step the physics simulation if self._is_holding(): reward += self.reward_offset # extra reward for grasping the object done = True hold_flag = True else: self.gripper.actuate(1, velocity=0.5) self.pr.step() elif np.sum( self.gripper.get_open_amount() ) < 1.5: # if gripper is closed (not fully open) due to collision or esle, open it; get_open_amount() return list of gripper joint values self.gripper.actuate(1, velocity=0.5) self.pr.step() else: pass # the base reward is negative distance to target reward -= np.sqrt(sqr_distance) # case when the object fall off the table if tz < self.initial_target_positions[2] - self.fall_down_offset: done = True reward = -self.reward_offset # Augmented reward for orientation: better grasping gesture if the gripper has vertical orientation to the target object. # Note: the frame of gripper has a difference of pi/2 in z orientation as the frame of target. desired_orientation = np.concatenate( ([np.pi, 0], [self.target.get_orientation()[2] ])) # gripper vertical to target in z and facing downwards, rotation_penalty = -np.sum( np.abs( np.array(self.agent_ee_tip.get_orientation()) - desired_orientation)) rotation_norm = 0.02 reward += rotation_norm * rotation_penalty # Penalty for collision with the table if self.gripper_left_pad.check_collision(self.table): reward -= self.penalty_offset #print('Penalize collision with table.') if math.isnan(reward): # capture the cases of numerical problem reward = 0. return self._get_state(), reward, done, {'finished': hold_flag} def shutdown(self): ''' Close the simulator ''' self.pr.stop() self.pr.shutdown()
def __init__(self, headless, control_mode='joint_velocity'): ''' :headless: bool, if True, no visualization, else with visualization. :control mode: str, 'end_position' or 'joint_velocity'. ''' self.headless = headless self.reward_offset = 10.0 # reward of achieving the grasping object self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.fall_down_offset = 0.1 # for judging the target object fall off the table self.metadata = [] # gym env format self.control_mode = control_mode # the control mode of robotic arm: 'end_position' or 'joint_velocity' self.pr = PyRep() if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt' ) # scene with joints controlled by ik (inverse kinematics) elif control_mode == 'joint_velocity': # the scene with all joints in force/torch mode for forward kinematics SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt' ) # scene with joints controlled by forward kinematics self.pr.launch(SCENE_FILE, headless=headless ) # lunch the scene, headless means no visualization self.pr.start() # start the scene self.agent = Sawyer() # get the robot arm in the scene self.gripper = BaxterGripper() # get the gripper in the scene self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here self.initial_joint_positions = [ 0.162, -1.109, 0.259, 1.866, 2.949, -0.811, 4.440 ] # a good staring gesture of robot, avoid stucking and colliding self.agent.set_joint_positions(self.initial_joint_positions) if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics self.agent.set_control_loop_enabled( True) # if false, ik won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, forward kinematics self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 7 ) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it. else: raise NotImplementedError self.observation_space = np.zeros( 17) # position and velocity of 7 joints + position of the target self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # get the target object self.agent_ee_tip = self.agent.get_tip( ) # a part of robot as the end of inverse kinematics chain for controlling self.tip_target = Dummy( 'Sawyer_target' ) # the target point of the tip (end of the robot arm) to move towards self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position self.tip_quat = self.agent_ee_tip.get_quaternion( ) # tip rotation as quaternion, if not control the rotation # reference for reset self.initial_tip_positions = [0.3, 0.1, 1.] self.initial_z_rotation = np.pi / 2. self.initial_target_positions = self.target.get_position()
class ReacherEnv(object): def __init__(self, headless, control_mode='joint_velocity'): ''' :headless: bool, if True, no visualization, else with visualization. :control mode: str, 'end_position' or 'joint_velocity'. ''' self.headless = headless self.reward_offset = 10.0 # reward of achieving the grasping object self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.fall_down_offset = 0.1 # for judging the target object fall off the table self.metadata = [] # gym env format self.control_mode = control_mode # the control mode of robotic arm: 'end_position' or 'joint_velocity' self.pr = PyRep() if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt' ) # scene with joints controlled by ik (inverse kinematics) elif control_mode == 'joint_velocity': # the scene with all joints in force/torch mode for forward kinematics SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt' ) # scene with joints controlled by forward kinematics self.pr.launch(SCENE_FILE, headless=headless ) # lunch the scene, headless means no visualization self.pr.start() # start the scene self.agent = Sawyer() # get the robot arm in the scene self.gripper = BaxterGripper() # get the gripper in the scene self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here self.initial_joint_positions = [ 0.162, -1.109, 0.259, 1.866, 2.949, -0.811, 4.440 ] # a good staring gesture of robot, avoid stucking and colliding self.agent.set_joint_positions(self.initial_joint_positions) if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics self.agent.set_control_loop_enabled( True) # if false, ik won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, forward kinematics self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 7 ) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it. else: raise NotImplementedError self.observation_space = np.zeros( 17) # position and velocity of 7 joints + position of the target self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # get the target object self.agent_ee_tip = self.agent.get_tip( ) # a part of robot as the end of inverse kinematics chain for controlling self.tip_target = Dummy( 'Sawyer_target' ) # the target point of the tip (end of the robot arm) to move towards self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position self.tip_quat = self.agent_ee_tip.get_quaternion( ) # tip rotation as quaternion, if not control the rotation # reference for reset self.initial_tip_positions = [0.3, 0.1, 1.] self.initial_z_rotation = np.pi / 2. self.initial_target_positions = self.target.get_position() def _get_state(self): ''' Return state containing arm joint angles/velocities & target position. ''' return np.array(self.agent.get_joint_positions() + # list self.agent.get_joint_velocities() + # list self.target.get_position()) # list def _is_holding(self): ''' Return is holding the target or not, return bool. ''' # Note that the collision check is not always accurate all the time, # for continuous conllision, maybe only the first 4-5 frames of collision can be detected pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False # def _move(self, action): # ''' Mode 0: as given by original examples in Pyrep repo ''' # # Deprecated, always fail the ik! Move the tip according to the action with inverse kinematics for 'end_position' control. # action=list(action) # pos=self.agent_ee_tip.get_position() # assert len(action) == len(pos) # for idx in range(len(pos)): # pos[idx] += action[idx] # print('pos: ', pos) # new_joint_angles = self.agent.solve_ik(pos, quaternion=self.tip_quat) # self.agent.set_joint_target_positions(new_joint_angles) # def _move(self, action): # ''' # Deprecated! # Move the tip according to the action with inverse kinematics for 'end_position' control; # with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. # Mode 1: an open-loop control, using ik; not as accurate as the closed-loop control. # ''' # pos=self.gripper.get_position() # bounding_offset=0.1 # robot_moving_unit=0.01 # the amount of single step move of robot, not accurate; the smaller the value, the smoother the movement. # # check if state+action will be within of the bounding box, if so, move normally; else no action. # # x_min < x < x_max and y_min < y < y_max and z > z_min # if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ # and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset \ # and pos[2]+action[2] > POS_MIN[2]-bounding_offset: # moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1 # adaptive number of moving steps, with minimal of 1 step; the larger it is, the more accurate for each movement. # small_step = list(1./moving_loop_itr*np.array(action)) # break the action into small steps, as the robot cannot move to the target position within one frame # ''' # there is a mismatch between the object set_orientation() and get_orientation(): # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). # ''' # ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get # assert len(small_step) == len(pos)+1 # 3 values for position, 1 value for rotation # for _ in range(moving_loop_itr): # for idx in range(len(pos)): # pos[idx] += small_step[idx] # self.tip_target.set_position(pos) # self.pr.step() # ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True''' # ori_z+=small_step[3] # change the orientation along z-axis with a small step # self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True) # make gripper face downwards # self.pr.step() # ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously ''' # # ori_z+=action[3] # # self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True) # make gripper face downwards # # self.pr.step() # else: # print("Potential Movement Out of the Bounding Box!") # pass # no action if potentially out of the bounding box def _move(self, action, bounding_offset=0.15, step_factor=0.4, max_itr=40, max_error=0.01, rotation_norm=5.): ''' Move the tip according to the action with inverse kinematics for 'end_position' control; with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. Mode 2: a close-loop control, using ik. parameters: :bounding_offset: offset of bounding box and valid target position range, bounding box is larger :step_factor: small step factor mulitplied on the gradient step calculated by inverse kinematics :max_itr=20: maximum moving iterations :max_error: upper bound of distance error for movement at each call :rotation_norm: factor for normalization of rotation values ''' pos = self.gripper.get_position() # check if state+action will be within of the bounding box, if so, move normally; else action is not conducted. # i.e. x_min < x < x_max and y_min < y < y_max and z > z_min if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset: # larger offset in z axis ''' there is a mismatch between the object set_orientation() and get_orientation(): the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). ''' ori_z = -self.agent_ee_tip.get_orientation()[ 2] # the minus is because the mismatch between the set and get target_pos = np.array(self.agent_ee_tip.get_position()) + np.array( action[:3]) diff = 1 itr = 0 while np.sum(np.abs(diff)) > max_error and itr < max_itr: # print(action[:3], np.sum(np.abs(diff))) itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos # difference of current and target position, close-loop control pos = cur_pos + step_factor * diff # step small step according to current difference, to prevent that ik cannot be solved self.tip_target.set_position(pos.tolist()) self.pr.step( ) # every time set target tip, need to call simulation step to achieve it ''' one big step for z-rotation is enough, but still small error exists ''' ori_z += rotation_norm * action[ 3] # normalize the rotation values, as usually same action range is used in policy for both rotation and position self.tip_target.set_orientation( [0, 3.1415, ori_z]) # make gripper face downwards and rotate ori_z self.pr.step() # print('after: ', ori_z, -self.agent_ee_tip.get_orientation()[2]) else: print("Potential Movement Out of the Bounding Box!") pass # no action if potentially moving out of the bounding box def reset(self, random_target=False): ''' Reset with the _move() function. Get a random position within a cuboid and set the target position. ''' if random_target: # randomly set the target position pos = list(np.random.uniform( POS_MIN, POS_MAX)) # sample from uniform in valid range self.target.set_position(pos) # random position else: self.target.set_position( self.initial_target_positions) # fixed position self.target.set_orientation([0, 0, 0]) self.pr.step() # set end position to be initialized if self.control_mode == 'end_position': # JointMode.IK curr_pos = self.agent_ee_tip.get_position() a = np.array(self.initial_tip_positions) - np.array(curr_pos) self._move(np.concatenate((a, [self.initial_z_rotation])), max_itr=80) # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly itr = 0 max_itr = 10 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform( -0.2, 0.2, 4)) # take random actions for preventing the stuck cases self.pr.step() elif self.control_mode == 'joint_velocity': # JointMode.FORCE self.agent.set_joint_positions( self.initial_joint_positions ) # sometimes the gripper is stuck, cannot get back to initial self.pr.step() # set collidable, for collision detection self.gripper_left_pad.set_collidable( True ) # set the pad on the gripper to be collidable, so as to check collision self.target.set_collidable(True) # open the gripper if it's not fully open if np.sum(self.gripper.get_open_amount()) < 1.5: self.gripper.actuate(1, velocity=0.5) self.pr.step() return self._get_state() # return current state of the environment # def reset(self, random_target=False): # ''' # Deprecated! # Get a random position within a cuboid and set the target position. # ''' # print('reset') # if random_target: # randomly set the target position # pos = list(np.random.uniform(POS_MIN, POS_MAX)) # sample from uniform in valid range # self.target.set_position(pos) # random position # else: # self.target.set_position(self.initial_target_positions) # fixed position # self.target.set_orientation([0,0,0]) # # set end position to be initialized # if self.control_mode == 'end_position': # JointMode.IK # self.agent.set_control_loop_enabled(True) # self.tip_target.set_position(self.initial_tip_positions) # cannot set joint positions directly due to in ik mode nor force/torch mode # self.pr.step() # # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly # itr=0 # max_itr=10 # while np.sum(np.abs(np.array(self.agent_ee_tip.get_position()-np.array(self.initial_tip_positions))))>0.1 and itr<max_itr: # itr+=1 # self.step(np.random.uniform(-0.2,0.2,4)) # take random actions for preventing the stuck cases # self.pr.step() # elif self.control_mode == 'joint_velocity': # JointMode.FORCE # self.agent.set_joint_positions(self.initial_joint_positions) # sometimes the gripper is stuck, cannot get back to initial # # set collidable, for collision detection # self.gripper_left_pad.set_collidable(True) # set the pad on the gripper to be collidable, so as to check collision # self.target.set_collidable(True) # # open the gripper if it's not fully open # if np.sum(self.gripper.get_open_amount())<1.5: # self.gripper.actuate(1, velocity=0.5) # self.pr.step() # return self._get_state() # return current state of the environment def step(self, action): ''' Move the robot arm according to the action. If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper; if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper; ''' done = False if self.control_mode == 'end_position': if action is None or action.shape[0] != 4: print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 4)) # random # print(action, self.agent_ee_tip.get_position()) self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[0] != 7: print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 7)) # random self.agent.set_joint_target_velocities( action) # Execute action on arm self.pr.step() # if no pr.step() the action won't be conducted else: raise NotImplementedError ax, ay, az = self.gripper.get_position() if math.isnan( ax ): # capture the case when the gripper is broken during exporation self.__init__(headless=self.headless) done = True tx, ty, tz = self.target.get_position() distance = (ax - tx)**2 + (ay - ty)**2 + ( az - tz)**2 # distance between the gripper and the target object ''' for visual-based control only, large time consumption! ''' # current_vision = self.vision_sensor.capture_rgb() # capture a screenshot of the view with vision sensor # plt.imshow(current_vision) # plt.savefig('./img/vision.png') reward = 0 # close the gripper if close enough to the object and the object is detected with the proximity sensor if distance < 0.1 and self.proximity_sensor.is_detected( self.target) == True: # make sure the gripper is open before grasping self.gripper.actuate(1, velocity=0.5) self.pr.step() self.gripper.actuate( 0, velocity=0.5 ) # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame self.pr.step() # Step the physics simulation if self._is_holding(): # reward for hold here! reward += self.reward_offset # extra reward for grasping the object done = True else: self.gripper.actuate(1, velocity=0.5) self.pr.step() elif np.sum( self.gripper.get_open_amount() ) < 1.5: # if gripper is closed (not fully open) due to collision or esle, open it; .get_open_amount() return list of gripper joint values self.gripper.actuate(1, velocity=0.5) self.pr.step() else: pass reward -= np.clip( np.sqrt(distance), 0, self.reward_offset ) # Reward is negative distance to target; clipped for removing abnormal cases if tz < self.initial_target_positions[ 2] - self.fall_down_offset: # the object fall off the table done = True reward = -self.reward_offset # can also set reward for orientation, same orientation for target and gripper, they are actually vertical, so can grasp # reward += np.sqrt(np.array(self.agent_ee_tip.get_orientation())-np.array(self.target.get_orientation())) return self._get_state(), reward, done, {} def shutdown(self): ''' Close the simulator ''' self.pr.stop() self.pr.shutdown()
def __init__(self, obs_space_keys=('pov', 'arm'), scene_file='rozum_pyrep.ttt', headless=True, video_path=None, pose_sigma=20, randomize=False, sparse=False, camera_resolution=(256, 256), step_limit=200): self.obs_space_keys = (obs_space_keys, ) if isinstance( obs_space_keys, str) else obs_space_keys # PyRep self._pyrep = PyRep() self._pyrep.launch(scene_file, headless=headless) self._pyrep.start() self.rozum = Rozum() self.rozum.set_control_loop_enabled(True) self.gripper = BaxterGripper() self.gripper.set_control_loop_enabled(True) self._initial_robot_state = (self.rozum.get_configuration_tree(), self.gripper.get_configuration_tree()) self.cube = Shape("Cube") self.graspable_objects = [ self.cube, ] self.camera = VisionSensor("render") self.camera.set_resolution(list(camera_resolution)) self.camera0 = VisionSensor("render0") self.camera0.set_resolution(list(camera_resolution)) self.rozum_tip = self.rozum.get_tip() # Action and Observation spaces self.angles_scale = np.array( [np.pi for _ in range(self.rozum.num_joints)]) low = np.array([-20 / 180 for _ in range(self.rozum.num_joints)] + [ 0., ]) high = np.array([20 / 180 for _ in range(self.rozum.num_joints)] + [ 1., ]) self.action_space = gym.spaces.Box(low=low, high=high) angle_bounds = self.rozum.get_joint_intervals()[1] low = np.array([bound[0] for bound in angle_bounds] + [ 0., ]) high = np.array([bound[0] + bound[1] for bound in angle_bounds] + [ 1., ]) self.angles_bounds = gym.spaces.Box(low=low[:-1], high=high[:-1]) self._available_obs_spaces = dict() self._render_dict = dict() self._available_obs_spaces['pov'] = gym.spaces.Box( shape=self.camera.resolution + [3], low=0, high=255, dtype=np.uint8) self._render_dict['pov'] = lambda: self.get_image(self.camera) self._available_obs_spaces['pov0'] = gym.spaces.Box( shape=self.camera0.resolution + [3], low=0, high=255, dtype=np.uint8) self._render_dict['pov0'] = lambda: self.get_image(self.camera0) low = np.array([bound[0] for bound in angle_bounds] * 2 + [0., 0., -1., -1., -1.]) high = np.array([bound[0] + bound[1] for bound in angle_bounds] * 2 + [1., 1., 1., 1., 1.]) self._available_obs_spaces['arm'] = gym.spaces.Box(low=low, high=high, dtype=np.float32) self._render_dict['arm'] = self.get_arm_state self._available_obs_spaces['cube'] = gym.spaces.Box(shape=(6, ), low=-np.inf, high=np.inf, dtype=np.float32) self._render_dict['cube'] = self.get_cube_state self._available_obs_spaces['time'] = gym.spaces.Box(low=np.zeros(1), high=np.ones(1), dtype=np.uint8) self._render_dict['time'] = lambda: [ self.current_step / self.step_limit, ] try: if len(self.obs_space_keys) > 1: self.observation_space = gym.spaces.Dict({ key: self._available_obs_spaces[key] for key in self.obs_space_keys }) else: self.observation_space = self._available_obs_spaces[ self.obs_space_keys[0]] except KeyError as err: message = "Observation space {} is not supported.".format( err.args[0]) message += " Available observation space keys: " message += ", ".join(self._available_obs_spaces.keys()) err.args = (message, ) raise # Environment settings self.reward_range = None self.current_step = 0 self.step_limit = step_limit self._start_arm_joint_pos = self.rozum.get_joint_positions() self._start_gripper_joint_pos = self.gripper.get_joint_positions() self.init_cube_pose = self.cube.get_pose() self.pose_sigma = pose_sigma self.sparse = sparse self.randomize = randomize # Video self.recording = list() self.current_episode = 0 self.rewards = [0] self.path = video_path if video_path: def video_step(): self._pyrep.step() self.recording.append(self.get_image(self.camera)[..., ::-1]) self.sim_step = video_step else: self.sim_step = self._pyrep.step
class RozumEnv(gym.Env): metadata = {'render.modes': ['human']} def __init__(self, obs_space_keys=('pov', 'arm'), scene_file='rozum_pyrep.ttt', headless=True, video_path=None, pose_sigma=20, randomize=False, sparse=False, camera_resolution=(256, 256), step_limit=200): self.obs_space_keys = (obs_space_keys, ) if isinstance( obs_space_keys, str) else obs_space_keys # PyRep self._pyrep = PyRep() self._pyrep.launch(scene_file, headless=headless) self._pyrep.start() self.rozum = Rozum() self.rozum.set_control_loop_enabled(True) self.gripper = BaxterGripper() self.gripper.set_control_loop_enabled(True) self._initial_robot_state = (self.rozum.get_configuration_tree(), self.gripper.get_configuration_tree()) self.cube = Shape("Cube") self.graspable_objects = [ self.cube, ] self.camera = VisionSensor("render") self.camera.set_resolution(list(camera_resolution)) self.camera0 = VisionSensor("render0") self.camera0.set_resolution(list(camera_resolution)) self.rozum_tip = self.rozum.get_tip() # Action and Observation spaces self.angles_scale = np.array( [np.pi for _ in range(self.rozum.num_joints)]) low = np.array([-20 / 180 for _ in range(self.rozum.num_joints)] + [ 0., ]) high = np.array([20 / 180 for _ in range(self.rozum.num_joints)] + [ 1., ]) self.action_space = gym.spaces.Box(low=low, high=high) angle_bounds = self.rozum.get_joint_intervals()[1] low = np.array([bound[0] for bound in angle_bounds] + [ 0., ]) high = np.array([bound[0] + bound[1] for bound in angle_bounds] + [ 1., ]) self.angles_bounds = gym.spaces.Box(low=low[:-1], high=high[:-1]) self._available_obs_spaces = dict() self._render_dict = dict() self._available_obs_spaces['pov'] = gym.spaces.Box( shape=self.camera.resolution + [3], low=0, high=255, dtype=np.uint8) self._render_dict['pov'] = lambda: self.get_image(self.camera) self._available_obs_spaces['pov0'] = gym.spaces.Box( shape=self.camera0.resolution + [3], low=0, high=255, dtype=np.uint8) self._render_dict['pov0'] = lambda: self.get_image(self.camera0) low = np.array([bound[0] for bound in angle_bounds] * 2 + [0., 0., -1., -1., -1.]) high = np.array([bound[0] + bound[1] for bound in angle_bounds] * 2 + [1., 1., 1., 1., 1.]) self._available_obs_spaces['arm'] = gym.spaces.Box(low=low, high=high, dtype=np.float32) self._render_dict['arm'] = self.get_arm_state self._available_obs_spaces['cube'] = gym.spaces.Box(shape=(6, ), low=-np.inf, high=np.inf, dtype=np.float32) self._render_dict['cube'] = self.get_cube_state self._available_obs_spaces['time'] = gym.spaces.Box(low=np.zeros(1), high=np.ones(1), dtype=np.uint8) self._render_dict['time'] = lambda: [ self.current_step / self.step_limit, ] try: if len(self.obs_space_keys) > 1: self.observation_space = gym.spaces.Dict({ key: self._available_obs_spaces[key] for key in self.obs_space_keys }) else: self.observation_space = self._available_obs_spaces[ self.obs_space_keys[0]] except KeyError as err: message = "Observation space {} is not supported.".format( err.args[0]) message += " Available observation space keys: " message += ", ".join(self._available_obs_spaces.keys()) err.args = (message, ) raise # Environment settings self.reward_range = None self.current_step = 0 self.step_limit = step_limit self._start_arm_joint_pos = self.rozum.get_joint_positions() self._start_gripper_joint_pos = self.gripper.get_joint_positions() self.init_cube_pose = self.cube.get_pose() self.pose_sigma = pose_sigma self.sparse = sparse self.randomize = randomize # Video self.recording = list() self.current_episode = 0 self.rewards = [0] self.path = video_path if video_path: def video_step(): self._pyrep.step() self.recording.append(self.get_image(self.camera)[..., ::-1]) self.sim_step = video_step else: self.sim_step = self._pyrep.step def get_arm_state(self): arm = self.rozum.get_joint_positions() arm += self.rozum.get_joint_target_positions() arm += self.gripper.get_open_amount() arm += self.rozum_tip.get_position().tolist() return arm def get_cube_state(self): box = self.cube.get_position().tolist() box += self.cube.get_orientation().tolist() return box def sample_action(self): return self.action_space.sample() def step(self, action: list): done = False info = dict() joint_action, ee_action = action[:-1], action[-1] distance_mod = 3 scale = 100 # m -> cm previous_n = int( self._get_distance(self.rozum_tip, self.cube) * scale) // distance_mod grasped = self._robot_step(ee_action, joint_action) _, _, arm_z = self.rozum.joints[-1].get_position() tx, ty, tz = self.cube.get_position() pose_filter = arm_z > (tz + 0.05) current_distance = self._get_distance(self.rozum_tip, self.cube) current_n = int(current_distance * scale) // distance_mod reward = 0 if not self.sparse: reward += previous_n - current_n if reward > 0: reward *= pose_filter state = self.render() info['distance'] = current_distance if grasped: reward += 10 done = True info['grasped'] = 1 elif self.current_step >= self.step_limit: done = True info['grasped'] = 0 elif self._get_distance(self.rozum.joints[0], self.cube) > 0.76: done = True reward = -1 info['grasped'] = 0 elif tz < 0.5: done = True reward = -1 info['grasped'] = 0 self.rewards.append(reward) return state, reward, done, info @staticmethod def _get_distance(first_object, second_object): x, y, z = first_object.get_position() tx, ty, tz = second_object.get_position() distance = np.sqrt((x - tx)**2 + (y - ty)**2 + (z - tz)**2) return distance def _robot_step(self, ee_action, joint_action): grasped = False current_ee = (1.0 if np.mean(self.gripper.get_open_amount()) > 0.8 else 0.0) if ee_action > 0.5: ee_action = 1.0 elif ee_action < 0.5: ee_action = 0.0 if current_ee != ee_action: gripper_done = False self.rozum.set_joint_target_positions( self.rozum.get_joint_positions()) while not gripper_done: gripper_done = self.gripper.actuate(ee_action, velocity=0.2) self.sim_step() self.current_step += 1 if ee_action == 0.0: for g_obj in self.graspable_objects: grasped = self.gripper.grasp(g_obj) else: position = [ j + a * scale for j, a, scale in zip(self.rozum.get_joint_positions(), joint_action, self.angles_scale) ] position = list( np.clip(position, self.angles_bounds.low, self.angles_bounds.high)) self.rozum.set_joint_target_positions(position) for _ in range(4): self.sim_step() self.current_step += 1 return grasped def reset(self): self.gripper.release() arm, gripper = self._initial_robot_state self._pyrep.set_configuration_tree(arm) self._pyrep.set_configuration_tree(gripper) self.rozum.set_joint_positions(self._start_arm_joint_pos) self.rozum.set_joint_target_velocities([0] * len(self.rozum.joints)) self.gripper.set_joint_positions(self._start_gripper_joint_pos) self.gripper.set_joint_target_velocities([0] * len(self.gripper.joints)) # Initialize scene if self.randomize: self.randomize_object() pose = self.init_cube_pose.copy() pose[0] += np.random.uniform(0.0, 0.2) # max distance ~ 0.76 pose[1] += np.random.uniform(-0.15, 0.15) self.cube.set_pose(pose) random_action = np.random.normal( 0., self.pose_sigma, len(self._start_arm_joint_pos)) / 180 * np.pi position = [ angle + action for angle, action in zip(self._start_arm_joint_pos, random_action) ] self.rozum.set_joint_target_positions(position) for _ in range(4): self._pyrep.step() self.current_step = 0 state = self.render() # Video if len(self.recording) > 0: name = str(self.current_episode).zfill(4) + "r" + str( sum(map(int, self.rewards))).zfill(4) + ".mp4" full_path = os.path.join(self.path, name) self.save_video(full_path, video=self.recording) self.current_episode += 1 self.rewards = [0] self.recording = list() self.sim_step() return state def render(self, mode='human'): if len(self.obs_space_keys) > 1: state = { key: self._render_dict[key]() for key in self.obs_space_keys } else: state = self._render_dict[self.obs_space_keys[0]]() return state @staticmethod def get_image(sensor): img = sensor.capture_rgb() img *= 255 img = img.astype('uint8') return img def close(self): self._pyrep.stop() self._pyrep.shutdown() @staticmethod def save_video(filename, video): """ saves video from list of np.array images :param filename: filename or path to file :param video: [image, ..., image] :return: """ size_x, size_y, size_z = video[0].shape out = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (size_x, size_y)) for image in video: out.write(image) out.release() cv2.destroyAllWindows() def randomize_object(self): handle = self.cube.get_handle() sim.simRemoveObject(handle) sizes = [max(random() * 0.1, 0.02), 0.05] objects = list() position = [0, 0, 0] mass = 0.1 # Create cube with random size s = sizes[0] objects.append( Shape.create(type=PrimitiveShape.CUBOID, size=[s, s, s], position=position, mass=mass, color=[random() for _ in range(3)])) index = sample(range(len(position) - 1), 1)[0] sign = sample([1, -1], 1)[0] position[index] += sum(sizes) * 0.5 * sign s = sizes[-1] # Create cube with fix size objects.append( Shape.create(type=PrimitiveShape.CUBOID, size=[s, s, s], position=position, mass=mass, color=[random() for _ in range(3)])) handles = [o.get_handle() for o in objects] handle = sim.simGroupShapes(handles) self.cube = Shape(handle) self.graspable_objects = [ self.cube, ]