class TestProximitySensors(TestCore): def setUp(self): super().setUp() self.sensor = ProximitySensor('proximity_sensor') def test_is_detected(self): ob1 = Shape('simple_model') ob2 = Shape('dynamic_cube') self.assertTrue(self.sensor.is_detected(ob1)) self.assertFalse(self.sensor.is_detected(ob2))
class TestProximitySensors(TestCore): def setUp(self): super().setUp() self.sensor = ProximitySensor('proximity_sensor') def test_read(self): self.pyrep.step() distance, object = self.sensor.read() self.pyrep.step() self.assertAlmostEqual(distance, 0.1) def test_is_detected(self): ob1 = Shape('simple_model') ob2 = Shape('dynamic_cube') self.assertTrue(self.sensor.is_detected(ob1)) self.assertFalse(self.sensor.is_detected(ob2))
class WipeDesk(Task): def init_task(self) -> None: self.dirt_spots = [] self.sponge = Shape('sponge') self.sensor = ProximitySensor('sponge_sensor') self.register_graspable_objects([self.sponge]) boundaries = [Shape('dirt_boundary')] _, _, self.z_boundary = boundaries[0].get_position() self.b = SpawnBoundary(boundaries) def init_episode(self, index: int) -> List[str]: self._place_dirt() self.register_success_conditions([EmptyCondition(self.dirt_spots)]) return [ 'wipe dirt off the desk', 'use the sponge to clean up the desk', 'remove the dirt from the desk', 'grip the sponge and wipe it back and forth over any dirt you ' 'see', 'clean up the mess', 'wipe the dirt up' ] def variation_count(self) -> int: return 1 def step(self) -> None: for d in self.dirt_spots: if self.sensor.is_detected(d): self.dirt_spots.remove(d) d.remove() def cleanup(self) -> None: for d in self.dirt_spots: d.remove() self.dirt_spots = [] def _place_dirt(self): for i in range(DIRT_POINTS): spot = Shape.create(type=PrimitiveShape.CUBOID, size=[.005, .005, .001], mass=0, static=True, respondable=False, renderable=True, color=[0.58, 0.29, 0.0]) spot.set_parent(self.get_base()) spot.set_position([-1, -1, self.z_boundary + 0.001]) self.b.sample(spot, min_distance=0.00, min_rotation=(0.00, 0.00, 0.00), max_rotation=(0.00, 0.00, 0.00)) self.dirt_spots.append(spot) self.b.clear()
class SuctionCup(Shape): """Represents all suction cups. """ def __init__(self, count: int, name: str, base_name: str = None): suffix = '' if count == 0 else '#%d' % (count - 1) super().__init__(name + suffix if base_name is None else base_name + suffix) self._proximity_sensor = ProximitySensor('%s_sensor%s' % (name, suffix)) self._attach_point = ForceSensor('%s_connection%s' % (name, suffix)) self._old_parents = [] self._grasped_objects = [] def grasp(self, obj: Object) -> bool: """Attach the object to the suction cup if it is detected. Note: The does not move the object up to the suction cup. Therefore, the proximity sensor should have a short range in order for the suction grasp to look realistic. :param obj: The object to grasp if detected. :return: True if the object was detected/grasped. """ detected = self._proximity_sensor.is_detected(obj) # Check if detected and that we are not already grasping it. if detected and obj not in self._grasped_objects: self._grasped_objects.append(obj) self._old_parents.append(obj.get_parent()) # type: ignore obj.set_parent(self._attach_point, keep_in_place=True) return detected def release(self) -> None: """Release any objects that have been sucked. Note: The does not actuate the gripper, but instead simply detaches any grasped objects. """ for grasped_obj, old_parent in zip(self._grasped_objects, self._old_parents): # Check if the object still exists if grasped_obj.still_exists(): grasped_obj.set_parent(old_parent, keep_in_place=True) self._grasped_objects = [] self._old_parents = [] def get_grasped_objects(self) -> List[Object]: """Gets the objects that are currently in the suction cup. :return: A list of grasped objects. """ return self._grasped_objects
class GraspEnv(object): def __init__(self, headless, control_mode='joint_velocity'): self.headless = headless self.reward_offset = 10.0 self.reward_range = self.reward_offset self.penalty_offset = 1 #self.penalty_offset = 1. self.fall_down_offset = 0.1 self.metadata = [] #gym env argument self.control_mode = control_mode #launch and setup the scene, and set the proxy variables in present of the counterparts in the scene self.pr = PyRep() if control_mode == 'end_position': SCENE_FILE = join(dirname(abspath(__file__)), './scenes/UnicarRobot_ik.ttt') elif control_mode == 'joint_velocity': SCENE_FILE = join(dirname(abspath(__file__)), './scenes/UnicarRobot.ttt') self.pr.launch(SCENE_FILE, headless=headless) self.pr.start() self.agent = UnicarRobotArm() #drehkranz + UR10 #self.gripper = UnicarRobotGreifer()#suction #self.suction = UnicarRobotGreifer() self.suction = Shape("UnicarRobotGreifer_body_sub0") self.proximity_sensor = ProximitySensor('UnicarRobotGreifer_sensor') self.table = Shape('UnicarRobotTable') self.carbody = Shape('UnicarRobotCarbody') if control_mode == 'end_position': self.agent.set_control_loop_enabled(True) self.action_space = np.zeros(4) elif control_mode == 'joint_velocity': self.agent.set_control_loop_enabled(False) self.action_space = np.zeros(7) else: raise NotImplementedError #self.observation_space = np.zeros(17) self.observation_space = np.zeros(20) self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape("UnicarRobotTarget") #Box self.agent_ee_tip = self.agent.get_tip() self.tip_target = Dummy('UnicarRobotArm_target') self.tip_pos = self.agent_ee_tip.get_position() # set a proper initial robot gesture or tip position if control_mode == 'end_position': initial_pos = [0, 1.5, 1.6] self.tip_target.set_position(initial_pos) #one big step for rotatin is enough, with reset_dynamics = True, set the rotation instantaneously self.tip_target.set_orientation([0, 0, 0], reset_dynamics=True) elif control_mode == 'joint_velocity': self.initial_joint_positions = [0, 0, 0, 0, 0, 0, 0] self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position() def _get_state(self): ''' Return state containging arm joint positions/velocities & target position. ''' return np.array(self.agent.get_joint_positions() + self.agent.get_joint_velocities() + self.agent_ee_tip.get_position() + self.agent_ee_tip.get_orientation()) #all 20 def _is_holding(self): ''' Return the state of holding the UnicarRobotTarget or not, return bool value ''' if self.proximity_sensor.is_detected(self.target) == True: return True else: return False def _move(self, action, bounding_offset=0.15, step_factor=0.2, max_itr=20, max_error=0.05, rotation_norm=5): pos = self.suction.get_position() if pos[0]+action[0] > POS_MIN[0]-bounding_offset and pos[0]+action[0] < POS_MAX[0]-bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]-bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset: ori_z = -self.agent_ee_tip.get_orientation( )[2] # the minus is because the mismatch between the set_orientation() and get_orientation() #ori_z = self.agent_ee_tip.get_orientation()[2] target_pos = np.array(self.agent_ee_tip.get_position() + np.array(action[:3])) diff = 1 itr = 0 while np.sum(np.abs(diff)) > max_error and itr < max_itr: itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos pos = cur_pos + step_factor * diff self.tip_target.set_position(pos.tolist()) self.pr.step() ori_z += rotation_norm * action[3] self.tip_target.set_orientation([0, np.pi, ori_z]) self.pr.step() else: print("Potantial Movement Out of the Bounding Box!") self.pr.step() def reinit(self): self.shutdown() self.__init__(self.headless) def reset(self, random_target=False): ''' Get a random position within a cuboid and set the target position ''' # set target object if random_target: pos = list(np.random.uniform(POS_MIN, POS_MAX)) self.target.set_position(pos) else: self.target.set_position(self.initial_target_positions) self.target.set_orientation([0, 0, 0]) self.pr.step() #set end position to be initialized if self.control_mode == 'end_position': self.agent.set_control_loop_enabled(True) # IK mode self.tip_target.set_position(self.initial_tip_positions) self.pr.step() itr = 0 max_itr = 10 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform(-0.2, 0.2, 4)) self.pr.step() elif self.control_mode == 'joint_velocity': #JointMode Force self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() #set collidable, for collision detection #self.gripper.set_collidable(True) self.suction.set_collidable(True) self.target.set_collidable(True) return self._get_state() #return the current state of the environment def step(self, action): ''' move the robot arm according to the control mode if control_mode == 'end_position' then action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of suction if control_mode == 'joint_velocity' then action is 7 dim of joint velocity + 1 dim of rotation of suction ''' #initialization done = False #episode finishes reward = 0 hold_flag = False #hold the object or not if self.control_mode == 'end_position': if action is None or action.shape[0] != 4: #check action is valid print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 4)) self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[0] != 7: #??? print('No actions or wrong action dimensions!') action = list(np.random.uniform(-1, 1, 7)) self.agent.set_joint_target_velocities(action) self.pr.step() else: raise NotImplementedError #ax,ay,az = self.gripper.get_position()#gripper position ax, ay, az = self.suction.get_position() #suction position if math.isnan( ax): #capture the broken suction cases during exploration print("Suction position is nan.") self.reinit() done = True desired_position_tip = [0.0, 1.5513, 1.74] desired_orientation_tip = [-np.pi, 0, 0.001567] tip_x, tip_y, tip_z = self.agent_ee_tip.get_position( ) #end_effector position tip_row, tip_pitch, tip_yaw = self.agent_ee_tip.get_orientation( ) #end_effector orientation tx, ty, tz = self.target.get_position() #box position offset = 0.312 #augmented reward: offset of target position above the target object #square distance between the gripper and the target object sqr_distance = np.sqrt((tip_x - desired_position_tip[0])**2 + (tip_y - desired_position_tip[1])**2 + (tip_z - desired_position_tip[2])**2) sqr_orientation = np.sqrt((tip_row - desired_orientation_tip[0])**2 + (tip_pitch - desired_orientation_tip[1])**2 + (tip_yaw - desired_orientation_tip[2])**2) ''' for visual-based control only, large time consumption! ''' # current_vision = self.vision_sensor.capture_rgb() # capture a screenshot of the view with vision sensor # plt.imshow(current_vision) # plt.savefig('./img/vision.png') desired_orientation = [0, 0, -np.pi / 2] desired_orientation_tip = [-np.pi, 0, 0.001567] #Enable the suction if close enough to the object and the object is detected with the proximity sensor if sqr_distance < 0.001 and self.proximity_sensor.is_detected( self.target) == True and sqr_orientation < 0.001: #make sure the suction is not worked self.suction.release() self.pr.step() self.suction.grasp(self.target) self.pr.step() if self._is_holding(): reward += self.reward_offset done = True hold_flag = True else: self.suction.release() self.pr.step() else: pass #the base reward is negative distance from suction to target #reward -= (np.sqrt(sqr_distance)) #case when the object is fall off the table if tz < self.initial_target_positions[ 2] - self.fall_down_offset: #tz is target(box) position in z direction done = True reward -= self.reward_offset # Augmented reward for orientation: better grasping gesture if the suction has vertical orientation to the target object desired_position_tip = [0.0, 1.5513, 1.74] tip_x, tip_y, tip_z = self.agent_ee_tip.get_position() tip_row, tip_pitch, tip_yaw = self.agent_ee_tip.get_orientation() reward -= (np.sqrt((tip_x - desired_position_tip[0])**2 + (tip_y - desired_position_tip[1])**2 + (tip_z - desired_position_tip[2])**2) + np.sqrt((tip_row - desired_orientation_tip[0])**2 + (tip_pitch - desired_orientation_tip[1])**2 + (tip_yaw - desired_orientation_tip[2])**2)) #Penalty for collision with the table if self.suction.check_collision( self.table) or self.suction.check_collision( self.carbody) or self.agent.check_collision( self.table) or self.suction.check_collision( self.target) or self.agent.check_collision( self.target): reward -= self.penalty_offset if math.isnan(reward): reward = 0. return self._get_state(), reward, done, {'finished': hold_flag} def shutdown(self): '''close the simulator''' self.pr.stop() self.pr.shutdown()
class Gripper(RobotComponent): """Represents all types of end-effectors, e.g. grippers. """ def __init__(self, count: int, name: str, joint_names: List[str]): super().__init__(count, name, joint_names) suffix = '' if count == 0 else '#%d' % (count - 1) prox_name = '%s_attachProxSensor%s' % (name, suffix) attach_name = '%s_attachPoint%s' % (name, suffix) self._proximity_sensor = ProximitySensor(prox_name) self._attach_point = ForceSensor(attach_name) self._old_parents: List[Object] = [] self._grasped_objects: List[Object] = [] self._prev_positions = [None] * len(joint_names) self._prev_vels = [None] * len(joint_names) # Used to stop oscillating self._touch_sensors = [] i = 0 while True: fname = '%s_touchSensor%d%s' % (name, i, suffix) if not ForceSensor.exists(fname): break self._touch_sensors.append(ForceSensor(fname)) i += 1 def grasp(self, obj: Object) -> bool: """Grasp object if it is detected. Note: The does not actuate the gripper, but instead simply attaches the detected object to the gripper to 'fake' a grasp. :param obj: The object to grasp if detected. :return: True if the object was detected/grasped. """ detected = self._proximity_sensor.is_detected(obj) # Check if detected and that we are not already grasping it. if detected and obj not in self._grasped_objects: self._grasped_objects.append(obj) self._old_parents.append(obj.get_parent()) # type: ignore obj.set_parent(self._attach_point, keep_in_place=True) return detected def release(self) -> None: """Release any grasped objects. Note: The does not actuate the gripper, but instead simply detaches any grasped objects. """ for grasped_obj, old_parent in zip( self._grasped_objects, self._old_parents): # Check if the object still exists if grasped_obj.still_exists(): grasped_obj.set_parent(old_parent, keep_in_place=True) self._grasped_objects = [] self._old_parents = [] def get_grasped_objects(self) -> List[Object]: """Gets the objects that are currently grasped. :return: A list of grasped objects. """ return self._grasped_objects def actuate(self, amount: float, velocity: float) -> bool: """Actuate the gripper, but return after each simulation step. The functions attempts to open/close the gripper according to 'amount', where 1 represents open, and 0 represents close. The user should iteratively call this function until it returns True. This is a convenience method. If you would like direct control of the gripper, use the :py:class:`RobotComponent` methods instead. For some grippers, this method will need to be overridden. :param amount: A float between 0 and 1 representing the gripper open state. 1 means open, whilst 0 means closed. :param velocity: The velocity to apply to the gripper joints. :raises: ValueError if 'amount' is not between 0 and 1. :return: True if the gripper has reached its open/closed limits, or if the 'max_force' has been exerted. """ if not (0.0 <= amount <= 1.0): raise ValueError("'open_amount' should be between 0 and 1.'") _, joint_intervals_list = self.get_joint_intervals() joint_intervals = np.array(joint_intervals_list) # Decide on if we need to open or close joint_range = joint_intervals[:, 1] - joint_intervals[:, 0] target_pos = joint_intervals[:, 0] + (joint_range * amount) current_positions = self.get_joint_positions() done = True for i, (j, target, cur, prev) in enumerate(zip( self.joints, target_pos, current_positions, self._prev_positions)): # Check if the joint has moved much not_moving = (prev is not None and np.fabs(cur - prev) < POSITION_ERROR) reached_target = np.fabs(target - cur) < POSITION_ERROR vel = -velocity if cur - target > 0 else velocity oscillating = (self._prev_vels[i] is not None and vel != self._prev_vels[i]) if not_moving or reached_target or oscillating: j.set_joint_target_velocity(0) continue done = False vel = -velocity if cur - target > 0 else velocity self._prev_vels[i] = vel # type: ignore j.set_joint_target_velocity(vel) self._prev_positions = current_positions # type: ignore if done: self._prev_positions = [None] * self._num_joints self._prev_vels = [None] * self._num_joints self.set_joint_target_velocities([0.0] * self._num_joints) return done def get_open_amount(self) -> List[float]: """Gets the gripper open state. 1 means open, whilst 0 means closed. :return: A list of floats between 0 and 1 representing the gripper open state for each joint. 1 means open, whilst 0 means closed. """ _, joint_intervals_list = self.get_joint_intervals() joint_intervals = np.array(joint_intervals_list) joint_range = joint_intervals[:, 1] - joint_intervals[:, 0] return list(np.clip((np.array( self.get_joint_positions()) - joint_intervals[:, 0]) / joint_range, 0.0, 1.0)) def get_touch_sensor_forces(self) -> List[List[float]]: if len(self._touch_sensors) == 0: raise NotImplementedError('No touch sensors for this robot!') return [ts.read()[0] for ts in self._touch_sensors]
class ReacherEnv(object): def __init__(self, headless, control_mode='end_position', visual_control=False): ''' :visual_control: bool, controlled by visual state or not (vector state). ''' self.reward_offset = 10.0 self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.fall_down_offset = 0.1 # for judging the target object fall off the table self.metadata = [] # gym env format self.visual_control = visual_control self.control_mode = control_mode self.pr = PyRep() if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt') elif control_mode == 'joint_velocity': # the scene with all joints in force/torche mode for forward kinematics SCENE_FILE = join(dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt') self.pr.launch(SCENE_FILE, headless=headless) self.pr.start() self.agent = Sawyer() self.gripper = BaxterGripper() self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here if control_mode == 'end_position': self.agent.set_control_loop_enabled(True) # if false, won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 8) # 7 DOF velocity control + 1 rotation of gripper else: raise NotImplementedError if self.visual_control == False: self.observation_space = np.zeros( 17 ) # position and velocity of 7 joints + position of the target else: self.observation_space = np.zeros(100) # dim of img! self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # object self.tip_target = Dummy( 'Sawyer_target') # the target point of the tip to move towards # self.table = Shape('diningTable') self.agent_ee_tip = self.agent.get_tip() self.tip_pos = self.agent_ee_tip.get_position() self.tip_quat = self.agent_ee_tip.get_quaternion( ) # tip rotation as quaternion, if not control the rotation # set a proper initial gesture/tip position if control_mode == 'end_position': # agent_position=self.agent.get_position() # initial_pos_offset = [0.4, 0.3, 0.2] # initial relative position of gripper to the whole arm # initial_pos = [(a + o) for (a, o) in zip(agent_position, initial_pos_offset)] initial_pos = [0.3, 0.1, 0.9] self.tip_target.set_position(initial_pos) # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously self.tip_target.set_orientation( [0, 3.1415, 1.5707], reset_dynamics=True) # make gripper face downwards self.pr.step() elif control_mode == 'joint_velocity': self.initial_joint_positions = [ 0.001815199851989746, -1.4224984645843506, 0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375 ] self.agent.set_joint_positions(self.initial_joint_positions) self.initial_joint_positions = self.agent.get_joint_positions() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position() def _get_state(self): # Return state containing arm joint angles/velocities & target position return np.array(self.agent.get_joint_positions() + self.agent.get_joint_velocities() + self.target.get_position()) def _is_holding(self): ''' Return is holding the target or not, return bool. ''' # Nothe that the collision check is not always accurate all the time, # for continuous conllision, maybe only the first 4-5 frames of collision can be detected pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False def _get_visual_state(self): # Return a numpy array of size (width, height, 3) return self.vision_sensor.capture_rgb( ) # A numpy array of size (width, height, 3) def _is_holding(self): # Return is holding the target or not, return bool # Nothe that the collision check is not always accurate all the time, # for continuous conllision, maybe only the first 4-5 frames of collision can be detected pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False # def _move(self, action): # ''' # Move the tip according to the action with inverse kinematics for 'end_position' control; # with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. # ''' # robot_moving_unit=0.01 # the amount of single step move of robot, not accurate # moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1 # adaptive number of moving steps, with minimal of 1 step. # # print(moving_loop_itr) # small_step = list(1./moving_loop_itr*np.array(action)) # break the action into small steps, as the robot cannot move to the target position within one frame # pos=self.agent_ee_tip.get_position() # ''' # there is a mismatch between the object set_orientation() and get_orientation(): # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). # ''' # ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get # assert len(small_step) == len(pos)+1 # 3 values for position, 1 value for rotation # for _ in range(moving_loop_itr): # for idx in range(len(pos)): # pos[idx] += small_step[idx] # self.tip_target.set_position(pos) # self.pr.step() # ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True''' # # ori_z+=small_step[3] # change the orientation along z-axis with a small step # # self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True) # make gripper face downwards # # self.pr.step() # ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously ''' # ori_z+=action[3] # self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True) # make gripper face downwards # self.pr.step() def _move(self, action): ''' Move the tip according to the action with inverse kinematics for 'end_position' control; with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. Mode 2: a close-loop control, using ik. ''' pos = self.gripper.get_position() bounding_offset = 0.1 step_factor = 0.2 # small step factor mulitplied on the gradient step calculated by inverse kinematics max_itr = 20 # maximum moving iterations max_error = 0.1 # upper bound of distance error for movement at each call rotation_norm = 5. # factor for normalization of rotation values # check if state+action will be within of the bounding box, if so, move normally; else no action. # x_min < x < x_max and y_min < y < y_max and z > z_min if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-bounding_offset: ''' there is a mismatch between the object set_orientation() and get_orientation(): the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). ''' ori_z = -self.agent_ee_tip.get_orientation()[ 2] # the minus is because the mismatch between the set and get target_pos = np.array(self.agent_ee_tip.get_position()) + np.array( action[:3]) diff = 1 itr = 0 # print('before: ', ori_z) while np.sum(np.abs(diff)) > max_error and itr < max_itr: itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos pos = cur_pos + step_factor * diff self.tip_target.set_position(pos.tolist()) self.pr.step() ''' one big step for z-rotation is enough ''' ori_z += rotation_norm * action[3] self.tip_target.set_orientation([0, np.pi, ori_z ]) # make gripper face downwards self.pr.step() else: # print("Potential Movement Out of the Bounding Box!") pass # no action if potentially out of the bounding box def reset(self): # Get a random position within a cuboid and set the target position max_itr = 10 pos = list(np.random.uniform(POS_MIN, POS_MAX)) self.target.set_position(pos) self.target.set_orientation([0, 0, 0]) self.pr.step() # changing the color or texture for domain randomization self.target.set_color( np.random.uniform(low=0, high=1, size=3).tolist() ) # set [r,g,b] 3 channel values of object color # set end position to be initialized if self.control_mode == 'end_position': # JointMode.IK self.agent.set_control_loop_enabled(True) self.tip_target.set_position( self.initial_tip_positions ) # cannot set joint positions directly due to in ik mode nor force/torch mode self.pr.step() # prevent stuck case itr = 0 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform( -0.2, 0.2, 4)) # take random actions for preventing the stuck cases self.pr.step() elif self.control_mode == 'joint_velocity': # JointMode.FORCE self.agent.set_joint_positions( self.initial_joint_positions ) # sometimes the gripper is stuck, cannot get back to initial self.pr.step() # self.table.set_collidable(True) self.gripper_left_pad.set_collidable( True ) # set the pad on the gripper to be collidable, so as to check collision self.target.set_collidable(True) if np.sum(self.gripper.get_open_amount()) < 1.5: self.gripper.actuate(1, velocity=0.5) # open the gripper self.pr.step() if self.visual_control: return self._get_visual_state() else: return self._get_state() def step(self, action): ''' Move the robot arm according to the action. If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim of gripper rotation. if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim of gripper rotation. ''' if self.control_mode == 'end_position': if action is None or action.shape[0] != 4: action = list(np.random.uniform(-0.1, 0.1, 4)) # random self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[0] != 7: print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 7)) # random self.agent.set_joint_target_velocities( action) # Execute action on arm self.pr.step() else: raise NotImplementedError ax, ay, az = self.gripper.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target distance = (ax - tx)**2 + (ay - ty)**2 + (az - tz)**2 done = False # print(self.proximity_sensor.is_detected(self.target)) current_vision = self.vision_sensor.capture_rgb( ) # capture a screenshot of the view with vision sensor plt.imshow(current_vision) plt.savefig('./img/vision.png') reward = 0 # close the gripper if close enough to the object and the object is detected with the proximity sensor if distance < 0.1 and self.proximity_sensor.is_detected( self.target) == True: # make sure the gripper is open before grasping self.gripper.actuate(1, velocity=0.5) self.pr.step() self.gripper.actuate( 0, velocity=0.5 ) # if done, close the hand, 0 for close and 1 for open. self.pr.step() # Step the physics simulation if self._is_holding(): # reward for hold here! reward += 10 done = True else: self.gripper.actuate(1, velocity=0.5) self.pr.step() elif np.sum( self.gripper.get_open_amount() ) < 1.5: # if gripper is closed due to collision or esle, open it; .get_open_amount() return list of gripper joint values self.gripper.actuate(1, velocity=0.5) self.pr.step() else: pass reward -= np.sqrt(distance) if tz < self.initial_target_positions[ 2] - self.fall_down_offset: # the object fall off the table done = True reward = -self.reward_offset if self.visual_control: return self._get_visual_state(), reward, done, {} else: return self._get_state(), reward, done, {} def shutdown(self): self.pr.stop() self.pr.shutdown()
class EmptyContainer(Task): def init_task(self) -> None: self.large_container = Shape('large_container') self.target_container0 = Shape('small_container0') self.target_container1 = Shape('small_container1') self.success_detector0 = ProximitySensor('success0') self.success_detector1 = ProximitySensor('success1') self.target_waypoint = Dummy('waypoint3') self.spawn_boundary = SpawnBoundary([Shape('spawn_boundary')]) self.register_waypoint_ability_start(1, self._move_above_object) self.register_waypoints_should_repeat(self._repeat) self.bin_objects = [] def init_episode(self, index: int) -> List[str]: self._variation_index = index self.bin_objects = sample_procedural_objects(self.get_base(), 3) self.bin_objects_not_done = list(self.bin_objects) self.register_graspable_objects(self.bin_objects) self.spawn_boundary.clear() for ob in self.bin_objects: ob.set_position([0.0, 0.0, 0.2], relative_to=self.large_container, reset_dynamics=False) self.spawn_boundary.sample(ob, ignore_collisions=True, min_distance=0.05) target_pos = [-5.9605e-8, -2.5005e-1, +1.7e-1] conditions = [] target_color_name, target_color_rgb = colors[index] color_choice = np.random.choice(list(range(index)) + list(range(index + 1, len(colors))), size=1, replace=False)[0] _, distractor_color_rgb = colors[color_choice] if index % 2 == 0: self.target_container0.set_color(target_color_rgb) self.target_container1.set_color(distractor_color_rgb) for ob in self.bin_objects: conditions.append(DetectedCondition(ob, self.success_detector0)) else: self.target_container1.set_color(target_color_rgb) self.target_container0.set_color(distractor_color_rgb) for ob in self.bin_objects: conditions.append(DetectedCondition(ob, self.success_detector1)) target_pos[1] = -target_pos[1] self.target_waypoint.set_position(target_pos, relative_to=self.large_container, reset_dynamics=True) self.register_success_conditions( [ConditionSet(conditions, simultaneously_met=True)]) return [ 'empty the container in the to %s container' % target_color_name, 'clear all items from the large tray and put them in the %s ' 'tray' % target_color_name, 'move all objects from the large container and drop them into ' 'the smaller %s one' % target_color_name, 'remove whatever you find in the big box in the middle and ' 'leave them in the %s one' % target_color_name, 'grasp and move all objects into the %s container' % target_color_name ] def variation_count(self) -> int: return len(colors) def cleanup(self) -> None: [ob.remove() for ob in self.bin_objects if ob.still_exists()] self.bin_objects = [] def step(self) -> None: for ob in self.bin_objects_not_done: if self._variation_index % 2 == 0: if self.success_detector0.is_detected(ob): self.bin_objects_not_done.remove(ob) else: if self.success_detector1.is_detected(ob): self.bin_objects_not_done.remove(ob) def _move_above_object(self, waypoint): if len(self.bin_objects_not_done) <= 0: raise RuntimeError('Should not be here.') bin_obj = self.bin_objects_not_done[0] way_obj = waypoint.get_waypoint_object() way_obj.set_position(bin_obj.get_position()) x, y, _ = way_obj.get_orientation() _, _, z = bin_obj.get_orientation(relative_to=way_obj) way_obj.set_orientation([x, y, z]) def _repeat(self): return len(self.bin_objects_not_done) > 0
class GraspEnv(object): ''' Sawyer robot grasping a cuboid ''' def __init__(self, headless, control_mode='joint_velocity'): ''' parameters: :headless: bool, if True, no visualization, else with visualization. :control mode: str, 'end_position' or 'joint_velocity'. ''' # set public variables self.headless = headless # if headless is True, no visualization self.reward_offset = 10.0 # reward of achieving the grasping object self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.penalty_offset = 1. # penalty value for undesired cases self.fall_down_offset = 0.1 # distance for judging the target object fall off the table self.metadata = [] # gym env argument self.control_mode = control_mode # the control mode of robotic arm: 'end_position' or 'joint_velocity' # launch and set up the scene, and set the proxy variables in represent of the counterparts in the scene self.pr = PyRep() # call the PyRep if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt' ) # scene with joints controlled by ik (inverse kinematics) elif control_mode == 'joint_velocity': # the scene with all joints in force/torch mode for forward kinematics SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt' ) # scene with joints controlled by forward kinematics self.pr.launch(SCENE_FILE, headless=headless ) # lunch the scene, headless means no visualization self.pr.start() # start the scene self.agent = Sawyer() # get the robot arm in the scene self.gripper = BaxterGripper() # get the gripper in the scene self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the left pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here self.table = Shape( 'diningTable') # the table in the scene for checking collision if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics self.agent.set_control_loop_enabled( True) # if false, inverse kinematics won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, using forward kinematics self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 7 ) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it. else: raise NotImplementedError self.observation_space = np.zeros( 17) # position and velocity of 7 joints + position of the target self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # get the target object self.agent_ee_tip = self.agent.get_tip( ) # a part of robot as the end of inverse kinematics chain for controlling self.tip_target = Dummy( 'Sawyer_target' ) # the target point of the tip (end of the robot arm) to move towards self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position # set a proper initial robot gesture or tip position if control_mode == 'end_position': initial_pos = [0.3, 0.1, 0.9] self.tip_target.set_position(initial_pos) # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously self.tip_target.set_orientation( [0, np.pi, np.pi / 2], reset_dynamics=True ) # first two dimensions along x and y axis make gripper face downwards elif control_mode == 'joint_velocity': self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \ 0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial gesture self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() self.initial_tip_positions = self.agent_ee_tip.get_position() self.initial_target_positions = self.target.get_position() def _get_state(self): ''' Return state containing arm joint positions/velocities & target position. ''' return np.array(self.agent.get_joint_positions() + # list, dim=7 self.agent.get_joint_velocities() + # list, dim=7 self.target.get_position()) # list, dim=3 def _is_holding(self): ''' Return the state of holding the target or not, return bool. ''' # Note that the collision check is not always accurate all the time, # for continuous collision frames, maybe only the first 4-5 frames of collision can be detected. pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False def _move(self, action, bounding_offset=0.15, step_factor=0.2, max_itr=20, max_error=0.05, rotation_norm=5.): ''' Move the end effector on robot arm according to the action with inverse kinematics for 'end_position' control mode; Inverse kinematics mode control is achieved through setting the tip target instead of using .solve_ik(), because sometimes the .solve_ik() does not function correctly. Mode: a close-loop proportional control, using ik. parameters: :bounding_offset: offset of bounding box outside the valid target position range, as valid and safe range of action :step_factor: small step factor mulitplied on the difference of current and desired position, i.e. proportional factor :max_itr: maximum moving iterations :max_error: upper bound of distance error for movement at each call :rotation_norm: factor for normalization of rotation values, since the action are of the same scale for each dimension ''' pos = self.gripper.get_position() # check if state+action will be within of the bounding box, if so, move normally; otherwise the action is not conducted. # i.e. x_min < x < x_max and y_min < y < y_max and z > z_min if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset: # larger offset in z axis # there is a mismatch between the object set_orientation() and get_orientation(): # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). ori_z = -self.agent_ee_tip.get_orientation( )[2] # the minus is because the mismatch between the set_orientation() and get_orientation() target_pos = np.array(self.agent_ee_tip.get_position()) + np.array( action[:3]) diff = 1 # intialization itr = 0 while np.sum(np.abs(diff)) > max_error and itr < max_itr: itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos # difference of current and target position, close-loop control pos = cur_pos + step_factor * diff # step small step according to current difference, to prevent that ik cannot be solved self.tip_target.set_position(pos.tolist()) self.pr.step( ) # every time when setting target tip, need to call simulation step to achieve it # one big step for z-rotation is enough, but small error still exists due to the ik solver ori_z += rotation_norm * action[ 3] # normalize the rotation values, as usually same action range is used in policy for both rotation and position self.tip_target.set_orientation([ 0, np.pi, ori_z ]) # make gripper face downwards and rotate ori_z along z axis self.pr.step() else: print("Potential Movement Out of the Bounding Box!") pass # no action if potentially moving out of the bounding box def reinit(self): ''' Reinitialize the environment, e.g. when the gripper is broken during exploration. ''' self.shutdown() # shutdown the original env first self.__init__(self.headless) # initialize with the same headless mode def reset(self, random_target=False): ''' Get a random position within a cuboid and set the target position. ''' # set target object if random_target: # randomize pos = list(np.random.uniform( POS_MIN, POS_MAX)) # sample from uniform in valid range self.target.set_position(pos) # random position else: # non-randomize self.target.set_position( self.initial_target_positions) # fixed position self.target.set_orientation([0, 0, 0]) self.pr.step() # set end position to be initialized if self.control_mode == 'end_position': # JointMode.IK self.agent.set_control_loop_enabled(True) # ik mode self.tip_target.set_position( self.initial_tip_positions ) # cannot set joint positions directly due to in ik mode or force/torch mode self.pr.step() # prevent stuck cases. as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly, therefore taking # some random action when desired position is not reached. itr = 0 max_itr = 10 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform( -0.2, 0.2, 4)) # take random actions for preventing the stuck cases self.pr.step() elif self.control_mode == 'joint_velocity': # JointMode.FORCE self.agent.set_joint_positions(self.initial_joint_positions) self.pr.step() # set collidable, for collision detection self.gripper_left_pad.set_collidable( True ) # set the pad on the gripper to be collidable, so as to check collision self.target.set_collidable(True) # open the gripper if it's not fully open if np.sum(self.gripper.get_open_amount()) < 1.5: self.gripper.actuate(1, velocity=0.5) self.pr.step() return self._get_state() # return current state of the environment def step(self, action): ''' Move the robot arm according to the action. If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper; if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper; ''' # initialization done = False # episode finishes reward = 0 hold_flag = False # holding the object or not if self.control_mode == 'end_position': if action is None or action.shape[ 0] != 4: # check if action is valid print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 4)) # random self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[ 0] != 7: # check if action is valid print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 7)) # random self.agent.set_joint_target_velocities( action) # Execute action on arm self.pr.step() else: raise NotImplementedError ax, ay, az = self.gripper.get_position() if math.isnan( ax): # capture the broken gripper cases during exploration print('Gripper position is nan.') self.reinit() done = True tx, ty, tz = self.target.get_position() offset = 0.08 # augmented reward: offset of target position above the target object sqr_distance = (ax - tx)**2 + (ay - ty)**2 + ( az - (tz + offset) )**2 # squared distance between the gripper and the target object ''' for visual-based control only, large time consumption! ''' # current_vision = self.vision_sensor.capture_rgb() # capture a screenshot of the view with vision sensor # plt.imshow(current_vision) # plt.savefig('./img/vision.png') # close the gripper if close enough to the object and the object is detected with the proximity sensor if sqr_distance < 0.1 and self.proximity_sensor.is_detected( self.target) == True: # make sure the gripper is open before grasping self.gripper.actuate(1, velocity=0.5) self.pr.step() self.gripper.actuate( 0, velocity=0.5 ) # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame self.pr.step() # Step the physics simulation if self._is_holding(): reward += self.reward_offset # extra reward for grasping the object done = True hold_flag = True else: self.gripper.actuate(1, velocity=0.5) self.pr.step() elif np.sum( self.gripper.get_open_amount() ) < 1.5: # if gripper is closed (not fully open) due to collision or esle, open it; get_open_amount() return list of gripper joint values self.gripper.actuate(1, velocity=0.5) self.pr.step() else: pass # the base reward is negative distance to target reward -= np.sqrt(sqr_distance) # case when the object fall off the table if tz < self.initial_target_positions[2] - self.fall_down_offset: done = True reward = -self.reward_offset # Augmented reward for orientation: better grasping gesture if the gripper has vertical orientation to the target object. # Note: the frame of gripper has a difference of pi/2 in z orientation as the frame of target. desired_orientation = np.concatenate( ([np.pi, 0], [self.target.get_orientation()[2] ])) # gripper vertical to target in z and facing downwards, rotation_penalty = -np.sum( np.abs( np.array(self.agent_ee_tip.get_orientation()) - desired_orientation)) rotation_norm = 0.02 reward += rotation_norm * rotation_penalty # Penalty for collision with the table if self.gripper_left_pad.check_collision(self.table): reward -= self.penalty_offset #print('Penalize collision with table.') if math.isnan(reward): # capture the cases of numerical problem reward = 0. return self._get_state(), reward, done, {'finished': hold_flag} def shutdown(self): ''' Close the simulator ''' self.pr.stop() self.pr.shutdown()
class ReacherEnv(object): def __init__(self, headless, control_mode='joint_velocity'): ''' :headless: bool, if True, no visualization, else with visualization. :control mode: str, 'end_position' or 'joint_velocity'. ''' self.headless = headless self.reward_offset = 10.0 # reward of achieving the grasping object self.reward_range = self.reward_offset # reward range for register gym env when using vectorized env wrapper self.fall_down_offset = 0.1 # for judging the target object fall off the table self.metadata = [] # gym env format self.control_mode = control_mode # the control mode of robotic arm: 'end_position' or 'joint_velocity' self.pr = PyRep() if control_mode == 'end_position': # need to use different scene, the one with all joints in inverse kinematics mode SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new_ik.ttt' ) # scene with joints controlled by ik (inverse kinematics) elif control_mode == 'joint_velocity': # the scene with all joints in force/torch mode for forward kinematics SCENE_FILE = join( dirname(abspath(__file__)), './scenes/sawyer_reacher_rl_new.ttt' ) # scene with joints controlled by forward kinematics self.pr.launch(SCENE_FILE, headless=headless ) # lunch the scene, headless means no visualization self.pr.start() # start the scene self.agent = Sawyer() # get the robot arm in the scene self.gripper = BaxterGripper() # get the gripper in the scene self.gripper_left_pad = Shape( 'BaxterGripper_leftPad') # the pad on the gripper finger self.proximity_sensor = ProximitySensor( 'BaxterGripper_attachProxSensor' ) # need the name of the sensor here self.vision_sensor = VisionSensor( 'Vision_sensor') # need the name of the sensor here self.initial_joint_positions = [ 0.162, -1.109, 0.259, 1.866, 2.949, -0.811, 4.440 ] # a good staring gesture of robot, avoid stucking and colliding self.agent.set_joint_positions(self.initial_joint_positions) if control_mode == 'end_position': # control the robot arm by the position of its end using inverse kinematics self.agent.set_control_loop_enabled( True) # if false, ik won't work self.action_space = np.zeros( 4) # 3 DOF end position control + 1 rotation of gripper elif control_mode == 'joint_velocity': # control the robot arm by directly setting velocity values on each joint, forward kinematics self.agent.set_control_loop_enabled(False) self.action_space = np.zeros( 7 ) # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it. else: raise NotImplementedError self.observation_space = np.zeros( 17) # position and velocity of 7 joints + position of the target self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') # get the target object self.agent_ee_tip = self.agent.get_tip( ) # a part of robot as the end of inverse kinematics chain for controlling self.tip_target = Dummy( 'Sawyer_target' ) # the target point of the tip (end of the robot arm) to move towards self.tip_pos = self.agent_ee_tip.get_position() # tip x,y,z position self.tip_quat = self.agent_ee_tip.get_quaternion( ) # tip rotation as quaternion, if not control the rotation # reference for reset self.initial_tip_positions = [0.3, 0.1, 1.] self.initial_z_rotation = np.pi / 2. self.initial_target_positions = self.target.get_position() def _get_state(self): ''' Return state containing arm joint angles/velocities & target position. ''' return np.array(self.agent.get_joint_positions() + # list self.agent.get_joint_velocities() + # list self.target.get_position()) # list def _is_holding(self): ''' Return is holding the target or not, return bool. ''' # Note that the collision check is not always accurate all the time, # for continuous conllision, maybe only the first 4-5 frames of collision can be detected pad_collide_object = self.gripper_left_pad.check_collision(self.target) if pad_collide_object and self.proximity_sensor.is_detected( self.target) == True: return True else: return False # def _move(self, action): # ''' Mode 0: as given by original examples in Pyrep repo ''' # # Deprecated, always fail the ik! Move the tip according to the action with inverse kinematics for 'end_position' control. # action=list(action) # pos=self.agent_ee_tip.get_position() # assert len(action) == len(pos) # for idx in range(len(pos)): # pos[idx] += action[idx] # print('pos: ', pos) # new_joint_angles = self.agent.solve_ik(pos, quaternion=self.tip_quat) # self.agent.set_joint_target_positions(new_joint_angles) # def _move(self, action): # ''' # Deprecated! # Move the tip according to the action with inverse kinematics for 'end_position' control; # with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. # Mode 1: an open-loop control, using ik; not as accurate as the closed-loop control. # ''' # pos=self.gripper.get_position() # bounding_offset=0.1 # robot_moving_unit=0.01 # the amount of single step move of robot, not accurate; the smaller the value, the smoother the movement. # # check if state+action will be within of the bounding box, if so, move normally; else no action. # # x_min < x < x_max and y_min < y < y_max and z > z_min # if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ # and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset \ # and pos[2]+action[2] > POS_MIN[2]-bounding_offset: # moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1 # adaptive number of moving steps, with minimal of 1 step; the larger it is, the more accurate for each movement. # small_step = list(1./moving_loop_itr*np.array(action)) # break the action into small steps, as the robot cannot move to the target position within one frame # ''' # there is a mismatch between the object set_orientation() and get_orientation(): # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). # ''' # ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get # assert len(small_step) == len(pos)+1 # 3 values for position, 1 value for rotation # for _ in range(moving_loop_itr): # for idx in range(len(pos)): # pos[idx] += small_step[idx] # self.tip_target.set_position(pos) # self.pr.step() # ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True''' # ori_z+=small_step[3] # change the orientation along z-axis with a small step # self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True) # make gripper face downwards # self.pr.step() # ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously ''' # # ori_z+=action[3] # # self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True) # make gripper face downwards # # self.pr.step() # else: # print("Potential Movement Out of the Bounding Box!") # pass # no action if potentially out of the bounding box def _move(self, action, bounding_offset=0.15, step_factor=0.4, max_itr=40, max_error=0.01, rotation_norm=5.): ''' Move the tip according to the action with inverse kinematics for 'end_position' control; with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode. Mode 2: a close-loop control, using ik. parameters: :bounding_offset: offset of bounding box and valid target position range, bounding box is larger :step_factor: small step factor mulitplied on the gradient step calculated by inverse kinematics :max_itr=20: maximum moving iterations :max_error: upper bound of distance error for movement at each call :rotation_norm: factor for normalization of rotation values ''' pos = self.gripper.get_position() # check if state+action will be within of the bounding box, if so, move normally; else action is not conducted. # i.e. x_min < x < x_max and y_min < y < y_max and z > z_min if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset \ and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset \ and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset: # larger offset in z axis ''' there is a mismatch between the object set_orientation() and get_orientation(): the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation(). ''' ori_z = -self.agent_ee_tip.get_orientation()[ 2] # the minus is because the mismatch between the set and get target_pos = np.array(self.agent_ee_tip.get_position()) + np.array( action[:3]) diff = 1 itr = 0 while np.sum(np.abs(diff)) > max_error and itr < max_itr: # print(action[:3], np.sum(np.abs(diff))) itr += 1 # set pos in small step cur_pos = self.agent_ee_tip.get_position() diff = target_pos - cur_pos # difference of current and target position, close-loop control pos = cur_pos + step_factor * diff # step small step according to current difference, to prevent that ik cannot be solved self.tip_target.set_position(pos.tolist()) self.pr.step( ) # every time set target tip, need to call simulation step to achieve it ''' one big step for z-rotation is enough, but still small error exists ''' ori_z += rotation_norm * action[ 3] # normalize the rotation values, as usually same action range is used in policy for both rotation and position self.tip_target.set_orientation( [0, 3.1415, ori_z]) # make gripper face downwards and rotate ori_z self.pr.step() # print('after: ', ori_z, -self.agent_ee_tip.get_orientation()[2]) else: print("Potential Movement Out of the Bounding Box!") pass # no action if potentially moving out of the bounding box def reset(self, random_target=False): ''' Reset with the _move() function. Get a random position within a cuboid and set the target position. ''' if random_target: # randomly set the target position pos = list(np.random.uniform( POS_MIN, POS_MAX)) # sample from uniform in valid range self.target.set_position(pos) # random position else: self.target.set_position( self.initial_target_positions) # fixed position self.target.set_orientation([0, 0, 0]) self.pr.step() # set end position to be initialized if self.control_mode == 'end_position': # JointMode.IK curr_pos = self.agent_ee_tip.get_position() a = np.array(self.initial_tip_positions) - np.array(curr_pos) self._move(np.concatenate((a, [self.initial_z_rotation])), max_itr=80) # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly itr = 0 max_itr = 10 while np.sum( np.abs( np.array(self.agent_ee_tip.get_position() - np.array(self.initial_tip_positions))) ) > 0.1 and itr < max_itr: itr += 1 self.step(np.random.uniform( -0.2, 0.2, 4)) # take random actions for preventing the stuck cases self.pr.step() elif self.control_mode == 'joint_velocity': # JointMode.FORCE self.agent.set_joint_positions( self.initial_joint_positions ) # sometimes the gripper is stuck, cannot get back to initial self.pr.step() # set collidable, for collision detection self.gripper_left_pad.set_collidable( True ) # set the pad on the gripper to be collidable, so as to check collision self.target.set_collidable(True) # open the gripper if it's not fully open if np.sum(self.gripper.get_open_amount()) < 1.5: self.gripper.actuate(1, velocity=0.5) self.pr.step() return self._get_state() # return current state of the environment # def reset(self, random_target=False): # ''' # Deprecated! # Get a random position within a cuboid and set the target position. # ''' # print('reset') # if random_target: # randomly set the target position # pos = list(np.random.uniform(POS_MIN, POS_MAX)) # sample from uniform in valid range # self.target.set_position(pos) # random position # else: # self.target.set_position(self.initial_target_positions) # fixed position # self.target.set_orientation([0,0,0]) # # set end position to be initialized # if self.control_mode == 'end_position': # JointMode.IK # self.agent.set_control_loop_enabled(True) # self.tip_target.set_position(self.initial_tip_positions) # cannot set joint positions directly due to in ik mode nor force/torch mode # self.pr.step() # # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly # itr=0 # max_itr=10 # while np.sum(np.abs(np.array(self.agent_ee_tip.get_position()-np.array(self.initial_tip_positions))))>0.1 and itr<max_itr: # itr+=1 # self.step(np.random.uniform(-0.2,0.2,4)) # take random actions for preventing the stuck cases # self.pr.step() # elif self.control_mode == 'joint_velocity': # JointMode.FORCE # self.agent.set_joint_positions(self.initial_joint_positions) # sometimes the gripper is stuck, cannot get back to initial # # set collidable, for collision detection # self.gripper_left_pad.set_collidable(True) # set the pad on the gripper to be collidable, so as to check collision # self.target.set_collidable(True) # # open the gripper if it's not fully open # if np.sum(self.gripper.get_open_amount())<1.5: # self.gripper.actuate(1, velocity=0.5) # self.pr.step() # return self._get_state() # return current state of the environment def step(self, action): ''' Move the robot arm according to the action. If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper; if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper; ''' done = False if self.control_mode == 'end_position': if action is None or action.shape[0] != 4: print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 4)) # random # print(action, self.agent_ee_tip.get_position()) self._move(action) elif self.control_mode == 'joint_velocity': if action is None or action.shape[0] != 7: print('No actions or wrong action dimensions!') action = list(np.random.uniform(-0.1, 0.1, 7)) # random self.agent.set_joint_target_velocities( action) # Execute action on arm self.pr.step() # if no pr.step() the action won't be conducted else: raise NotImplementedError ax, ay, az = self.gripper.get_position() if math.isnan( ax ): # capture the case when the gripper is broken during exporation self.__init__(headless=self.headless) done = True tx, ty, tz = self.target.get_position() distance = (ax - tx)**2 + (ay - ty)**2 + ( az - tz)**2 # distance between the gripper and the target object ''' for visual-based control only, large time consumption! ''' # current_vision = self.vision_sensor.capture_rgb() # capture a screenshot of the view with vision sensor # plt.imshow(current_vision) # plt.savefig('./img/vision.png') reward = 0 # close the gripper if close enough to the object and the object is detected with the proximity sensor if distance < 0.1 and self.proximity_sensor.is_detected( self.target) == True: # make sure the gripper is open before grasping self.gripper.actuate(1, velocity=0.5) self.pr.step() self.gripper.actuate( 0, velocity=0.5 ) # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame self.pr.step() # Step the physics simulation if self._is_holding(): # reward for hold here! reward += self.reward_offset # extra reward for grasping the object done = True else: self.gripper.actuate(1, velocity=0.5) self.pr.step() elif np.sum( self.gripper.get_open_amount() ) < 1.5: # if gripper is closed (not fully open) due to collision or esle, open it; .get_open_amount() return list of gripper joint values self.gripper.actuate(1, velocity=0.5) self.pr.step() else: pass reward -= np.clip( np.sqrt(distance), 0, self.reward_offset ) # Reward is negative distance to target; clipped for removing abnormal cases if tz < self.initial_target_positions[ 2] - self.fall_down_offset: # the object fall off the table done = True reward = -self.reward_offset # can also set reward for orientation, same orientation for target and gripper, they are actually vertical, so can grasp # reward += np.sqrt(np.array(self.agent_ee_tip.get_orientation())-np.array(self.target.get_orientation())) return self._get_state(), reward, done, {} def shutdown(self): ''' Close the simulator ''' self.pr.stop() self.pr.shutdown()