Esempio n. 1
0
class TestProximitySensors(TestCore):

    def setUp(self):
        super().setUp()
        self.sensor = ProximitySensor('proximity_sensor')

    def test_is_detected(self):
        ob1 = Shape('simple_model')
        ob2 = Shape('dynamic_cube')
        self.assertTrue(self.sensor.is_detected(ob1))
        self.assertFalse(self.sensor.is_detected(ob2))
Esempio n. 2
0
class TestProximitySensors(TestCore):
    def setUp(self):
        super().setUp()
        self.sensor = ProximitySensor('proximity_sensor')

    def test_read(self):
        self.pyrep.step()
        distance, object = self.sensor.read()
        self.pyrep.step()
        self.assertAlmostEqual(distance, 0.1)

    def test_is_detected(self):
        ob1 = Shape('simple_model')
        ob2 = Shape('dynamic_cube')
        self.assertTrue(self.sensor.is_detected(ob1))
        self.assertFalse(self.sensor.is_detected(ob2))
Esempio n. 3
0
class WipeDesk(Task):
    def init_task(self) -> None:
        self.dirt_spots = []
        self.sponge = Shape('sponge')
        self.sensor = ProximitySensor('sponge_sensor')
        self.register_graspable_objects([self.sponge])

        boundaries = [Shape('dirt_boundary')]
        _, _, self.z_boundary = boundaries[0].get_position()
        self.b = SpawnBoundary(boundaries)

    def init_episode(self, index: int) -> List[str]:
        self._place_dirt()
        self.register_success_conditions([EmptyCondition(self.dirt_spots)])
        return [
            'wipe dirt off the desk', 'use the sponge to clean up the desk',
            'remove the dirt from the desk',
            'grip the sponge and wipe it back and forth over any dirt you '
            'see', 'clean up the mess', 'wipe the dirt up'
        ]

    def variation_count(self) -> int:
        return 1

    def step(self) -> None:
        for d in self.dirt_spots:
            if self.sensor.is_detected(d):
                self.dirt_spots.remove(d)
                d.remove()

    def cleanup(self) -> None:
        for d in self.dirt_spots:
            d.remove()
        self.dirt_spots = []

    def _place_dirt(self):
        for i in range(DIRT_POINTS):
            spot = Shape.create(type=PrimitiveShape.CUBOID,
                                size=[.005, .005, .001],
                                mass=0,
                                static=True,
                                respondable=False,
                                renderable=True,
                                color=[0.58, 0.29, 0.0])
            spot.set_parent(self.get_base())
            spot.set_position([-1, -1, self.z_boundary + 0.001])
            self.b.sample(spot,
                          min_distance=0.00,
                          min_rotation=(0.00, 0.00, 0.00),
                          max_rotation=(0.00, 0.00, 0.00))
            self.dirt_spots.append(spot)
        self.b.clear()
Esempio n. 4
0
class SuctionCup(Shape):
    """Represents all suction cups.
    """
    def __init__(self, count: int, name: str, base_name: str = None):
        suffix = '' if count == 0 else '#%d' % (count - 1)
        super().__init__(name + suffix if base_name is None else base_name +
                         suffix)
        self._proximity_sensor = ProximitySensor('%s_sensor%s' %
                                                 (name, suffix))
        self._attach_point = ForceSensor('%s_connection%s' % (name, suffix))
        self._old_parents = []
        self._grasped_objects = []

    def grasp(self, obj: Object) -> bool:
        """Attach the object to the suction cup if it is detected.

        Note: The does not move the object up to the suction cup. Therefore, the
        proximity sensor should have a short range in order for the suction
        grasp to look realistic.

        :param obj: The object to grasp if detected.
        :return: True if the object was detected/grasped.
        """
        detected = self._proximity_sensor.is_detected(obj)
        # Check if detected and that we are not already grasping it.
        if detected and obj not in self._grasped_objects:
            self._grasped_objects.append(obj)
            self._old_parents.append(obj.get_parent())  # type: ignore
            obj.set_parent(self._attach_point, keep_in_place=True)
        return detected

    def release(self) -> None:
        """Release any objects that have been sucked.

        Note: The does not actuate the gripper, but instead simply detaches any
        grasped objects.
        """
        for grasped_obj, old_parent in zip(self._grasped_objects,
                                           self._old_parents):
            # Check if the object still exists
            if grasped_obj.still_exists():
                grasped_obj.set_parent(old_parent, keep_in_place=True)
        self._grasped_objects = []
        self._old_parents = []

    def get_grasped_objects(self) -> List[Object]:
        """Gets the objects that are currently in the suction cup.

        :return: A list of grasped objects.
        """
        return self._grasped_objects
Esempio n. 5
0
class GraspEnv(object):
    def __init__(self, headless, control_mode='joint_velocity'):
        self.headless = headless
        self.reward_offset = 10.0
        self.reward_range = self.reward_offset
        self.penalty_offset = 1
        #self.penalty_offset = 1.
        self.fall_down_offset = 0.1
        self.metadata = []  #gym env argument
        self.control_mode = control_mode

        #launch and setup the scene, and set the proxy variables in present of the counterparts in the scene
        self.pr = PyRep()
        if control_mode == 'end_position':
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/UnicarRobot_ik.ttt')
        elif control_mode == 'joint_velocity':
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/UnicarRobot.ttt')
        self.pr.launch(SCENE_FILE, headless=headless)
        self.pr.start()
        self.agent = UnicarRobotArm()  #drehkranz + UR10
        #self.gripper = UnicarRobotGreifer()#suction
        #self.suction = UnicarRobotGreifer()
        self.suction = Shape("UnicarRobotGreifer_body_sub0")
        self.proximity_sensor = ProximitySensor('UnicarRobotGreifer_sensor')
        self.table = Shape('UnicarRobotTable')
        self.carbody = Shape('UnicarRobotCarbody')

        if control_mode == 'end_position':
            self.agent.set_control_loop_enabled(True)
            self.action_space = np.zeros(4)
        elif control_mode == 'joint_velocity':
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(7)
        else:
            raise NotImplementedError
        #self.observation_space = np.zeros(17)
        self.observation_space = np.zeros(20)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape("UnicarRobotTarget")  #Box
        self.agent_ee_tip = self.agent.get_tip()
        self.tip_target = Dummy('UnicarRobotArm_target')
        self.tip_pos = self.agent_ee_tip.get_position()

        # set a proper initial robot gesture or tip position
        if control_mode == 'end_position':
            initial_pos = [0, 1.5, 1.6]
            self.tip_target.set_position(initial_pos)
            #one big step for rotatin is enough, with reset_dynamics = True, set the rotation instantaneously
            self.tip_target.set_orientation([0, 0, 0], reset_dynamics=True)
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [0, 0, 0, 0, 0, 0, 0]
            self.agent.set_joint_positions(self.initial_joint_positions)
        self.pr.step()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        '''
        Return state containging arm joint positions/velocities & target position.
        '''
        return np.array(self.agent.get_joint_positions() +
                        self.agent.get_joint_velocities() +
                        self.agent_ee_tip.get_position() +
                        self.agent_ee_tip.get_orientation())  #all 20

    def _is_holding(self):
        '''
        Return the state of holding the UnicarRobotTarget or not, return bool value
        '''
        if self.proximity_sensor.is_detected(self.target) == True:
            return True
        else:
            return False

    def _move(self,
              action,
              bounding_offset=0.15,
              step_factor=0.2,
              max_itr=20,
              max_error=0.05,
              rotation_norm=5):
        pos = self.suction.get_position()

        if pos[0]+action[0] > POS_MIN[0]-bounding_offset and pos[0]+action[0] < POS_MAX[0]-bounding_offset \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]-bounding_offset \
            and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset:

            ori_z = -self.agent_ee_tip.get_orientation(
            )[2]  # the minus is because the mismatch between the set_orientation() and get_orientation()
            #ori_z = self.agent_ee_tip.get_orientation()[2]
            target_pos = np.array(self.agent_ee_tip.get_position() +
                                  np.array(action[:3]))
            diff = 1
            itr = 0
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos
                pos = cur_pos + step_factor * diff
                self.tip_target.set_position(pos.tolist())
                self.pr.step()

            ori_z += rotation_norm * action[3]
            self.tip_target.set_orientation([0, np.pi, ori_z])
            self.pr.step()

        else:
            print("Potantial Movement Out of the Bounding Box!")
            self.pr.step()

    def reinit(self):
        self.shutdown()
        self.__init__(self.headless)

    def reset(self, random_target=False):
        '''
        Get a random position within a cuboid and set the target position
        '''
        # set target object
        if random_target:
            pos = list(np.random.uniform(POS_MIN, POS_MAX))
            self.target.set_position(pos)
        else:
            self.target.set_position(self.initial_target_positions)
        self.target.set_orientation([0, 0, 0])
        self.pr.step()

        #set end position to be initialized
        if self.control_mode == 'end_position':
            self.agent.set_control_loop_enabled(True)  # IK mode
            self.tip_target.set_position(self.initial_tip_positions)
            self.pr.step()
            itr = 0
            max_itr = 10
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(-0.2, 0.2, 4))
                self.pr.step()
        elif self.control_mode == 'joint_velocity':  #JointMode Force
            self.agent.set_joint_positions(self.initial_joint_positions)
            self.pr.step()

        #set collidable, for collision detection
        #self.gripper.set_collidable(True)
        self.suction.set_collidable(True)
        self.target.set_collidable(True)

        return self._get_state()  #return the current state of the environment

    def step(self, action):
        '''
        move the robot arm according to the control mode
        if control_mode == 'end_position' then action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of suction
        if control_mode == 'joint_velocity' then action is 7 dim of joint velocity + 1 dim of rotation of suction
        '''
        #initialization
        done = False  #episode finishes
        reward = 0
        hold_flag = False  #hold the object or not
        if self.control_mode == 'end_position':
            if action is None or action.shape[0] != 4:  #check action is valid
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 4))
            self._move(action)
        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[0] != 7:  #???
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-1, 1, 7))
            self.agent.set_joint_target_velocities(action)
            self.pr.step()

        else:
            raise NotImplementedError

        #ax,ay,az = self.gripper.get_position()#gripper position
        ax, ay, az = self.suction.get_position()  #suction position
        if math.isnan(
                ax):  #capture the broken suction cases during exploration
            print("Suction position is nan.")
            self.reinit()
            done = True

        desired_position_tip = [0.0, 1.5513, 1.74]
        desired_orientation_tip = [-np.pi, 0, 0.001567]
        tip_x, tip_y, tip_z = self.agent_ee_tip.get_position(
        )  #end_effector position
        tip_row, tip_pitch, tip_yaw = self.agent_ee_tip.get_orientation(
        )  #end_effector orientation
        tx, ty, tz = self.target.get_position()  #box position
        offset = 0.312  #augmented reward: offset of target position above the target object
        #square distance between the gripper and the target object
        sqr_distance = np.sqrt((tip_x - desired_position_tip[0])**2 +
                               (tip_y - desired_position_tip[1])**2 +
                               (tip_z - desired_position_tip[2])**2)
        sqr_orientation = np.sqrt((tip_row - desired_orientation_tip[0])**2 +
                                  (tip_pitch - desired_orientation_tip[1])**2 +
                                  (tip_yaw - desired_orientation_tip[2])**2)
        ''' for visual-based control only, large time consumption! '''
        # current_vision = self.vision_sensor.capture_rgb()  # capture a screenshot of the view with vision sensor
        # plt.imshow(current_vision)
        # plt.savefig('./img/vision.png')
        desired_orientation = [0, 0, -np.pi / 2]
        desired_orientation_tip = [-np.pi, 0, 0.001567]
        #Enable the suction if close enough to the object and the object is detected with the proximity sensor
        if sqr_distance < 0.001 and self.proximity_sensor.is_detected(
                self.target) == True and sqr_orientation < 0.001:
            #make sure the suction is not worked
            self.suction.release()
            self.pr.step()
            self.suction.grasp(self.target)
            self.pr.step()

            if self._is_holding():
                reward += self.reward_offset
                done = True
                hold_flag = True
            else:
                self.suction.release()
                self.pr.step()
        else:
            pass

        #the base reward is negative distance from suction to target
        #reward -= (np.sqrt(sqr_distance))

        #case when the object is fall off the table
        if tz < self.initial_target_positions[
                2] - self.fall_down_offset:  #tz is target(box) position in z direction
            done = True
            reward -= self.reward_offset

        # Augmented reward for orientation: better grasping gesture if the suction has vertical orientation to the target object

        desired_position_tip = [0.0, 1.5513, 1.74]
        tip_x, tip_y, tip_z = self.agent_ee_tip.get_position()
        tip_row, tip_pitch, tip_yaw = self.agent_ee_tip.get_orientation()

        reward -= (np.sqrt((tip_x - desired_position_tip[0])**2 +
                           (tip_y - desired_position_tip[1])**2 +
                           (tip_z - desired_position_tip[2])**2) +
                   np.sqrt((tip_row - desired_orientation_tip[0])**2 +
                           (tip_pitch - desired_orientation_tip[1])**2 +
                           (tip_yaw - desired_orientation_tip[2])**2))

        #Penalty for collision with the table
        if self.suction.check_collision(
                self.table) or self.suction.check_collision(
                    self.carbody) or self.agent.check_collision(
                        self.table) or self.suction.check_collision(
                            self.target) or self.agent.check_collision(
                                self.target):
            reward -= self.penalty_offset

        if math.isnan(reward):
            reward = 0.

        return self._get_state(), reward, done, {'finished': hold_flag}

    def shutdown(self):
        '''close the simulator'''
        self.pr.stop()
        self.pr.shutdown()
Esempio n. 6
0
class Gripper(RobotComponent):
    """Represents all types of end-effectors, e.g. grippers.
    """

    def __init__(self, count: int, name: str, joint_names: List[str]):
        super().__init__(count, name, joint_names)
        suffix = '' if count == 0 else '#%d' % (count - 1)
        prox_name = '%s_attachProxSensor%s' % (name, suffix)
        attach_name = '%s_attachPoint%s' % (name, suffix)
        self._proximity_sensor = ProximitySensor(prox_name)
        self._attach_point = ForceSensor(attach_name)
        self._old_parents: List[Object] = []
        self._grasped_objects: List[Object] = []
        self._prev_positions = [None] * len(joint_names)
        self._prev_vels = [None] * len(joint_names)  # Used to stop oscillating

        self._touch_sensors = []
        i = 0
        while True:
            fname = '%s_touchSensor%d%s' % (name, i, suffix)
            if not ForceSensor.exists(fname):
                break
            self._touch_sensors.append(ForceSensor(fname))
            i += 1

    def grasp(self, obj: Object) -> bool:
        """Grasp object if it is detected.

        Note: The does not actuate the gripper, but instead simply attaches the
        detected object to the gripper to 'fake' a grasp.

        :param obj: The object to grasp if detected.
        :return: True if the object was detected/grasped.
        """
        detected = self._proximity_sensor.is_detected(obj)
        # Check if detected and that we are not already grasping it.
        if detected and obj not in self._grasped_objects:
            self._grasped_objects.append(obj)
            self._old_parents.append(obj.get_parent())  # type: ignore
            obj.set_parent(self._attach_point, keep_in_place=True)
        return detected

    def release(self) -> None:
        """Release any grasped objects.

        Note: The does not actuate the gripper, but instead simply detaches any
        grasped objects.
        """
        for grasped_obj, old_parent in zip(
                self._grasped_objects, self._old_parents):
            # Check if the object still exists
            if grasped_obj.still_exists():
                grasped_obj.set_parent(old_parent, keep_in_place=True)
        self._grasped_objects = []
        self._old_parents = []

    def get_grasped_objects(self) -> List[Object]:
        """Gets the objects that are currently grasped.

        :return: A list of grasped objects.
        """
        return self._grasped_objects

    def actuate(self, amount: float, velocity: float) -> bool:
        """Actuate the gripper, but return after each simulation step.

        The functions attempts to open/close the gripper according to 'amount',
        where 1 represents open, and 0 represents close. The user should
        iteratively call this function until it returns True.

        This is a convenience method. If you would like direct control of the
        gripper, use the :py:class:`RobotComponent` methods instead.

        For some grippers, this method will need to be overridden.

        :param amount: A float between 0 and 1 representing the gripper open
            state. 1 means open, whilst 0 means closed.
        :param velocity: The velocity to apply to the gripper joints.


        :raises: ValueError if 'amount' is not between 0 and 1.

        :return: True if the gripper has reached its open/closed limits, or if
            the 'max_force' has been exerted.
        """
        if not (0.0 <= amount <= 1.0):
            raise ValueError("'open_amount' should be between 0 and 1.'")
        _, joint_intervals_list = self.get_joint_intervals()
        joint_intervals = np.array(joint_intervals_list)

        # Decide on if we need to open or close
        joint_range = joint_intervals[:, 1] - joint_intervals[:, 0]
        target_pos = joint_intervals[:, 0] + (joint_range * amount)

        current_positions = self.get_joint_positions()
        done = True
        for i, (j, target, cur, prev) in enumerate(zip(
                self.joints, target_pos, current_positions,
                self._prev_positions)):
            # Check if the joint has moved much
            not_moving = (prev is not None and
                          np.fabs(cur - prev) < POSITION_ERROR)
            reached_target = np.fabs(target - cur) < POSITION_ERROR
            vel = -velocity if cur - target > 0 else velocity
            oscillating = (self._prev_vels[i] is not None and
                           vel != self._prev_vels[i])
            if not_moving or reached_target or oscillating:
                j.set_joint_target_velocity(0)
                continue
            done = False
            vel = -velocity if cur - target > 0 else velocity
            self._prev_vels[i] = vel  # type: ignore
            j.set_joint_target_velocity(vel)
        self._prev_positions = current_positions  # type: ignore
        if done:
            self._prev_positions = [None] * self._num_joints
            self._prev_vels = [None] * self._num_joints
            self.set_joint_target_velocities([0.0] * self._num_joints)
        return done

    def get_open_amount(self) -> List[float]:
        """Gets the gripper open state. 1 means open, whilst 0 means closed.

        :return: A list of floats between 0 and 1 representing the gripper open
            state for each joint. 1 means open, whilst 0 means closed.
        """
        _, joint_intervals_list = self.get_joint_intervals()
        joint_intervals = np.array(joint_intervals_list)
        joint_range = joint_intervals[:, 1] - joint_intervals[:, 0]
        return list(np.clip((np.array(
            self.get_joint_positions()) - joint_intervals[:, 0]) /
                            joint_range, 0.0, 1.0))

    def get_touch_sensor_forces(self) -> List[List[float]]:
        if len(self._touch_sensors) == 0:
            raise NotImplementedError('No touch sensors for this robot!')
        return [ts.read()[0] for ts in self._touch_sensors]
Esempio n. 7
0
class ReacherEnv(object):
    def __init__(self,
                 headless,
                 control_mode='end_position',
                 visual_control=False):
        '''
        :visual_control: bool, controlled by visual state or not (vector state).
        '''
        self.reward_offset = 10.0
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.fall_down_offset = 0.1  # for judging the target object fall off the table
        self.metadata = []  # gym env format
        self.visual_control = visual_control
        self.control_mode = control_mode
        self.pr = PyRep()
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/sawyer_reacher_rl_new_ik.ttt')
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torche mode for forward kinematics
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/sawyer_reacher_rl_new.ttt')
        self.pr.launch(SCENE_FILE, headless=headless)
        self.pr.start()
        self.agent = Sawyer()
        self.gripper = BaxterGripper()
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        if control_mode == 'end_position':
            self.agent.set_control_loop_enabled(True)  # if false, won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                8)  # 7 DOF velocity control + 1 rotation of gripper
        else:
            raise NotImplementedError
        if self.visual_control == False:
            self.observation_space = np.zeros(
                17
            )  # position and velocity of 7 joints + position of the target
        else:
            self.observation_space = np.zeros(100)  # dim of img!
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # object
        self.tip_target = Dummy(
            'Sawyer_target')  # the target point of the tip to move towards
        # self.table = Shape('diningTable')
        self.agent_ee_tip = self.agent.get_tip()
        self.tip_pos = self.agent_ee_tip.get_position()
        self.tip_quat = self.agent_ee_tip.get_quaternion(
        )  # tip rotation as quaternion, if not control the rotation

        # set a proper initial gesture/tip position
        if control_mode == 'end_position':
            # agent_position=self.agent.get_position()
            # initial_pos_offset = [0.4, 0.3, 0.2]  # initial relative position of gripper to the whole arm
            # initial_pos = [(a + o) for (a, o) in zip(agent_position, initial_pos_offset)]
            initial_pos = [0.3, 0.1, 0.9]
            self.tip_target.set_position(initial_pos)
            # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously
            self.tip_target.set_orientation(
                [0, 3.1415, 1.5707],
                reset_dynamics=True)  # make gripper face downwards
            self.pr.step()
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [
                0.001815199851989746, -1.4224984645843506, 0.704303503036499,
                2.54307222366333, 2.972468852996826, -0.4989511966705322,
                4.105560302734375
            ]
            self.agent.set_joint_positions(self.initial_joint_positions)

        self.initial_joint_positions = self.agent.get_joint_positions()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        return np.array(self.agent.get_joint_positions() +
                        self.agent.get_joint_velocities() +
                        self.target.get_position())

    def _is_holding(self):
        '''
         Return is holding the target or not, return bool.
        '''

        # Nothe that the collision check is not always accurate all the time,
        # for continuous conllision, maybe only the first 4-5 frames of collision can be detected
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    def _get_visual_state(self):
        # Return a numpy array of size (width, height, 3)
        return self.vision_sensor.capture_rgb(
        )  # A numpy array of size (width, height, 3)

    def _is_holding(self):
        # Return is holding the target or not, return bool

        # Nothe that the collision check is not always accurate all the time,
        # for continuous conllision, maybe only the first 4-5 frames of collision can be detected
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    # def _move(self, action):
    #     '''
    #     Move the tip according to the action with inverse kinematics for 'end_position' control;
    #     with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
    #     '''
    #     robot_moving_unit=0.01  # the amount of single step move of robot, not accurate
    #     moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1  # adaptive number of moving steps, with minimal of 1 step.
    #     # print(moving_loop_itr)
    #     small_step = list(1./moving_loop_itr*np.array(action))  # break the action into small steps, as the robot cannot move to the target position within one frame
    #     pos=self.agent_ee_tip.get_position()

    #     '''
    #     there is a mismatch between the object set_orientation() and get_orientation():
    #     the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
    #     '''
    #     ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get
    #     assert len(small_step) == len(pos)+1  # 3 values for position, 1 value for rotation

    #     for _ in range(moving_loop_itr):
    #         for idx in range(len(pos)):
    #             pos[idx] += small_step[idx]
    #         self.tip_target.set_position(pos)
    #         self.pr.step()
    #         ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True'''
    #         # ori_z+=small_step[3]  # change the orientation along z-axis with a small step
    #         # self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True)  # make gripper face downwards
    #         # self.pr.step()
    #     ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously '''
    #     ori_z+=action[3]
    #     self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True)  # make gripper face downwards
    #     self.pr.step()

    def _move(self, action):
        ''' 
        Move the tip according to the action with inverse kinematics for 'end_position' control;
        with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
        Mode 2: a close-loop control, using ik.
        '''
        pos = self.gripper.get_position()
        bounding_offset = 0.1
        step_factor = 0.2  # small step factor mulitplied on the gradient step calculated by inverse kinematics
        max_itr = 20  # maximum moving iterations
        max_error = 0.1  # upper bound of distance error for movement at each call
        rotation_norm = 5.  # factor for normalization of rotation values
        # check if state+action will be within of the bounding box, if so, move normally; else no action.
        #  x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
        if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset  \
            and pos[2]+action[2] > POS_MIN[2]-bounding_offset:
            ''' 
            there is a mismatch between the object set_orientation() and get_orientation():
            the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
            '''
            ori_z = -self.agent_ee_tip.get_orientation()[
                2]  # the minus is because the mismatch between the set and get
            target_pos = np.array(self.agent_ee_tip.get_position()) + np.array(
                action[:3])
            diff = 1
            itr = 0
            # print('before: ', ori_z)
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos
                pos = cur_pos + step_factor * diff
                self.tip_target.set_position(pos.tolist())
                self.pr.step()
            ''' one big step for z-rotation is enough '''
            ori_z += rotation_norm * action[3]
            self.tip_target.set_orientation([0, np.pi, ori_z
                                             ])  # make gripper face downwards
            self.pr.step()

        else:
            # print("Potential Movement Out of the Bounding Box!")
            pass  # no action if potentially out of the bounding box

    def reset(self):
        # Get a random position within a cuboid and set the target position
        max_itr = 10
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.target.set_orientation([0, 0, 0])
        self.pr.step()
        # changing the color or texture for domain randomization
        self.target.set_color(
            np.random.uniform(low=0, high=1, size=3).tolist()
        )  # set [r,g,b] 3 channel values of object color
        # set end position to be initialized
        if self.control_mode == 'end_position':  # JointMode.IK
            self.agent.set_control_loop_enabled(True)
            self.tip_target.set_position(
                self.initial_tip_positions
            )  # cannot set joint positions directly due to in ik mode nor force/torch mode
            self.pr.step()
            # prevent stuck case
            itr = 0
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(
                    -0.2, 0.2,
                    4))  # take random actions for preventing the stuck cases
                self.pr.step()
        elif self.control_mode == 'joint_velocity':  # JointMode.FORCE
            self.agent.set_joint_positions(
                self.initial_joint_positions
            )  # sometimes the gripper is stuck, cannot get back to initial
            self.pr.step()

        # self.table.set_collidable(True)
        self.gripper_left_pad.set_collidable(
            True
        )  # set the pad on the gripper to be collidable, so as to check collision
        self.target.set_collidable(True)
        if np.sum(self.gripper.get_open_amount()) < 1.5:
            self.gripper.actuate(1, velocity=0.5)  # open the gripper
            self.pr.step()
        if self.visual_control:
            return self._get_visual_state()
        else:
            return self._get_state()

    def step(self, action):
        '''
        Move the robot arm according to the action.
        If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim of gripper rotation.
        if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim of gripper rotation.
        '''
        if self.control_mode == 'end_position':
            if action is None or action.shape[0] != 4:
                action = list(np.random.uniform(-0.1, 0.1, 4))  # random
            self._move(action)
        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[0] != 7:
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 7))  # random
            self.agent.set_joint_target_velocities(
                action)  # Execute action on arm
            self.pr.step()
        else:
            raise NotImplementedError

        ax, ay, az = self.gripper.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is negative distance to target
        distance = (ax - tx)**2 + (ay - ty)**2 + (az - tz)**2
        done = False

        # print(self.proximity_sensor.is_detected(self.target))
        current_vision = self.vision_sensor.capture_rgb(
        )  # capture a screenshot of the view with vision sensor
        plt.imshow(current_vision)
        plt.savefig('./img/vision.png')

        reward = 0
        # close the gripper if close enough to the object and the object is detected with the proximity sensor
        if distance < 0.1 and self.proximity_sensor.is_detected(
                self.target) == True:
            # make sure the gripper is open before grasping
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

            self.gripper.actuate(
                0, velocity=0.5
            )  # if done, close the hand, 0 for close and 1 for open.
            self.pr.step()  # Step the physics simulation

            if self._is_holding():
                # reward for hold here!
                reward += 10
                done = True

            else:
                self.gripper.actuate(1, velocity=0.5)
                self.pr.step()

        elif np.sum(
                self.gripper.get_open_amount()
        ) < 1.5:  # if gripper is closed due to collision or esle, open it; .get_open_amount() return list of gripper joint values
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        else:
            pass

        reward -= np.sqrt(distance)

        if tz < self.initial_target_positions[
                2] - self.fall_down_offset:  # the object fall off the table
            done = True
            reward = -self.reward_offset

        if self.visual_control:
            return self._get_visual_state(), reward, done, {}
        else:
            return self._get_state(), reward, done, {}

    def shutdown(self):
        self.pr.stop()
        self.pr.shutdown()
Esempio n. 8
0
class EmptyContainer(Task):
    def init_task(self) -> None:
        self.large_container = Shape('large_container')
        self.target_container0 = Shape('small_container0')
        self.target_container1 = Shape('small_container1')
        self.success_detector0 = ProximitySensor('success0')
        self.success_detector1 = ProximitySensor('success1')
        self.target_waypoint = Dummy('waypoint3')
        self.spawn_boundary = SpawnBoundary([Shape('spawn_boundary')])
        self.register_waypoint_ability_start(1, self._move_above_object)
        self.register_waypoints_should_repeat(self._repeat)
        self.bin_objects = []

    def init_episode(self, index: int) -> List[str]:
        self._variation_index = index
        self.bin_objects = sample_procedural_objects(self.get_base(), 3)
        self.bin_objects_not_done = list(self.bin_objects)
        self.register_graspable_objects(self.bin_objects)
        self.spawn_boundary.clear()
        for ob in self.bin_objects:
            ob.set_position([0.0, 0.0, 0.2],
                            relative_to=self.large_container,
                            reset_dynamics=False)
            self.spawn_boundary.sample(ob,
                                       ignore_collisions=True,
                                       min_distance=0.05)
        target_pos = [-5.9605e-8, -2.5005e-1, +1.7e-1]
        conditions = []
        target_color_name, target_color_rgb = colors[index]
        color_choice = np.random.choice(list(range(index)) +
                                        list(range(index + 1, len(colors))),
                                        size=1,
                                        replace=False)[0]
        _, distractor_color_rgb = colors[color_choice]
        if index % 2 == 0:
            self.target_container0.set_color(target_color_rgb)
            self.target_container1.set_color(distractor_color_rgb)
            for ob in self.bin_objects:
                conditions.append(DetectedCondition(ob,
                                                    self.success_detector0))
        else:
            self.target_container1.set_color(target_color_rgb)
            self.target_container0.set_color(distractor_color_rgb)
            for ob in self.bin_objects:
                conditions.append(DetectedCondition(ob,
                                                    self.success_detector1))
            target_pos[1] = -target_pos[1]
        self.target_waypoint.set_position(target_pos,
                                          relative_to=self.large_container,
                                          reset_dynamics=True)
        self.register_success_conditions(
            [ConditionSet(conditions, simultaneously_met=True)])

        return [
            'empty the container in the to %s container' % target_color_name,
            'clear all items from the large tray and put them in the %s '
            'tray' % target_color_name,
            'move all objects from the large container and drop them into '
            'the smaller %s one' % target_color_name,
            'remove whatever you find in the big box in the middle and '
            'leave them in the %s one' % target_color_name,
            'grasp and move all objects into the %s container' %
            target_color_name
        ]

    def variation_count(self) -> int:
        return len(colors)

    def cleanup(self) -> None:
        [ob.remove() for ob in self.bin_objects if ob.still_exists()]
        self.bin_objects = []

    def step(self) -> None:
        for ob in self.bin_objects_not_done:
            if self._variation_index % 2 == 0:
                if self.success_detector0.is_detected(ob):
                    self.bin_objects_not_done.remove(ob)
            else:
                if self.success_detector1.is_detected(ob):
                    self.bin_objects_not_done.remove(ob)

    def _move_above_object(self, waypoint):
        if len(self.bin_objects_not_done) <= 0:
            raise RuntimeError('Should not be here.')
        bin_obj = self.bin_objects_not_done[0]
        way_obj = waypoint.get_waypoint_object()
        way_obj.set_position(bin_obj.get_position())
        x, y, _ = way_obj.get_orientation()
        _, _, z = bin_obj.get_orientation(relative_to=way_obj)
        way_obj.set_orientation([x, y, z])

    def _repeat(self):
        return len(self.bin_objects_not_done) > 0
Esempio n. 9
0
class GraspEnv(object):
    ''' Sawyer robot grasping a cuboid '''
    def __init__(self, headless, control_mode='joint_velocity'):
        '''
        parameters:
        :headless: bool, if True, no visualization, else with visualization.
        :control mode: str, 'end_position' or 'joint_velocity'.
        '''
        # set public variables
        self.headless = headless  # if headless is True, no visualization
        self.reward_offset = 10.0  # reward of achieving the grasping object
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.penalty_offset = 1.  # penalty value for undesired cases
        self.fall_down_offset = 0.1  # distance for judging the target object fall off the table
        self.metadata = []  # gym env argument
        self.control_mode = control_mode  # the control mode of robotic arm: 'end_position' or 'joint_velocity'

        # launch and set up the scene, and set the proxy variables in represent of the counterparts in the scene
        self.pr = PyRep()  # call the PyRep
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new_ik.ttt'
            )  # scene with joints controlled by ik (inverse kinematics)
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torch mode for forward kinematics
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new.ttt'
            )  # scene with joints controlled by forward kinematics
        self.pr.launch(SCENE_FILE, headless=headless
                       )  # lunch the scene, headless means no visualization
        self.pr.start()  # start the scene
        self.agent = Sawyer()  # get the robot arm in the scene
        self.gripper = BaxterGripper()  # get the gripper in the scene
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the left pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        self.table = Shape(
            'diningTable')  # the table in the scene for checking collision
        if control_mode == 'end_position':  # control the robot arm by the position of its end using inverse kinematics
            self.agent.set_control_loop_enabled(
                True)  # if false, inverse kinematics won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':  # control the robot arm by directly setting velocity values on each joint, using forward kinematics
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                7
            )  # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
        else:
            raise NotImplementedError
        self.observation_space = np.zeros(
            17)  # position and velocity of 7 joints + position of the target
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # get the target object
        self.agent_ee_tip = self.agent.get_tip(
        )  # a part of robot as the end of inverse kinematics chain for controlling
        self.tip_target = Dummy(
            'Sawyer_target'
        )  # the target point of the tip (end of the robot arm) to move towards
        self.tip_pos = self.agent_ee_tip.get_position()  # tip x,y,z position

        # set a proper initial robot gesture or tip position
        if control_mode == 'end_position':
            initial_pos = [0.3, 0.1, 0.9]
            self.tip_target.set_position(initial_pos)
            # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously
            self.tip_target.set_orientation(
                [0, np.pi, np.pi / 2], reset_dynamics=True
            )  # first two dimensions along x and y axis make gripper face downwards
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \
                0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial gesture
            self.agent.set_joint_positions(self.initial_joint_positions)
        self.pr.step()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        '''
        Return state containing arm joint positions/velocities & target position.
        '''
        return np.array(self.agent.get_joint_positions() +  # list, dim=7
                        self.agent.get_joint_velocities() +  # list, dim=7
                        self.target.get_position())  # list, dim=3

    def _is_holding(self):
        '''
         Return the state of holding the target or not, return bool.
        '''
        # Note that the collision check is not always accurate all the time,
        # for continuous collision frames, maybe only the first 4-5 frames of collision can be detected.
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    def _move(self,
              action,
              bounding_offset=0.15,
              step_factor=0.2,
              max_itr=20,
              max_error=0.05,
              rotation_norm=5.):
        ''' 
        Move the end effector on robot arm according to the action with inverse kinematics for 'end_position' control mode;
        Inverse kinematics mode control is achieved through setting the tip target instead of using .solve_ik(), 
        because sometimes the .solve_ik() does not function correctly.
        Mode: a close-loop proportional control, using ik.

        parameters:
        :bounding_offset: offset of bounding box outside the valid target position range, as valid and safe range of action
        :step_factor: small step factor mulitplied on the difference of current and desired position, i.e. proportional factor
        :max_itr: maximum moving iterations
        :max_error: upper bound of distance error for movement at each call
        :rotation_norm: factor for normalization of rotation values, since the action are of the same scale for each dimension
        '''
        pos = self.gripper.get_position()

        # check if state+action will be within of the bounding box, if so, move normally; otherwise the action is not conducted.
        #  i.e. x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
        if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset  \
            and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset:  # larger offset in z axis

            # there is a mismatch between the object set_orientation() and get_orientation():
            # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
            ori_z = -self.agent_ee_tip.get_orientation(
            )[2]  # the minus is because the mismatch between the set_orientation() and get_orientation()
            target_pos = np.array(self.agent_ee_tip.get_position()) + np.array(
                action[:3])
            diff = 1  # intialization
            itr = 0
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos  # difference of current and target position, close-loop control
                pos = cur_pos + step_factor * diff  # step small step according to current difference, to prevent that ik cannot be solved
                self.tip_target.set_position(pos.tolist())
                self.pr.step(
                )  # every time when setting target tip, need to call simulation step to achieve it

            # one big step for z-rotation is enough, but small error still exists due to the ik solver
            ori_z += rotation_norm * action[
                3]  # normalize the rotation values, as usually same action range is used in policy for both rotation and position
            self.tip_target.set_orientation([
                0, np.pi, ori_z
            ])  # make gripper face downwards and rotate ori_z along z axis
            self.pr.step()

        else:
            print("Potential Movement Out of the Bounding Box!")
            pass  # no action if potentially moving out of the bounding box

    def reinit(self):
        '''
        Reinitialize the environment, e.g. when the gripper is broken during exploration.
        '''
        self.shutdown()  # shutdown the original env first
        self.__init__(self.headless)  # initialize with the same headless mode

    def reset(self, random_target=False):
        '''
        Get a random position within a cuboid and set the target position.
        '''
        # set target object
        if random_target:  # randomize
            pos = list(np.random.uniform(
                POS_MIN, POS_MAX))  # sample from uniform in valid range
            self.target.set_position(pos)  # random position
        else:  # non-randomize
            self.target.set_position(
                self.initial_target_positions)  # fixed position
        self.target.set_orientation([0, 0, 0])
        self.pr.step()

        # set end position to be initialized
        if self.control_mode == 'end_position':  # JointMode.IK
            self.agent.set_control_loop_enabled(True)  # ik mode
            self.tip_target.set_position(
                self.initial_tip_positions
            )  # cannot set joint positions directly due to in ik mode or force/torch mode
            self.pr.step()
            # prevent stuck cases. as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly, therefore taking
            # some random action when desired position is not reached.
            itr = 0
            max_itr = 10
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(
                    -0.2, 0.2,
                    4))  # take random actions for preventing the stuck cases
                self.pr.step()

        elif self.control_mode == 'joint_velocity':  # JointMode.FORCE
            self.agent.set_joint_positions(self.initial_joint_positions)
            self.pr.step()

        # set collidable, for collision detection
        self.gripper_left_pad.set_collidable(
            True
        )  # set the pad on the gripper to be collidable, so as to check collision
        self.target.set_collidable(True)
        # open the gripper if it's not fully open
        if np.sum(self.gripper.get_open_amount()) < 1.5:
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        return self._get_state()  # return current state of the environment

    def step(self, action):
        '''
        Move the robot arm according to the action.
        If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper;
        if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper;
        '''
        # initialization
        done = False  # episode finishes
        reward = 0
        hold_flag = False  # holding the object or not
        if self.control_mode == 'end_position':
            if action is None or action.shape[
                    0] != 4:  # check if action is valid
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 4))  # random
            self._move(action)

        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[
                    0] != 7:  # check if action is valid
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 7))  # random
            self.agent.set_joint_target_velocities(
                action)  # Execute action on arm
            self.pr.step()

        else:
            raise NotImplementedError

        ax, ay, az = self.gripper.get_position()
        if math.isnan(
                ax):  # capture the broken gripper cases during exploration
            print('Gripper position is nan.')
            self.reinit()
            done = True
        tx, ty, tz = self.target.get_position()
        offset = 0.08  # augmented reward: offset of target position above the target object
        sqr_distance = (ax - tx)**2 + (ay - ty)**2 + (
            az - (tz + offset)
        )**2  # squared distance between the gripper and the target object
        ''' for visual-based control only, large time consumption! '''
        # current_vision = self.vision_sensor.capture_rgb()  # capture a screenshot of the view with vision sensor
        # plt.imshow(current_vision)
        # plt.savefig('./img/vision.png')

        # close the gripper if close enough to the object and the object is detected with the proximity sensor
        if sqr_distance < 0.1 and self.proximity_sensor.is_detected(
                self.target) == True:
            # make sure the gripper is open before grasping
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()
            self.gripper.actuate(
                0, velocity=0.5
            )  # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame
            self.pr.step()  # Step the physics simulation

            if self._is_holding():
                reward += self.reward_offset  # extra reward for grasping the object
                done = True
                hold_flag = True
            else:
                self.gripper.actuate(1, velocity=0.5)
                self.pr.step()
        elif np.sum(
                self.gripper.get_open_amount()
        ) < 1.5:  # if gripper is closed (not fully open) due to collision or esle, open it; get_open_amount() return list of gripper joint values
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()
        else:
            pass

        # the base reward is negative distance to target
        reward -= np.sqrt(sqr_distance)

        # case when the object fall off the table
        if tz < self.initial_target_positions[2] - self.fall_down_offset:
            done = True
            reward = -self.reward_offset

        # Augmented reward for orientation: better grasping gesture if the gripper has vertical orientation to the target object.
        # Note: the frame of gripper has a difference of pi/2 in z orientation as the frame of target.
        desired_orientation = np.concatenate(
            ([np.pi,
              0], [self.target.get_orientation()[2]
                   ]))  # gripper vertical to target in z and facing downwards,
        rotation_penalty = -np.sum(
            np.abs(
                np.array(self.agent_ee_tip.get_orientation()) -
                desired_orientation))
        rotation_norm = 0.02
        reward += rotation_norm * rotation_penalty

        # Penalty for collision with the table
        if self.gripper_left_pad.check_collision(self.table):
            reward -= self.penalty_offset
            #print('Penalize collision with table.')

        if math.isnan(reward):  # capture the cases of numerical problem
            reward = 0.

        return self._get_state(), reward, done, {'finished': hold_flag}

    def shutdown(self):
        ''' Close the simulator '''
        self.pr.stop()
        self.pr.shutdown()
Esempio n. 10
0
class ReacherEnv(object):
    def __init__(self, headless, control_mode='joint_velocity'):
        '''
        :headless: bool, if True, no visualization, else with visualization.
        :control mode: str, 'end_position' or 'joint_velocity'.
        '''
        self.headless = headless
        self.reward_offset = 10.0  # reward of achieving the grasping object
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.fall_down_offset = 0.1  # for judging the target object fall off the table
        self.metadata = []  # gym env format
        self.control_mode = control_mode  # the control mode of robotic arm: 'end_position' or 'joint_velocity'
        self.pr = PyRep()
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new_ik.ttt'
            )  # scene with joints controlled by ik (inverse kinematics)
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torch mode for forward kinematics
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new.ttt'
            )  # scene with joints controlled by forward kinematics
        self.pr.launch(SCENE_FILE, headless=headless
                       )  # lunch the scene, headless means no visualization
        self.pr.start()  # start the scene
        self.agent = Sawyer()  # get the robot arm in the scene
        self.gripper = BaxterGripper()  # get the gripper in the scene
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        self.initial_joint_positions = [
            0.162, -1.109, 0.259, 1.866, 2.949, -0.811, 4.440
        ]  # a good staring gesture of robot, avoid stucking and colliding
        self.agent.set_joint_positions(self.initial_joint_positions)
        if control_mode == 'end_position':  # control the robot arm by the position of its end using inverse kinematics
            self.agent.set_control_loop_enabled(
                True)  # if false, ik won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':  # control the robot arm by directly setting velocity values on each joint, forward kinematics
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                7
            )  # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
        else:
            raise NotImplementedError
        self.observation_space = np.zeros(
            17)  # position and velocity of 7 joints + position of the target
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # get the target object
        self.agent_ee_tip = self.agent.get_tip(
        )  # a part of robot as the end of inverse kinematics chain for controlling
        self.tip_target = Dummy(
            'Sawyer_target'
        )  # the target point of the tip (end of the robot arm) to move towards
        self.tip_pos = self.agent_ee_tip.get_position()  # tip x,y,z position
        self.tip_quat = self.agent_ee_tip.get_quaternion(
        )  # tip rotation as quaternion, if not control the rotation
        # reference for reset
        self.initial_tip_positions = [0.3, 0.1, 1.]
        self.initial_z_rotation = np.pi / 2.
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        '''
         Return state containing arm joint angles/velocities & target position.
         '''
        return np.array(self.agent.get_joint_positions() +  # list
                        self.agent.get_joint_velocities() +  # list
                        self.target.get_position())  # list

    def _is_holding(self):
        '''
         Return is holding the target or not, return bool.
        '''

        # Note that the collision check is not always accurate all the time,
        # for continuous conllision, maybe only the first 4-5 frames of collision can be detected
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    # def _move(self, action):
    #     ''' Mode 0: as given by original examples in Pyrep repo '''
    #     # Deprecated, always fail the ik! Move the tip according to the action with inverse kinematics for 'end_position' control.
    #     action=list(action)
    #     pos=self.agent_ee_tip.get_position()
    #     assert len(action) == len(pos)
    #     for idx in range(len(pos)):
    #         pos[idx] += action[idx]
    #     print('pos: ', pos)
    #     new_joint_angles = self.agent.solve_ik(pos, quaternion=self.tip_quat)
    #     self.agent.set_joint_target_positions(new_joint_angles)

    # def _move(self, action):
    #     '''
    #     Deprecated!
    #     Move the tip according to the action with inverse kinematics for 'end_position' control;
    #     with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
    #     Mode 1: an open-loop control, using ik; not as accurate as the closed-loop control.
    #     '''
    #     pos=self.gripper.get_position()
    #     bounding_offset=0.1
    #     robot_moving_unit=0.01  # the amount of single step move of robot, not accurate; the smaller the value, the smoother the movement.
    #     # check if state+action will be within of the bounding box, if so, move normally; else no action.
    #     #  x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
    #     if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
    #         and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset  \
    #         and pos[2]+action[2] > POS_MIN[2]-bounding_offset:

    #         moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1  # adaptive number of moving steps, with minimal of 1 step; the larger it is, the more accurate for each movement.
    #         small_step = list(1./moving_loop_itr*np.array(action))  # break the action into small steps, as the robot cannot move to the target position within one frame

    #         '''
    #         there is a mismatch between the object set_orientation() and get_orientation():
    #         the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
    #         '''
    #         ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get
    #         assert len(small_step) == len(pos)+1  # 3 values for position, 1 value for rotation

    #         for _ in range(moving_loop_itr):
    #             for idx in range(len(pos)):
    #                 pos[idx] += small_step[idx]
    #             self.tip_target.set_position(pos)
    #             self.pr.step()
    #             ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True'''
    #             ori_z+=small_step[3]  # change the orientation along z-axis with a small step
    #             self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True)  # make gripper face downwards
    #             self.pr.step()
    #         ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously '''
    #         # ori_z+=action[3]
    #         # self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True)  # make gripper face downwards
    #         # self.pr.step()

    #     else:
    #         print("Potential Movement Out of the Bounding Box!")
    #         pass # no action if potentially out of the bounding box

    def _move(self,
              action,
              bounding_offset=0.15,
              step_factor=0.4,
              max_itr=40,
              max_error=0.01,
              rotation_norm=5.):
        ''' 
        Move the tip according to the action with inverse kinematics for 'end_position' control;
        with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
        Mode 2: a close-loop control, using ik.

        parameters:
        :bounding_offset: offset of bounding box and valid target position range, bounding box is larger
        :step_factor: small step factor mulitplied on the gradient step calculated by inverse kinematics
        :max_itr=20: maximum moving iterations
        :max_error: upper bound of distance error for movement at each call
        :rotation_norm: factor for normalization of rotation values
        '''
        pos = self.gripper.get_position()

        # check if state+action will be within of the bounding box, if so, move normally; else action is not conducted.
        #  i.e. x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
        if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset  \
            and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset:  # larger offset in z axis
            ''' 
            there is a mismatch between the object set_orientation() and get_orientation():
            the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
            '''
            ori_z = -self.agent_ee_tip.get_orientation()[
                2]  # the minus is because the mismatch between the set and get
            target_pos = np.array(self.agent_ee_tip.get_position()) + np.array(
                action[:3])
            diff = 1
            itr = 0
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                # print(action[:3], np.sum(np.abs(diff)))
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos  # difference of current and target position, close-loop control
                pos = cur_pos + step_factor * diff  # step small step according to current difference, to prevent that ik cannot be solved
                self.tip_target.set_position(pos.tolist())
                self.pr.step(
                )  # every time set target tip, need to call simulation step to achieve it
            ''' one big step for z-rotation is enough, but still small error exists '''
            ori_z += rotation_norm * action[
                3]  # normalize the rotation values, as usually same action range is used in policy for both rotation and position
            self.tip_target.set_orientation(
                [0, 3.1415,
                 ori_z])  # make gripper face downwards and rotate ori_z
            self.pr.step()
            # print('after: ', ori_z, -self.agent_ee_tip.get_orientation()[2])

        else:
            print("Potential Movement Out of the Bounding Box!")
            pass  # no action if potentially moving out of the bounding box

    def reset(self, random_target=False):
        '''
        Reset with the _move() function.
        Get a random position within a cuboid and set the target position.
        '''
        if random_target:  # randomly set the target position
            pos = list(np.random.uniform(
                POS_MIN, POS_MAX))  # sample from uniform in valid range
            self.target.set_position(pos)  # random position
        else:
            self.target.set_position(
                self.initial_target_positions)  # fixed position
        self.target.set_orientation([0, 0, 0])
        self.pr.step()

        # set end position to be initialized
        if self.control_mode == 'end_position':  # JointMode.IK
            curr_pos = self.agent_ee_tip.get_position()
            a = np.array(self.initial_tip_positions) - np.array(curr_pos)
            self._move(np.concatenate((a, [self.initial_z_rotation])),
                       max_itr=80)

            # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly
            itr = 0
            max_itr = 10
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(
                    -0.2, 0.2,
                    4))  # take random actions for preventing the stuck cases
                self.pr.step()
        elif self.control_mode == 'joint_velocity':  # JointMode.FORCE
            self.agent.set_joint_positions(
                self.initial_joint_positions
            )  # sometimes the gripper is stuck, cannot get back to initial
            self.pr.step()

        # set collidable, for collision detection
        self.gripper_left_pad.set_collidable(
            True
        )  # set the pad on the gripper to be collidable, so as to check collision
        self.target.set_collidable(True)

        # open the gripper if it's not fully open
        if np.sum(self.gripper.get_open_amount()) < 1.5:
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        return self._get_state()  # return current state of the environment

    # def reset(self, random_target=False):
    #     '''
    #     Deprecated!
    #     Get a random position within a cuboid and set the target position.
    #     '''
    #     print('reset')
    #     if random_target:  # randomly set the target position
    #         pos = list(np.random.uniform(POS_MIN, POS_MAX))  # sample from uniform in valid range
    #         self.target.set_position(pos)  # random position
    #     else:
    #         self.target.set_position(self.initial_target_positions) # fixed position
    #     self.target.set_orientation([0,0,0])

    #     # set end position to be initialized
    #     if self.control_mode == 'end_position':  # JointMode.IK
    #         self.agent.set_control_loop_enabled(True)
    #         self.tip_target.set_position(self.initial_tip_positions)  # cannot set joint positions directly due to in ik mode nor force/torch mode
    #         self.pr.step()
    #         # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly
    #         itr=0
    #         max_itr=10
    #         while np.sum(np.abs(np.array(self.agent_ee_tip.get_position()-np.array(self.initial_tip_positions))))>0.1 and itr<max_itr:
    #             itr+=1
    #             self.step(np.random.uniform(-0.2,0.2,4))  # take random actions for preventing the stuck cases
    #             self.pr.step()
    #     elif self.control_mode == 'joint_velocity': # JointMode.FORCE
    #         self.agent.set_joint_positions(self.initial_joint_positions)  # sometimes the gripper is stuck, cannot get back to initial

    #     # set collidable, for collision detection
    #     self.gripper_left_pad.set_collidable(True)  # set the pad on the gripper to be collidable, so as to check collision
    #     self.target.set_collidable(True)

    #     # open the gripper if it's not fully open
    #     if np.sum(self.gripper.get_open_amount())<1.5:
    #         self.gripper.actuate(1, velocity=0.5)
    #         self.pr.step()

    #     return self._get_state()  # return current state of the environment

    def step(self, action):
        '''
        Move the robot arm according to the action.
        If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper;
        if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper;
        '''
        done = False
        if self.control_mode == 'end_position':
            if action is None or action.shape[0] != 4:
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 4))  # random
            # print(action, self.agent_ee_tip.get_position())
            self._move(action)

        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[0] != 7:
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 7))  # random
            self.agent.set_joint_target_velocities(
                action)  # Execute action on arm
            self.pr.step()  # if no pr.step() the action won't be conducted

        else:
            raise NotImplementedError

        ax, ay, az = self.gripper.get_position()
        if math.isnan(
                ax
        ):  # capture the case when the gripper is broken during exporation
            self.__init__(headless=self.headless)
            done = True

        tx, ty, tz = self.target.get_position()

        distance = (ax - tx)**2 + (ay - ty)**2 + (
            az - tz)**2  # distance between the gripper and the target object
        ''' for visual-based control only, large time consumption! '''
        # current_vision = self.vision_sensor.capture_rgb()  # capture a screenshot of the view with vision sensor
        # plt.imshow(current_vision)
        # plt.savefig('./img/vision.png')

        reward = 0
        # close the gripper if close enough to the object and the object is detected with the proximity sensor
        if distance < 0.1 and self.proximity_sensor.is_detected(
                self.target) == True:
            # make sure the gripper is open before grasping
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

            self.gripper.actuate(
                0, velocity=0.5
            )  # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame
            self.pr.step()  # Step the physics simulation

            if self._is_holding():
                # reward for hold here!
                reward += self.reward_offset  # extra reward for grasping the object
                done = True
            else:
                self.gripper.actuate(1, velocity=0.5)
                self.pr.step()

        elif np.sum(
                self.gripper.get_open_amount()
        ) < 1.5:  # if gripper is closed (not fully open) due to collision or esle, open it; .get_open_amount() return list of gripper joint values
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        else:
            pass

        reward -= np.clip(
            np.sqrt(distance), 0, self.reward_offset
        )  # Reward is negative distance to target; clipped for removing abnormal cases

        if tz < self.initial_target_positions[
                2] - self.fall_down_offset:  # the object fall off the table
            done = True
            reward = -self.reward_offset

        # can also set reward for orientation, same orientation for target and gripper, they are actually vertical, so can grasp
        # reward += np.sqrt(np.array(self.agent_ee_tip.get_orientation())-np.array(self.target.get_orientation()))

        return self._get_state(), reward, done, {}

    def shutdown(self):
        ''' Close the simulator '''
        self.pr.stop()
        self.pr.shutdown()