def __init__(self):
     self.pr = PyRep()
     self.pr.launch(SCENE_FILE, headless=False)
     self.pr.start()
     # self.agent = Panda()
     self.agent = Sawyer()
     self.gripper = BaxterGripper()
     self.agent.set_control_loop_enabled(False)
     self.agent.set_motor_locked_at_zero_velocity(True)
     self.target = Shape('target')
     self.agent_ee_tip = self.agent.get_tip()
     self.initial_joint_positions = self.agent.get_joint_positions()
class ReacherEnv(object):
    def __init__(self):
        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        # self.agent = Panda()
        self.agent = Sawyer()
        self.gripper = BaxterGripper()
        self.agent.set_control_loop_enabled(False)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')
        self.agent_ee_tip = self.agent.get_tip()
        self.initial_joint_positions = self.agent.get_joint_positions()

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        return (self.agent.get_joint_positions() +
                self.agent.get_joint_velocities() + self.target.get_position())

    def reset(self):
        # Get a random position within a cuboid and set the target position
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.agent.set_joint_positions(self.initial_joint_positions)
        self.pr.step()
        return self._get_state()

    def step(self, action):
        self.agent.set_joint_target_velocities(action)  # Execute action on arm
        self.pr.step()  # Step the physics simulation
        ax, ay, az = self.agent_ee_tip.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is negative distance to target
        distance = (ax - tx)**2 + (ay - ty)**2 + (az - tz)**2
        done = False
        if distance < 5:
            done = True
            self.gripper.actuate(
                0, velocity=0.04
            )  # if done, close the hand, 0 for close and 1 for open.
            self.pr.step()  # Step the physics simulation

        reward = -np.sqrt(distance)
        return self._get_state(), reward, done

    def shutdown(self):
        self.pr.stop()
        self.pr.shutdown()
Example #3
0
    def __init__(self, visualize=True, mode="train", **params):
        super().__init__(visualize=visualize, mode=mode)

        # Scene selection
        scene_file_path = os.path.join(
            os.getcwd(), 'deep_simulation/scenes/UR10_gripper.ttt')

        # Simulator launch
        self.env = PyRep()
        self.env.launch(scene_file_path, headless=False)
        self.env.start()
        self.env.step()

        # Task related initialisations in Simulator
        self.vision_sensor = objects.vision_sensor.VisionSensor(
            "Vision_sensor")
        self.gripper = objects.dummy.Dummy("UR10_target")
        self.gripper_zero_pose = self.gripper.get_pose()
        self.goal = objects.dummy.Dummy("goal_target")
        self.goal_STL = objects.shape.Shape("goal")
        self.goal_STL_zero_pose = self.goal_STL.get_pose()
        self.grasped_STL = objects.shape.Shape("BaxterGripper")
        self.stacking_area = objects.shape.Shape("Plane")
        self.vision_sensor = objects.vision_sensor.VisionSensor(
            "Vision_sensor")

        self.baxter_gripper = BaxterGripper()
        self.UR10_arm = UR10()
        self.gripper_dummy = objects.dummy.Dummy("gripper_dummy")

        self.step_counter = 0
        self.max_step_count = 100
        self.target_pose = None
        self.initial_distance = None
        self.image_width, self.image_height = 320, 240
        self.vision_sensor.set_resolution(
            (self.image_width, self.image_height))
        self._history_len = 1

        self._observation_space = Dict({
            "cam_image":
            Box(0,
                255, [self.image_height, self.image_width, 1],
                dtype=np.uint8)
        })

        self._action_space = Box(-1, 1, (3, ))
        self._state_space = extend_space(self._observation_space,
                                         self._history_len)
Example #4
0
    def __init__(self,
                 headless,
                 control_mode='end_position',
                 visual_control=False):
        '''
        :visual_control: bool, controlled by visual state or not (vector state).
        '''
        self.reward_offset = 10.0
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.fall_down_offset = 0.1  # for judging the target object fall off the table
        self.metadata = []  # gym env format
        self.visual_control = visual_control
        self.control_mode = control_mode
        self.pr = PyRep()
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/sawyer_reacher_rl_new_ik.ttt')
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torche mode for forward kinematics
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/sawyer_reacher_rl_new.ttt')
        self.pr.launch(SCENE_FILE, headless=headless)
        self.pr.start()
        self.agent = Sawyer()
        self.gripper = BaxterGripper()
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        if control_mode == 'end_position':
            self.agent.set_control_loop_enabled(True)  # if false, won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                8)  # 7 DOF velocity control + 1 rotation of gripper
        else:
            raise NotImplementedError
        if self.visual_control == False:
            self.observation_space = np.zeros(
                17
            )  # position and velocity of 7 joints + position of the target
        else:
            self.observation_space = np.zeros(100)  # dim of img!
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # object
        self.tip_target = Dummy(
            'Sawyer_target')  # the target point of the tip to move towards
        # self.table = Shape('diningTable')
        self.agent_ee_tip = self.agent.get_tip()
        self.tip_pos = self.agent_ee_tip.get_position()
        self.tip_quat = self.agent_ee_tip.get_quaternion(
        )  # tip rotation as quaternion, if not control the rotation

        # set a proper initial gesture/tip position
        if control_mode == 'end_position':
            # agent_position=self.agent.get_position()
            # initial_pos_offset = [0.4, 0.3, 0.2]  # initial relative position of gripper to the whole arm
            # initial_pos = [(a + o) for (a, o) in zip(agent_position, initial_pos_offset)]
            initial_pos = [0.3, 0.1, 0.9]
            self.tip_target.set_position(initial_pos)
            # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously
            self.tip_target.set_orientation(
                [0, 3.1415, 1.5707],
                reset_dynamics=True)  # make gripper face downwards
            self.pr.step()
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [
                0.001815199851989746, -1.4224984645843506, 0.704303503036499,
                2.54307222366333, 2.972468852996826, -0.4989511966705322,
                4.105560302734375
            ]
            self.agent.set_joint_positions(self.initial_joint_positions)

        self.initial_joint_positions = self.agent.get_joint_positions()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()
Example #5
0
class ReacherEnv(object):
    def __init__(self,
                 headless,
                 control_mode='end_position',
                 visual_control=False):
        '''
        :visual_control: bool, controlled by visual state or not (vector state).
        '''
        self.reward_offset = 10.0
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.fall_down_offset = 0.1  # for judging the target object fall off the table
        self.metadata = []  # gym env format
        self.visual_control = visual_control
        self.control_mode = control_mode
        self.pr = PyRep()
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/sawyer_reacher_rl_new_ik.ttt')
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torche mode for forward kinematics
            SCENE_FILE = join(dirname(abspath(__file__)),
                              './scenes/sawyer_reacher_rl_new.ttt')
        self.pr.launch(SCENE_FILE, headless=headless)
        self.pr.start()
        self.agent = Sawyer()
        self.gripper = BaxterGripper()
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        if control_mode == 'end_position':
            self.agent.set_control_loop_enabled(True)  # if false, won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                8)  # 7 DOF velocity control + 1 rotation of gripper
        else:
            raise NotImplementedError
        if self.visual_control == False:
            self.observation_space = np.zeros(
                17
            )  # position and velocity of 7 joints + position of the target
        else:
            self.observation_space = np.zeros(100)  # dim of img!
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # object
        self.tip_target = Dummy(
            'Sawyer_target')  # the target point of the tip to move towards
        # self.table = Shape('diningTable')
        self.agent_ee_tip = self.agent.get_tip()
        self.tip_pos = self.agent_ee_tip.get_position()
        self.tip_quat = self.agent_ee_tip.get_quaternion(
        )  # tip rotation as quaternion, if not control the rotation

        # set a proper initial gesture/tip position
        if control_mode == 'end_position':
            # agent_position=self.agent.get_position()
            # initial_pos_offset = [0.4, 0.3, 0.2]  # initial relative position of gripper to the whole arm
            # initial_pos = [(a + o) for (a, o) in zip(agent_position, initial_pos_offset)]
            initial_pos = [0.3, 0.1, 0.9]
            self.tip_target.set_position(initial_pos)
            # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously
            self.tip_target.set_orientation(
                [0, 3.1415, 1.5707],
                reset_dynamics=True)  # make gripper face downwards
            self.pr.step()
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [
                0.001815199851989746, -1.4224984645843506, 0.704303503036499,
                2.54307222366333, 2.972468852996826, -0.4989511966705322,
                4.105560302734375
            ]
            self.agent.set_joint_positions(self.initial_joint_positions)

        self.initial_joint_positions = self.agent.get_joint_positions()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        return np.array(self.agent.get_joint_positions() +
                        self.agent.get_joint_velocities() +
                        self.target.get_position())

    def _is_holding(self):
        '''
         Return is holding the target or not, return bool.
        '''

        # Nothe that the collision check is not always accurate all the time,
        # for continuous conllision, maybe only the first 4-5 frames of collision can be detected
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    def _get_visual_state(self):
        # Return a numpy array of size (width, height, 3)
        return self.vision_sensor.capture_rgb(
        )  # A numpy array of size (width, height, 3)

    def _is_holding(self):
        # Return is holding the target or not, return bool

        # Nothe that the collision check is not always accurate all the time,
        # for continuous conllision, maybe only the first 4-5 frames of collision can be detected
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    # def _move(self, action):
    #     '''
    #     Move the tip according to the action with inverse kinematics for 'end_position' control;
    #     with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
    #     '''
    #     robot_moving_unit=0.01  # the amount of single step move of robot, not accurate
    #     moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1  # adaptive number of moving steps, with minimal of 1 step.
    #     # print(moving_loop_itr)
    #     small_step = list(1./moving_loop_itr*np.array(action))  # break the action into small steps, as the robot cannot move to the target position within one frame
    #     pos=self.agent_ee_tip.get_position()

    #     '''
    #     there is a mismatch between the object set_orientation() and get_orientation():
    #     the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
    #     '''
    #     ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get
    #     assert len(small_step) == len(pos)+1  # 3 values for position, 1 value for rotation

    #     for _ in range(moving_loop_itr):
    #         for idx in range(len(pos)):
    #             pos[idx] += small_step[idx]
    #         self.tip_target.set_position(pos)
    #         self.pr.step()
    #         ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True'''
    #         # ori_z+=small_step[3]  # change the orientation along z-axis with a small step
    #         # self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True)  # make gripper face downwards
    #         # self.pr.step()
    #     ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously '''
    #     ori_z+=action[3]
    #     self.tip_target.set_orientation([0,3.1415,ori_z], reset_dynamics=True)  # make gripper face downwards
    #     self.pr.step()

    def _move(self, action):
        ''' 
        Move the tip according to the action with inverse kinematics for 'end_position' control;
        with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
        Mode 2: a close-loop control, using ik.
        '''
        pos = self.gripper.get_position()
        bounding_offset = 0.1
        step_factor = 0.2  # small step factor mulitplied on the gradient step calculated by inverse kinematics
        max_itr = 20  # maximum moving iterations
        max_error = 0.1  # upper bound of distance error for movement at each call
        rotation_norm = 5.  # factor for normalization of rotation values
        # check if state+action will be within of the bounding box, if so, move normally; else no action.
        #  x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
        if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset  \
            and pos[2]+action[2] > POS_MIN[2]-bounding_offset:
            ''' 
            there is a mismatch between the object set_orientation() and get_orientation():
            the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
            '''
            ori_z = -self.agent_ee_tip.get_orientation()[
                2]  # the minus is because the mismatch between the set and get
            target_pos = np.array(self.agent_ee_tip.get_position()) + np.array(
                action[:3])
            diff = 1
            itr = 0
            # print('before: ', ori_z)
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos
                pos = cur_pos + step_factor * diff
                self.tip_target.set_position(pos.tolist())
                self.pr.step()
            ''' one big step for z-rotation is enough '''
            ori_z += rotation_norm * action[3]
            self.tip_target.set_orientation([0, np.pi, ori_z
                                             ])  # make gripper face downwards
            self.pr.step()

        else:
            # print("Potential Movement Out of the Bounding Box!")
            pass  # no action if potentially out of the bounding box

    def reset(self):
        # Get a random position within a cuboid and set the target position
        max_itr = 10
        pos = list(np.random.uniform(POS_MIN, POS_MAX))
        self.target.set_position(pos)
        self.target.set_orientation([0, 0, 0])
        self.pr.step()
        # changing the color or texture for domain randomization
        self.target.set_color(
            np.random.uniform(low=0, high=1, size=3).tolist()
        )  # set [r,g,b] 3 channel values of object color
        # set end position to be initialized
        if self.control_mode == 'end_position':  # JointMode.IK
            self.agent.set_control_loop_enabled(True)
            self.tip_target.set_position(
                self.initial_tip_positions
            )  # cannot set joint positions directly due to in ik mode nor force/torch mode
            self.pr.step()
            # prevent stuck case
            itr = 0
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(
                    -0.2, 0.2,
                    4))  # take random actions for preventing the stuck cases
                self.pr.step()
        elif self.control_mode == 'joint_velocity':  # JointMode.FORCE
            self.agent.set_joint_positions(
                self.initial_joint_positions
            )  # sometimes the gripper is stuck, cannot get back to initial
            self.pr.step()

        # self.table.set_collidable(True)
        self.gripper_left_pad.set_collidable(
            True
        )  # set the pad on the gripper to be collidable, so as to check collision
        self.target.set_collidable(True)
        if np.sum(self.gripper.get_open_amount()) < 1.5:
            self.gripper.actuate(1, velocity=0.5)  # open the gripper
            self.pr.step()
        if self.visual_control:
            return self._get_visual_state()
        else:
            return self._get_state()

    def step(self, action):
        '''
        Move the robot arm according to the action.
        If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim of gripper rotation.
        if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim of gripper rotation.
        '''
        if self.control_mode == 'end_position':
            if action is None or action.shape[0] != 4:
                action = list(np.random.uniform(-0.1, 0.1, 4))  # random
            self._move(action)
        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[0] != 7:
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 7))  # random
            self.agent.set_joint_target_velocities(
                action)  # Execute action on arm
            self.pr.step()
        else:
            raise NotImplementedError

        ax, ay, az = self.gripper.get_position()
        tx, ty, tz = self.target.get_position()
        # Reward is negative distance to target
        distance = (ax - tx)**2 + (ay - ty)**2 + (az - tz)**2
        done = False

        # print(self.proximity_sensor.is_detected(self.target))
        current_vision = self.vision_sensor.capture_rgb(
        )  # capture a screenshot of the view with vision sensor
        plt.imshow(current_vision)
        plt.savefig('./img/vision.png')

        reward = 0
        # close the gripper if close enough to the object and the object is detected with the proximity sensor
        if distance < 0.1 and self.proximity_sensor.is_detected(
                self.target) == True:
            # make sure the gripper is open before grasping
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

            self.gripper.actuate(
                0, velocity=0.5
            )  # if done, close the hand, 0 for close and 1 for open.
            self.pr.step()  # Step the physics simulation

            if self._is_holding():
                # reward for hold here!
                reward += 10
                done = True

            else:
                self.gripper.actuate(1, velocity=0.5)
                self.pr.step()

        elif np.sum(
                self.gripper.get_open_amount()
        ) < 1.5:  # if gripper is closed due to collision or esle, open it; .get_open_amount() return list of gripper joint values
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        else:
            pass

        reward -= np.sqrt(distance)

        if tz < self.initial_target_positions[
                2] - self.fall_down_offset:  # the object fall off the table
            done = True
            reward = -self.reward_offset

        if self.visual_control:
            return self._get_visual_state(), reward, done, {}
        else:
            return self._get_state(), reward, done, {}

    def shutdown(self):
        self.pr.stop()
        self.pr.shutdown()
Example #6
0
 def test_get_duplicate_gripper(self):
     g = BaxterGripper(1)
     self.assertIsInstance(g, BaxterGripper)
from os.path import dirname, join, abspath
from pyrep import PyRep
from pyrep.robots.arms.baxter import BaxterLeft, BaxterRight
from pyrep.robots.end_effectors.baxter_gripper import BaxterGripper
from pyrep.objects.dummy import Dummy
from pyrep.objects.shape import Shape

SCENE_FILE = join(dirname(abspath(__file__)), 'scene_baxter_pick_and_pass.ttt')
pr = PyRep()

pr.launch(SCENE_FILE, headless=False)
pr.start()

baxter_left = BaxterLeft()
baxter_right = BaxterRight()
baxter_gripper_left = BaxterGripper(0)
baxter_gripper_right = BaxterGripper(1)

cup = Shape('Cup')
waypoints = [Dummy('waypoint%d' % i) for i in range(7)]

print('Planning path for left arm to cup ...')
path = baxter_left.get_path(position=waypoints[0].get_position(),
                            quaternion=waypoints[0].get_quaternion())
path.visualize()  # Let's see what the path looks like
print('Executing plan ...')
done = False
while not done:
    done = path.step()
    pr.step()
path.clear_visualization()
Example #8
0
    def __init__(self, headless, control_mode='joint_velocity'):
        '''
        parameters:
        :headless: bool, if True, no visualization, else with visualization.
        :control mode: str, 'end_position' or 'joint_velocity'.
        '''
        # set public variables
        self.headless = headless  # if headless is True, no visualization
        self.reward_offset = 10.0  # reward of achieving the grasping object
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.penalty_offset = 1.  # penalty value for undesired cases
        self.fall_down_offset = 0.1  # distance for judging the target object fall off the table
        self.metadata = []  # gym env argument
        self.control_mode = control_mode  # the control mode of robotic arm: 'end_position' or 'joint_velocity'

        # launch and set up the scene, and set the proxy variables in represent of the counterparts in the scene
        self.pr = PyRep()  # call the PyRep
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new_ik.ttt'
            )  # scene with joints controlled by ik (inverse kinematics)
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torch mode for forward kinematics
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new.ttt'
            )  # scene with joints controlled by forward kinematics
        self.pr.launch(SCENE_FILE, headless=headless
                       )  # lunch the scene, headless means no visualization
        self.pr.start()  # start the scene
        self.agent = Sawyer()  # get the robot arm in the scene
        self.gripper = BaxterGripper()  # get the gripper in the scene
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the left pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        self.table = Shape(
            'diningTable')  # the table in the scene for checking collision
        if control_mode == 'end_position':  # control the robot arm by the position of its end using inverse kinematics
            self.agent.set_control_loop_enabled(
                True)  # if false, inverse kinematics won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':  # control the robot arm by directly setting velocity values on each joint, using forward kinematics
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                7
            )  # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
        else:
            raise NotImplementedError
        self.observation_space = np.zeros(
            17)  # position and velocity of 7 joints + position of the target
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # get the target object
        self.agent_ee_tip = self.agent.get_tip(
        )  # a part of robot as the end of inverse kinematics chain for controlling
        self.tip_target = Dummy(
            'Sawyer_target'
        )  # the target point of the tip (end of the robot arm) to move towards
        self.tip_pos = self.agent_ee_tip.get_position()  # tip x,y,z position

        # set a proper initial robot gesture or tip position
        if control_mode == 'end_position':
            initial_pos = [0.3, 0.1, 0.9]
            self.tip_target.set_position(initial_pos)
            # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously
            self.tip_target.set_orientation(
                [0, np.pi, np.pi / 2], reset_dynamics=True
            )  # first two dimensions along x and y axis make gripper face downwards
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \
                0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial gesture
            self.agent.set_joint_positions(self.initial_joint_positions)
        self.pr.step()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()
Example #9
0
class GraspEnv(object):
    ''' Sawyer robot grasping a cuboid '''
    def __init__(self, headless, control_mode='joint_velocity'):
        '''
        parameters:
        :headless: bool, if True, no visualization, else with visualization.
        :control mode: str, 'end_position' or 'joint_velocity'.
        '''
        # set public variables
        self.headless = headless  # if headless is True, no visualization
        self.reward_offset = 10.0  # reward of achieving the grasping object
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.penalty_offset = 1.  # penalty value for undesired cases
        self.fall_down_offset = 0.1  # distance for judging the target object fall off the table
        self.metadata = []  # gym env argument
        self.control_mode = control_mode  # the control mode of robotic arm: 'end_position' or 'joint_velocity'

        # launch and set up the scene, and set the proxy variables in represent of the counterparts in the scene
        self.pr = PyRep()  # call the PyRep
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new_ik.ttt'
            )  # scene with joints controlled by ik (inverse kinematics)
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torch mode for forward kinematics
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new.ttt'
            )  # scene with joints controlled by forward kinematics
        self.pr.launch(SCENE_FILE, headless=headless
                       )  # lunch the scene, headless means no visualization
        self.pr.start()  # start the scene
        self.agent = Sawyer()  # get the robot arm in the scene
        self.gripper = BaxterGripper()  # get the gripper in the scene
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the left pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        self.table = Shape(
            'diningTable')  # the table in the scene for checking collision
        if control_mode == 'end_position':  # control the robot arm by the position of its end using inverse kinematics
            self.agent.set_control_loop_enabled(
                True)  # if false, inverse kinematics won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':  # control the robot arm by directly setting velocity values on each joint, using forward kinematics
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                7
            )  # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
        else:
            raise NotImplementedError
        self.observation_space = np.zeros(
            17)  # position and velocity of 7 joints + position of the target
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # get the target object
        self.agent_ee_tip = self.agent.get_tip(
        )  # a part of robot as the end of inverse kinematics chain for controlling
        self.tip_target = Dummy(
            'Sawyer_target'
        )  # the target point of the tip (end of the robot arm) to move towards
        self.tip_pos = self.agent_ee_tip.get_position()  # tip x,y,z position

        # set a proper initial robot gesture or tip position
        if control_mode == 'end_position':
            initial_pos = [0.3, 0.1, 0.9]
            self.tip_target.set_position(initial_pos)
            # one big step for rotation setting is enough, with reset_dynamics=True, set the rotation instantaneously
            self.tip_target.set_orientation(
                [0, np.pi, np.pi / 2], reset_dynamics=True
            )  # first two dimensions along x and y axis make gripper face downwards
        elif control_mode == 'joint_velocity':
            self.initial_joint_positions = [0.001815199851989746, -1.4224984645843506, \
                0.704303503036499, 2.54307222366333, 2.972468852996826, -0.4989511966705322, 4.105560302734375] # a proper initial gesture
            self.agent.set_joint_positions(self.initial_joint_positions)
        self.pr.step()
        self.initial_tip_positions = self.agent_ee_tip.get_position()
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        '''
        Return state containing arm joint positions/velocities & target position.
        '''
        return np.array(self.agent.get_joint_positions() +  # list, dim=7
                        self.agent.get_joint_velocities() +  # list, dim=7
                        self.target.get_position())  # list, dim=3

    def _is_holding(self):
        '''
         Return the state of holding the target or not, return bool.
        '''
        # Note that the collision check is not always accurate all the time,
        # for continuous collision frames, maybe only the first 4-5 frames of collision can be detected.
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    def _move(self,
              action,
              bounding_offset=0.15,
              step_factor=0.2,
              max_itr=20,
              max_error=0.05,
              rotation_norm=5.):
        ''' 
        Move the end effector on robot arm according to the action with inverse kinematics for 'end_position' control mode;
        Inverse kinematics mode control is achieved through setting the tip target instead of using .solve_ik(), 
        because sometimes the .solve_ik() does not function correctly.
        Mode: a close-loop proportional control, using ik.

        parameters:
        :bounding_offset: offset of bounding box outside the valid target position range, as valid and safe range of action
        :step_factor: small step factor mulitplied on the difference of current and desired position, i.e. proportional factor
        :max_itr: maximum moving iterations
        :max_error: upper bound of distance error for movement at each call
        :rotation_norm: factor for normalization of rotation values, since the action are of the same scale for each dimension
        '''
        pos = self.gripper.get_position()

        # check if state+action will be within of the bounding box, if so, move normally; otherwise the action is not conducted.
        #  i.e. x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
        if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset  \
            and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset:  # larger offset in z axis

            # there is a mismatch between the object set_orientation() and get_orientation():
            # the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
            ori_z = -self.agent_ee_tip.get_orientation(
            )[2]  # the minus is because the mismatch between the set_orientation() and get_orientation()
            target_pos = np.array(self.agent_ee_tip.get_position()) + np.array(
                action[:3])
            diff = 1  # intialization
            itr = 0
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos  # difference of current and target position, close-loop control
                pos = cur_pos + step_factor * diff  # step small step according to current difference, to prevent that ik cannot be solved
                self.tip_target.set_position(pos.tolist())
                self.pr.step(
                )  # every time when setting target tip, need to call simulation step to achieve it

            # one big step for z-rotation is enough, but small error still exists due to the ik solver
            ori_z += rotation_norm * action[
                3]  # normalize the rotation values, as usually same action range is used in policy for both rotation and position
            self.tip_target.set_orientation([
                0, np.pi, ori_z
            ])  # make gripper face downwards and rotate ori_z along z axis
            self.pr.step()

        else:
            print("Potential Movement Out of the Bounding Box!")
            pass  # no action if potentially moving out of the bounding box

    def reinit(self):
        '''
        Reinitialize the environment, e.g. when the gripper is broken during exploration.
        '''
        self.shutdown()  # shutdown the original env first
        self.__init__(self.headless)  # initialize with the same headless mode

    def reset(self, random_target=False):
        '''
        Get a random position within a cuboid and set the target position.
        '''
        # set target object
        if random_target:  # randomize
            pos = list(np.random.uniform(
                POS_MIN, POS_MAX))  # sample from uniform in valid range
            self.target.set_position(pos)  # random position
        else:  # non-randomize
            self.target.set_position(
                self.initial_target_positions)  # fixed position
        self.target.set_orientation([0, 0, 0])
        self.pr.step()

        # set end position to be initialized
        if self.control_mode == 'end_position':  # JointMode.IK
            self.agent.set_control_loop_enabled(True)  # ik mode
            self.tip_target.set_position(
                self.initial_tip_positions
            )  # cannot set joint positions directly due to in ik mode or force/torch mode
            self.pr.step()
            # prevent stuck cases. as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly, therefore taking
            # some random action when desired position is not reached.
            itr = 0
            max_itr = 10
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(
                    -0.2, 0.2,
                    4))  # take random actions for preventing the stuck cases
                self.pr.step()

        elif self.control_mode == 'joint_velocity':  # JointMode.FORCE
            self.agent.set_joint_positions(self.initial_joint_positions)
            self.pr.step()

        # set collidable, for collision detection
        self.gripper_left_pad.set_collidable(
            True
        )  # set the pad on the gripper to be collidable, so as to check collision
        self.target.set_collidable(True)
        # open the gripper if it's not fully open
        if np.sum(self.gripper.get_open_amount()) < 1.5:
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        return self._get_state()  # return current state of the environment

    def step(self, action):
        '''
        Move the robot arm according to the action.
        If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper;
        if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper;
        '''
        # initialization
        done = False  # episode finishes
        reward = 0
        hold_flag = False  # holding the object or not
        if self.control_mode == 'end_position':
            if action is None or action.shape[
                    0] != 4:  # check if action is valid
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 4))  # random
            self._move(action)

        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[
                    0] != 7:  # check if action is valid
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 7))  # random
            self.agent.set_joint_target_velocities(
                action)  # Execute action on arm
            self.pr.step()

        else:
            raise NotImplementedError

        ax, ay, az = self.gripper.get_position()
        if math.isnan(
                ax):  # capture the broken gripper cases during exploration
            print('Gripper position is nan.')
            self.reinit()
            done = True
        tx, ty, tz = self.target.get_position()
        offset = 0.08  # augmented reward: offset of target position above the target object
        sqr_distance = (ax - tx)**2 + (ay - ty)**2 + (
            az - (tz + offset)
        )**2  # squared distance between the gripper and the target object
        ''' for visual-based control only, large time consumption! '''
        # current_vision = self.vision_sensor.capture_rgb()  # capture a screenshot of the view with vision sensor
        # plt.imshow(current_vision)
        # plt.savefig('./img/vision.png')

        # close the gripper if close enough to the object and the object is detected with the proximity sensor
        if sqr_distance < 0.1 and self.proximity_sensor.is_detected(
                self.target) == True:
            # make sure the gripper is open before grasping
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()
            self.gripper.actuate(
                0, velocity=0.5
            )  # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame
            self.pr.step()  # Step the physics simulation

            if self._is_holding():
                reward += self.reward_offset  # extra reward for grasping the object
                done = True
                hold_flag = True
            else:
                self.gripper.actuate(1, velocity=0.5)
                self.pr.step()
        elif np.sum(
                self.gripper.get_open_amount()
        ) < 1.5:  # if gripper is closed (not fully open) due to collision or esle, open it; get_open_amount() return list of gripper joint values
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()
        else:
            pass

        # the base reward is negative distance to target
        reward -= np.sqrt(sqr_distance)

        # case when the object fall off the table
        if tz < self.initial_target_positions[2] - self.fall_down_offset:
            done = True
            reward = -self.reward_offset

        # Augmented reward for orientation: better grasping gesture if the gripper has vertical orientation to the target object.
        # Note: the frame of gripper has a difference of pi/2 in z orientation as the frame of target.
        desired_orientation = np.concatenate(
            ([np.pi,
              0], [self.target.get_orientation()[2]
                   ]))  # gripper vertical to target in z and facing downwards,
        rotation_penalty = -np.sum(
            np.abs(
                np.array(self.agent_ee_tip.get_orientation()) -
                desired_orientation))
        rotation_norm = 0.02
        reward += rotation_norm * rotation_penalty

        # Penalty for collision with the table
        if self.gripper_left_pad.check_collision(self.table):
            reward -= self.penalty_offset
            #print('Penalize collision with table.')

        if math.isnan(reward):  # capture the cases of numerical problem
            reward = 0.

        return self._get_state(), reward, done, {'finished': hold_flag}

    def shutdown(self):
        ''' Close the simulator '''
        self.pr.stop()
        self.pr.shutdown()
Example #10
0
 def __init__(self, headless, control_mode='joint_velocity'):
     '''
     :headless: bool, if True, no visualization, else with visualization.
     :control mode: str, 'end_position' or 'joint_velocity'.
     '''
     self.headless = headless
     self.reward_offset = 10.0  # reward of achieving the grasping object
     self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
     self.fall_down_offset = 0.1  # for judging the target object fall off the table
     self.metadata = []  # gym env format
     self.control_mode = control_mode  # the control mode of robotic arm: 'end_position' or 'joint_velocity'
     self.pr = PyRep()
     if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
         SCENE_FILE = join(
             dirname(abspath(__file__)),
             './scenes/sawyer_reacher_rl_new_ik.ttt'
         )  # scene with joints controlled by ik (inverse kinematics)
     elif control_mode == 'joint_velocity':  # the scene with all joints in force/torch mode for forward kinematics
         SCENE_FILE = join(
             dirname(abspath(__file__)),
             './scenes/sawyer_reacher_rl_new.ttt'
         )  # scene with joints controlled by forward kinematics
     self.pr.launch(SCENE_FILE, headless=headless
                    )  # lunch the scene, headless means no visualization
     self.pr.start()  # start the scene
     self.agent = Sawyer()  # get the robot arm in the scene
     self.gripper = BaxterGripper()  # get the gripper in the scene
     self.gripper_left_pad = Shape(
         'BaxterGripper_leftPad')  # the pad on the gripper finger
     self.proximity_sensor = ProximitySensor(
         'BaxterGripper_attachProxSensor'
     )  # need the name of the sensor here
     self.vision_sensor = VisionSensor(
         'Vision_sensor')  # need the name of the sensor here
     self.initial_joint_positions = [
         0.162, -1.109, 0.259, 1.866, 2.949, -0.811, 4.440
     ]  # a good staring gesture of robot, avoid stucking and colliding
     self.agent.set_joint_positions(self.initial_joint_positions)
     if control_mode == 'end_position':  # control the robot arm by the position of its end using inverse kinematics
         self.agent.set_control_loop_enabled(
             True)  # if false, ik won't work
         self.action_space = np.zeros(
             4)  # 3 DOF end position control + 1 rotation of gripper
     elif control_mode == 'joint_velocity':  # control the robot arm by directly setting velocity values on each joint, forward kinematics
         self.agent.set_control_loop_enabled(False)
         self.action_space = np.zeros(
             7
         )  # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
     else:
         raise NotImplementedError
     self.observation_space = np.zeros(
         17)  # position and velocity of 7 joints + position of the target
     self.agent.set_motor_locked_at_zero_velocity(True)
     self.target = Shape('target')  # get the target object
     self.agent_ee_tip = self.agent.get_tip(
     )  # a part of robot as the end of inverse kinematics chain for controlling
     self.tip_target = Dummy(
         'Sawyer_target'
     )  # the target point of the tip (end of the robot arm) to move towards
     self.tip_pos = self.agent_ee_tip.get_position()  # tip x,y,z position
     self.tip_quat = self.agent_ee_tip.get_quaternion(
     )  # tip rotation as quaternion, if not control the rotation
     # reference for reset
     self.initial_tip_positions = [0.3, 0.1, 1.]
     self.initial_z_rotation = np.pi / 2.
     self.initial_target_positions = self.target.get_position()
Example #11
0
class ReacherEnv(object):
    def __init__(self, headless, control_mode='joint_velocity'):
        '''
        :headless: bool, if True, no visualization, else with visualization.
        :control mode: str, 'end_position' or 'joint_velocity'.
        '''
        self.headless = headless
        self.reward_offset = 10.0  # reward of achieving the grasping object
        self.reward_range = self.reward_offset  # reward range for register gym env when using vectorized env wrapper
        self.fall_down_offset = 0.1  # for judging the target object fall off the table
        self.metadata = []  # gym env format
        self.control_mode = control_mode  # the control mode of robotic arm: 'end_position' or 'joint_velocity'
        self.pr = PyRep()
        if control_mode == 'end_position':  # need to use different scene, the one with all joints in inverse kinematics mode
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new_ik.ttt'
            )  # scene with joints controlled by ik (inverse kinematics)
        elif control_mode == 'joint_velocity':  # the scene with all joints in force/torch mode for forward kinematics
            SCENE_FILE = join(
                dirname(abspath(__file__)),
                './scenes/sawyer_reacher_rl_new.ttt'
            )  # scene with joints controlled by forward kinematics
        self.pr.launch(SCENE_FILE, headless=headless
                       )  # lunch the scene, headless means no visualization
        self.pr.start()  # start the scene
        self.agent = Sawyer()  # get the robot arm in the scene
        self.gripper = BaxterGripper()  # get the gripper in the scene
        self.gripper_left_pad = Shape(
            'BaxterGripper_leftPad')  # the pad on the gripper finger
        self.proximity_sensor = ProximitySensor(
            'BaxterGripper_attachProxSensor'
        )  # need the name of the sensor here
        self.vision_sensor = VisionSensor(
            'Vision_sensor')  # need the name of the sensor here
        self.initial_joint_positions = [
            0.162, -1.109, 0.259, 1.866, 2.949, -0.811, 4.440
        ]  # a good staring gesture of robot, avoid stucking and colliding
        self.agent.set_joint_positions(self.initial_joint_positions)
        if control_mode == 'end_position':  # control the robot arm by the position of its end using inverse kinematics
            self.agent.set_control_loop_enabled(
                True)  # if false, ik won't work
            self.action_space = np.zeros(
                4)  # 3 DOF end position control + 1 rotation of gripper
        elif control_mode == 'joint_velocity':  # control the robot arm by directly setting velocity values on each joint, forward kinematics
            self.agent.set_control_loop_enabled(False)
            self.action_space = np.zeros(
                7
            )  # 7 DOF velocity control, no need for extra control of end rotation, the 7th joint controls it.
        else:
            raise NotImplementedError
        self.observation_space = np.zeros(
            17)  # position and velocity of 7 joints + position of the target
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.target = Shape('target')  # get the target object
        self.agent_ee_tip = self.agent.get_tip(
        )  # a part of robot as the end of inverse kinematics chain for controlling
        self.tip_target = Dummy(
            'Sawyer_target'
        )  # the target point of the tip (end of the robot arm) to move towards
        self.tip_pos = self.agent_ee_tip.get_position()  # tip x,y,z position
        self.tip_quat = self.agent_ee_tip.get_quaternion(
        )  # tip rotation as quaternion, if not control the rotation
        # reference for reset
        self.initial_tip_positions = [0.3, 0.1, 1.]
        self.initial_z_rotation = np.pi / 2.
        self.initial_target_positions = self.target.get_position()

    def _get_state(self):
        '''
         Return state containing arm joint angles/velocities & target position.
         '''
        return np.array(self.agent.get_joint_positions() +  # list
                        self.agent.get_joint_velocities() +  # list
                        self.target.get_position())  # list

    def _is_holding(self):
        '''
         Return is holding the target or not, return bool.
        '''

        # Note that the collision check is not always accurate all the time,
        # for continuous conllision, maybe only the first 4-5 frames of collision can be detected
        pad_collide_object = self.gripper_left_pad.check_collision(self.target)
        if pad_collide_object and self.proximity_sensor.is_detected(
                self.target) == True:
            return True
        else:
            return False

    # def _move(self, action):
    #     ''' Mode 0: as given by original examples in Pyrep repo '''
    #     # Deprecated, always fail the ik! Move the tip according to the action with inverse kinematics for 'end_position' control.
    #     action=list(action)
    #     pos=self.agent_ee_tip.get_position()
    #     assert len(action) == len(pos)
    #     for idx in range(len(pos)):
    #         pos[idx] += action[idx]
    #     print('pos: ', pos)
    #     new_joint_angles = self.agent.solve_ik(pos, quaternion=self.tip_quat)
    #     self.agent.set_joint_target_positions(new_joint_angles)

    # def _move(self, action):
    #     '''
    #     Deprecated!
    #     Move the tip according to the action with inverse kinematics for 'end_position' control;
    #     with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
    #     Mode 1: an open-loop control, using ik; not as accurate as the closed-loop control.
    #     '''
    #     pos=self.gripper.get_position()
    #     bounding_offset=0.1
    #     robot_moving_unit=0.01  # the amount of single step move of robot, not accurate; the smaller the value, the smoother the movement.
    #     # check if state+action will be within of the bounding box, if so, move normally; else no action.
    #     #  x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
    #     if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
    #         and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+bounding_offset  \
    #         and pos[2]+action[2] > POS_MIN[2]-bounding_offset:

    #         moving_loop_itr=int(np.sum(np.abs(action[:3]))/robot_moving_unit)+1  # adaptive number of moving steps, with minimal of 1 step; the larger it is, the more accurate for each movement.
    #         small_step = list(1./moving_loop_itr*np.array(action))  # break the action into small steps, as the robot cannot move to the target position within one frame

    #         '''
    #         there is a mismatch between the object set_orientation() and get_orientation():
    #         the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
    #         '''
    #         ori_z=-self.agent_ee_tip.get_orientation()[2] # the minus is because the mismatch between the set and get
    #         assert len(small_step) == len(pos)+1  # 3 values for position, 1 value for rotation

    #         for _ in range(moving_loop_itr):
    #             for idx in range(len(pos)):
    #                 pos[idx] += small_step[idx]
    #             self.tip_target.set_position(pos)
    #             self.pr.step()
    #             ''' deprecated! no need to use small steps for the rotation with reset_dynamics=True'''
    #             ori_z+=small_step[3]  # change the orientation along z-axis with a small step
    #             self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True)  # make gripper face downwards
    #             self.pr.step()
    #         ''' one big step for z-rotation is enough, with reset_dynamics=True, set the rotation instantaneously '''
    #         # ori_z+=action[3]
    #         # self.tip_target.set_orientation([0, np.pi, ori_z], reset_dynamics=True)  # make gripper face downwards
    #         # self.pr.step()

    #     else:
    #         print("Potential Movement Out of the Bounding Box!")
    #         pass # no action if potentially out of the bounding box

    def _move(self,
              action,
              bounding_offset=0.15,
              step_factor=0.4,
              max_itr=40,
              max_error=0.01,
              rotation_norm=5.):
        ''' 
        Move the tip according to the action with inverse kinematics for 'end_position' control;
        with control of tip target in inverse kinematics mode instead of using .solve_ik() in forward kinematics mode.
        Mode 2: a close-loop control, using ik.

        parameters:
        :bounding_offset: offset of bounding box and valid target position range, bounding box is larger
        :step_factor: small step factor mulitplied on the gradient step calculated by inverse kinematics
        :max_itr=20: maximum moving iterations
        :max_error: upper bound of distance error for movement at each call
        :rotation_norm: factor for normalization of rotation values
        '''
        pos = self.gripper.get_position()

        # check if state+action will be within of the bounding box, if so, move normally; else action is not conducted.
        #  i.e. x_min < x < x_max  and  y_min < y < y_max  and  z > z_min
        if pos[0]+action[0]>POS_MIN[0]-bounding_offset and pos[0]+action[0]<POS_MAX[0]+bounding_offset  \
            and pos[1]+action[1] > POS_MIN[1]-bounding_offset and pos[1]+action[1] < POS_MAX[1]+2*bounding_offset  \
            and pos[2]+action[2] > POS_MIN[2]-2*bounding_offset:  # larger offset in z axis
            ''' 
            there is a mismatch between the object set_orientation() and get_orientation():
            the (x,y,z) in set_orientation() will be (y,x,-z) in get_orientation().
            '''
            ori_z = -self.agent_ee_tip.get_orientation()[
                2]  # the minus is because the mismatch between the set and get
            target_pos = np.array(self.agent_ee_tip.get_position()) + np.array(
                action[:3])
            diff = 1
            itr = 0
            while np.sum(np.abs(diff)) > max_error and itr < max_itr:
                # print(action[:3], np.sum(np.abs(diff)))
                itr += 1
                # set pos in small step
                cur_pos = self.agent_ee_tip.get_position()
                diff = target_pos - cur_pos  # difference of current and target position, close-loop control
                pos = cur_pos + step_factor * diff  # step small step according to current difference, to prevent that ik cannot be solved
                self.tip_target.set_position(pos.tolist())
                self.pr.step(
                )  # every time set target tip, need to call simulation step to achieve it
            ''' one big step for z-rotation is enough, but still small error exists '''
            ori_z += rotation_norm * action[
                3]  # normalize the rotation values, as usually same action range is used in policy for both rotation and position
            self.tip_target.set_orientation(
                [0, 3.1415,
                 ori_z])  # make gripper face downwards and rotate ori_z
            self.pr.step()
            # print('after: ', ori_z, -self.agent_ee_tip.get_orientation()[2])

        else:
            print("Potential Movement Out of the Bounding Box!")
            pass  # no action if potentially moving out of the bounding box

    def reset(self, random_target=False):
        '''
        Reset with the _move() function.
        Get a random position within a cuboid and set the target position.
        '''
        if random_target:  # randomly set the target position
            pos = list(np.random.uniform(
                POS_MIN, POS_MAX))  # sample from uniform in valid range
            self.target.set_position(pos)  # random position
        else:
            self.target.set_position(
                self.initial_target_positions)  # fixed position
        self.target.set_orientation([0, 0, 0])
        self.pr.step()

        # set end position to be initialized
        if self.control_mode == 'end_position':  # JointMode.IK
            curr_pos = self.agent_ee_tip.get_position()
            a = np.array(self.initial_tip_positions) - np.array(curr_pos)
            self._move(np.concatenate((a, [self.initial_z_rotation])),
                       max_itr=80)

            # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly
            itr = 0
            max_itr = 10
            while np.sum(
                    np.abs(
                        np.array(self.agent_ee_tip.get_position() -
                                 np.array(self.initial_tip_positions)))
            ) > 0.1 and itr < max_itr:
                itr += 1
                self.step(np.random.uniform(
                    -0.2, 0.2,
                    4))  # take random actions for preventing the stuck cases
                self.pr.step()
        elif self.control_mode == 'joint_velocity':  # JointMode.FORCE
            self.agent.set_joint_positions(
                self.initial_joint_positions
            )  # sometimes the gripper is stuck, cannot get back to initial
            self.pr.step()

        # set collidable, for collision detection
        self.gripper_left_pad.set_collidable(
            True
        )  # set the pad on the gripper to be collidable, so as to check collision
        self.target.set_collidable(True)

        # open the gripper if it's not fully open
        if np.sum(self.gripper.get_open_amount()) < 1.5:
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        return self._get_state()  # return current state of the environment

    # def reset(self, random_target=False):
    #     '''
    #     Deprecated!
    #     Get a random position within a cuboid and set the target position.
    #     '''
    #     print('reset')
    #     if random_target:  # randomly set the target position
    #         pos = list(np.random.uniform(POS_MIN, POS_MAX))  # sample from uniform in valid range
    #         self.target.set_position(pos)  # random position
    #     else:
    #         self.target.set_position(self.initial_target_positions) # fixed position
    #     self.target.set_orientation([0,0,0])

    #     # set end position to be initialized
    #     if self.control_mode == 'end_position':  # JointMode.IK
    #         self.agent.set_control_loop_enabled(True)
    #         self.tip_target.set_position(self.initial_tip_positions)  # cannot set joint positions directly due to in ik mode nor force/torch mode
    #         self.pr.step()
    #         # prevent stuck cases, as using ik for moving, stucking can make ik cannot be solved therefore not reset correctly
    #         itr=0
    #         max_itr=10
    #         while np.sum(np.abs(np.array(self.agent_ee_tip.get_position()-np.array(self.initial_tip_positions))))>0.1 and itr<max_itr:
    #             itr+=1
    #             self.step(np.random.uniform(-0.2,0.2,4))  # take random actions for preventing the stuck cases
    #             self.pr.step()
    #     elif self.control_mode == 'joint_velocity': # JointMode.FORCE
    #         self.agent.set_joint_positions(self.initial_joint_positions)  # sometimes the gripper is stuck, cannot get back to initial

    #     # set collidable, for collision detection
    #     self.gripper_left_pad.set_collidable(True)  # set the pad on the gripper to be collidable, so as to check collision
    #     self.target.set_collidable(True)

    #     # open the gripper if it's not fully open
    #     if np.sum(self.gripper.get_open_amount())<1.5:
    #         self.gripper.actuate(1, velocity=0.5)
    #         self.pr.step()

    #     return self._get_state()  # return current state of the environment

    def step(self, action):
        '''
        Move the robot arm according to the action.
        If control_mode=='joint_velocity', action is 7 dim of joint velocity values + 1 dim rotation of gripper;
        if control_mode=='end_position', action is 3 dim of tip (end of robot arm) position values + 1 dim rotation of gripper;
        '''
        done = False
        if self.control_mode == 'end_position':
            if action is None or action.shape[0] != 4:
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 4))  # random
            # print(action, self.agent_ee_tip.get_position())
            self._move(action)

        elif self.control_mode == 'joint_velocity':
            if action is None or action.shape[0] != 7:
                print('No actions or wrong action dimensions!')
                action = list(np.random.uniform(-0.1, 0.1, 7))  # random
            self.agent.set_joint_target_velocities(
                action)  # Execute action on arm
            self.pr.step()  # if no pr.step() the action won't be conducted

        else:
            raise NotImplementedError

        ax, ay, az = self.gripper.get_position()
        if math.isnan(
                ax
        ):  # capture the case when the gripper is broken during exporation
            self.__init__(headless=self.headless)
            done = True

        tx, ty, tz = self.target.get_position()

        distance = (ax - tx)**2 + (ay - ty)**2 + (
            az - tz)**2  # distance between the gripper and the target object
        ''' for visual-based control only, large time consumption! '''
        # current_vision = self.vision_sensor.capture_rgb()  # capture a screenshot of the view with vision sensor
        # plt.imshow(current_vision)
        # plt.savefig('./img/vision.png')

        reward = 0
        # close the gripper if close enough to the object and the object is detected with the proximity sensor
        if distance < 0.1 and self.proximity_sensor.is_detected(
                self.target) == True:
            # make sure the gripper is open before grasping
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

            self.gripper.actuate(
                0, velocity=0.5
            )  # if done, close the hand, 0 for close and 1 for open; velocity 0.5 ensures the gripper to close with in one frame
            self.pr.step()  # Step the physics simulation

            if self._is_holding():
                # reward for hold here!
                reward += self.reward_offset  # extra reward for grasping the object
                done = True
            else:
                self.gripper.actuate(1, velocity=0.5)
                self.pr.step()

        elif np.sum(
                self.gripper.get_open_amount()
        ) < 1.5:  # if gripper is closed (not fully open) due to collision or esle, open it; .get_open_amount() return list of gripper joint values
            self.gripper.actuate(1, velocity=0.5)
            self.pr.step()

        else:
            pass

        reward -= np.clip(
            np.sqrt(distance), 0, self.reward_offset
        )  # Reward is negative distance to target; clipped for removing abnormal cases

        if tz < self.initial_target_positions[
                2] - self.fall_down_offset:  # the object fall off the table
            done = True
            reward = -self.reward_offset

        # can also set reward for orientation, same orientation for target and gripper, they are actually vertical, so can grasp
        # reward += np.sqrt(np.array(self.agent_ee_tip.get_orientation())-np.array(self.target.get_orientation()))

        return self._get_state(), reward, done, {}

    def shutdown(self):
        ''' Close the simulator '''
        self.pr.stop()
        self.pr.shutdown()
Example #12
0
    def __init__(self,
                 obs_space_keys=('pov', 'arm'),
                 scene_file='rozum_pyrep.ttt',
                 headless=True,
                 video_path=None,
                 pose_sigma=20,
                 randomize=False,
                 sparse=False,
                 camera_resolution=(256, 256),
                 step_limit=200):
        self.obs_space_keys = (obs_space_keys, ) if isinstance(
            obs_space_keys, str) else obs_space_keys
        # PyRep
        self._pyrep = PyRep()
        self._pyrep.launch(scene_file, headless=headless)
        self._pyrep.start()
        self.rozum = Rozum()
        self.rozum.set_control_loop_enabled(True)
        self.gripper = BaxterGripper()
        self.gripper.set_control_loop_enabled(True)
        self._initial_robot_state = (self.rozum.get_configuration_tree(),
                                     self.gripper.get_configuration_tree())
        self.cube = Shape("Cube")
        self.graspable_objects = [
            self.cube,
        ]
        self.camera = VisionSensor("render")
        self.camera.set_resolution(list(camera_resolution))
        self.camera0 = VisionSensor("render0")
        self.camera0.set_resolution(list(camera_resolution))
        self.rozum_tip = self.rozum.get_tip()

        # Action and Observation spaces
        self.angles_scale = np.array(
            [np.pi for _ in range(self.rozum.num_joints)])
        low = np.array([-20 / 180 for _ in range(self.rozum.num_joints)] + [
            0.,
        ])
        high = np.array([20 / 180 for _ in range(self.rozum.num_joints)] + [
            1.,
        ])
        self.action_space = gym.spaces.Box(low=low, high=high)
        angle_bounds = self.rozum.get_joint_intervals()[1]
        low = np.array([bound[0] for bound in angle_bounds] + [
            0.,
        ])
        high = np.array([bound[0] + bound[1] for bound in angle_bounds] + [
            1.,
        ])
        self.angles_bounds = gym.spaces.Box(low=low[:-1], high=high[:-1])
        self._available_obs_spaces = dict()
        self._render_dict = dict()
        self._available_obs_spaces['pov'] = gym.spaces.Box(
            shape=self.camera.resolution + [3],
            low=0,
            high=255,
            dtype=np.uint8)
        self._render_dict['pov'] = lambda: self.get_image(self.camera)

        self._available_obs_spaces['pov0'] = gym.spaces.Box(
            shape=self.camera0.resolution + [3],
            low=0,
            high=255,
            dtype=np.uint8)
        self._render_dict['pov0'] = lambda: self.get_image(self.camera0)

        low = np.array([bound[0] for bound in angle_bounds] * 2 +
                       [0., 0., -1., -1., -1.])
        high = np.array([bound[0] + bound[1]
                         for bound in angle_bounds] * 2 + [1., 1., 1., 1., 1.])
        self._available_obs_spaces['arm'] = gym.spaces.Box(low=low,
                                                           high=high,
                                                           dtype=np.float32)
        self._render_dict['arm'] = self.get_arm_state
        self._available_obs_spaces['cube'] = gym.spaces.Box(shape=(6, ),
                                                            low=-np.inf,
                                                            high=np.inf,
                                                            dtype=np.float32)
        self._render_dict['cube'] = self.get_cube_state

        self._available_obs_spaces['time'] = gym.spaces.Box(low=np.zeros(1),
                                                            high=np.ones(1),
                                                            dtype=np.uint8)
        self._render_dict['time'] = lambda: [
            self.current_step / self.step_limit,
        ]
        try:
            if len(self.obs_space_keys) > 1:
                self.observation_space = gym.spaces.Dict({
                    key: self._available_obs_spaces[key]
                    for key in self.obs_space_keys
                })
            else:
                self.observation_space = self._available_obs_spaces[
                    self.obs_space_keys[0]]
        except KeyError as err:
            message = "Observation space {} is not supported.".format(
                err.args[0])
            message += " Available observation space keys: "
            message += ", ".join(self._available_obs_spaces.keys())
            err.args = (message, )
            raise

        # Environment settings
        self.reward_range = None
        self.current_step = 0
        self.step_limit = step_limit
        self._start_arm_joint_pos = self.rozum.get_joint_positions()
        self._start_gripper_joint_pos = self.gripper.get_joint_positions()
        self.init_cube_pose = self.cube.get_pose()
        self.pose_sigma = pose_sigma
        self.sparse = sparse
        self.randomize = randomize

        # Video
        self.recording = list()
        self.current_episode = 0
        self.rewards = [0]
        self.path = video_path
        if video_path:

            def video_step():
                self._pyrep.step()
                self.recording.append(self.get_image(self.camera)[..., ::-1])

            self.sim_step = video_step
        else:
            self.sim_step = self._pyrep.step
Example #13
0
class RozumEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 obs_space_keys=('pov', 'arm'),
                 scene_file='rozum_pyrep.ttt',
                 headless=True,
                 video_path=None,
                 pose_sigma=20,
                 randomize=False,
                 sparse=False,
                 camera_resolution=(256, 256),
                 step_limit=200):
        self.obs_space_keys = (obs_space_keys, ) if isinstance(
            obs_space_keys, str) else obs_space_keys
        # PyRep
        self._pyrep = PyRep()
        self._pyrep.launch(scene_file, headless=headless)
        self._pyrep.start()
        self.rozum = Rozum()
        self.rozum.set_control_loop_enabled(True)
        self.gripper = BaxterGripper()
        self.gripper.set_control_loop_enabled(True)
        self._initial_robot_state = (self.rozum.get_configuration_tree(),
                                     self.gripper.get_configuration_tree())
        self.cube = Shape("Cube")
        self.graspable_objects = [
            self.cube,
        ]
        self.camera = VisionSensor("render")
        self.camera.set_resolution(list(camera_resolution))
        self.camera0 = VisionSensor("render0")
        self.camera0.set_resolution(list(camera_resolution))
        self.rozum_tip = self.rozum.get_tip()

        # Action and Observation spaces
        self.angles_scale = np.array(
            [np.pi for _ in range(self.rozum.num_joints)])
        low = np.array([-20 / 180 for _ in range(self.rozum.num_joints)] + [
            0.,
        ])
        high = np.array([20 / 180 for _ in range(self.rozum.num_joints)] + [
            1.,
        ])
        self.action_space = gym.spaces.Box(low=low, high=high)
        angle_bounds = self.rozum.get_joint_intervals()[1]
        low = np.array([bound[0] for bound in angle_bounds] + [
            0.,
        ])
        high = np.array([bound[0] + bound[1] for bound in angle_bounds] + [
            1.,
        ])
        self.angles_bounds = gym.spaces.Box(low=low[:-1], high=high[:-1])
        self._available_obs_spaces = dict()
        self._render_dict = dict()
        self._available_obs_spaces['pov'] = gym.spaces.Box(
            shape=self.camera.resolution + [3],
            low=0,
            high=255,
            dtype=np.uint8)
        self._render_dict['pov'] = lambda: self.get_image(self.camera)

        self._available_obs_spaces['pov0'] = gym.spaces.Box(
            shape=self.camera0.resolution + [3],
            low=0,
            high=255,
            dtype=np.uint8)
        self._render_dict['pov0'] = lambda: self.get_image(self.camera0)

        low = np.array([bound[0] for bound in angle_bounds] * 2 +
                       [0., 0., -1., -1., -1.])
        high = np.array([bound[0] + bound[1]
                         for bound in angle_bounds] * 2 + [1., 1., 1., 1., 1.])
        self._available_obs_spaces['arm'] = gym.spaces.Box(low=low,
                                                           high=high,
                                                           dtype=np.float32)
        self._render_dict['arm'] = self.get_arm_state
        self._available_obs_spaces['cube'] = gym.spaces.Box(shape=(6, ),
                                                            low=-np.inf,
                                                            high=np.inf,
                                                            dtype=np.float32)
        self._render_dict['cube'] = self.get_cube_state

        self._available_obs_spaces['time'] = gym.spaces.Box(low=np.zeros(1),
                                                            high=np.ones(1),
                                                            dtype=np.uint8)
        self._render_dict['time'] = lambda: [
            self.current_step / self.step_limit,
        ]
        try:
            if len(self.obs_space_keys) > 1:
                self.observation_space = gym.spaces.Dict({
                    key: self._available_obs_spaces[key]
                    for key in self.obs_space_keys
                })
            else:
                self.observation_space = self._available_obs_spaces[
                    self.obs_space_keys[0]]
        except KeyError as err:
            message = "Observation space {} is not supported.".format(
                err.args[0])
            message += " Available observation space keys: "
            message += ", ".join(self._available_obs_spaces.keys())
            err.args = (message, )
            raise

        # Environment settings
        self.reward_range = None
        self.current_step = 0
        self.step_limit = step_limit
        self._start_arm_joint_pos = self.rozum.get_joint_positions()
        self._start_gripper_joint_pos = self.gripper.get_joint_positions()
        self.init_cube_pose = self.cube.get_pose()
        self.pose_sigma = pose_sigma
        self.sparse = sparse
        self.randomize = randomize

        # Video
        self.recording = list()
        self.current_episode = 0
        self.rewards = [0]
        self.path = video_path
        if video_path:

            def video_step():
                self._pyrep.step()
                self.recording.append(self.get_image(self.camera)[..., ::-1])

            self.sim_step = video_step
        else:
            self.sim_step = self._pyrep.step

    def get_arm_state(self):
        arm = self.rozum.get_joint_positions()
        arm += self.rozum.get_joint_target_positions()
        arm += self.gripper.get_open_amount()
        arm += self.rozum_tip.get_position().tolist()
        return arm

    def get_cube_state(self):
        box = self.cube.get_position().tolist()
        box += self.cube.get_orientation().tolist()
        return box

    def sample_action(self):
        return self.action_space.sample()

    def step(self, action: list):
        done = False
        info = dict()
        joint_action, ee_action = action[:-1], action[-1]
        distance_mod = 3
        scale = 100  # m -> cm

        previous_n = int(
            self._get_distance(self.rozum_tip, self.cube) *
            scale) // distance_mod
        grasped = self._robot_step(ee_action, joint_action)
        _, _, arm_z = self.rozum.joints[-1].get_position()
        tx, ty, tz = self.cube.get_position()
        pose_filter = arm_z > (tz + 0.05)
        current_distance = self._get_distance(self.rozum_tip, self.cube)
        current_n = int(current_distance * scale) // distance_mod
        reward = 0
        if not self.sparse:
            reward += previous_n - current_n
            if reward > 0:
                reward *= pose_filter
        state = self.render()
        info['distance'] = current_distance
        if grasped:
            reward += 10
            done = True
            info['grasped'] = 1
        elif self.current_step >= self.step_limit:
            done = True
            info['grasped'] = 0
        elif self._get_distance(self.rozum.joints[0], self.cube) > 0.76:
            done = True
            reward = -1
            info['grasped'] = 0
        elif tz < 0.5:
            done = True
            reward = -1
            info['grasped'] = 0
        self.rewards.append(reward)
        return state, reward, done, info

    @staticmethod
    def _get_distance(first_object, second_object):
        x, y, z = first_object.get_position()
        tx, ty, tz = second_object.get_position()
        distance = np.sqrt((x - tx)**2 + (y - ty)**2 + (z - tz)**2)
        return distance

    def _robot_step(self, ee_action, joint_action):
        grasped = False
        current_ee = (1.0 if np.mean(self.gripper.get_open_amount()) > 0.8 else
                      0.0)
        if ee_action > 0.5:
            ee_action = 1.0
        elif ee_action < 0.5:
            ee_action = 0.0
        if current_ee != ee_action:
            gripper_done = False
            self.rozum.set_joint_target_positions(
                self.rozum.get_joint_positions())
            while not gripper_done:
                gripper_done = self.gripper.actuate(ee_action, velocity=0.2)
                self.sim_step()
                self.current_step += 1
            if ee_action == 0.0:
                for g_obj in self.graspable_objects:
                    grasped = self.gripper.grasp(g_obj)
        else:
            position = [
                j + a * scale
                for j, a, scale in zip(self.rozum.get_joint_positions(),
                                       joint_action, self.angles_scale)
            ]
            position = list(
                np.clip(position, self.angles_bounds.low,
                        self.angles_bounds.high))
            self.rozum.set_joint_target_positions(position)
            for _ in range(4):
                self.sim_step()
                self.current_step += 1
        return grasped

    def reset(self):
        self.gripper.release()
        arm, gripper = self._initial_robot_state
        self._pyrep.set_configuration_tree(arm)
        self._pyrep.set_configuration_tree(gripper)
        self.rozum.set_joint_positions(self._start_arm_joint_pos)
        self.rozum.set_joint_target_velocities([0] * len(self.rozum.joints))
        self.gripper.set_joint_positions(self._start_gripper_joint_pos)
        self.gripper.set_joint_target_velocities([0] *
                                                 len(self.gripper.joints))

        # Initialize scene
        if self.randomize:
            self.randomize_object()
        pose = self.init_cube_pose.copy()
        pose[0] += np.random.uniform(0.0, 0.2)  # max distance ~ 0.76
        pose[1] += np.random.uniform(-0.15, 0.15)
        self.cube.set_pose(pose)
        random_action = np.random.normal(
            0., self.pose_sigma, len(self._start_arm_joint_pos)) / 180 * np.pi
        position = [
            angle + action
            for angle, action in zip(self._start_arm_joint_pos, random_action)
        ]
        self.rozum.set_joint_target_positions(position)
        for _ in range(4):
            self._pyrep.step()
        self.current_step = 0
        state = self.render()

        # Video
        if len(self.recording) > 0:
            name = str(self.current_episode).zfill(4) + "r" + str(
                sum(map(int, self.rewards))).zfill(4) + ".mp4"
            full_path = os.path.join(self.path, name)
            self.save_video(full_path, video=self.recording)
            self.current_episode += 1
            self.rewards = [0]
            self.recording = list()
        self.sim_step()
        return state

    def render(self, mode='human'):
        if len(self.obs_space_keys) > 1:
            state = {
                key: self._render_dict[key]()
                for key in self.obs_space_keys
            }
        else:
            state = self._render_dict[self.obs_space_keys[0]]()
        return state

    @staticmethod
    def get_image(sensor):
        img = sensor.capture_rgb()
        img *= 255
        img = img.astype('uint8')
        return img

    def close(self):
        self._pyrep.stop()
        self._pyrep.shutdown()

    @staticmethod
    def save_video(filename, video):
        """
        saves video from list of np.array images
        :param filename: filename or path to file
        :param video: [image, ..., image]
        :return:
        """
        size_x, size_y, size_z = video[0].shape
        out = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc(*'mp4v'), 30.0,
                              (size_x, size_y))
        for image in video:
            out.write(image)
        out.release()
        cv2.destroyAllWindows()

    def randomize_object(self):
        handle = self.cube.get_handle()
        sim.simRemoveObject(handle)
        sizes = [max(random() * 0.1, 0.02), 0.05]
        objects = list()
        position = [0, 0, 0]
        mass = 0.1
        # Create cube with random size
        s = sizes[0]
        objects.append(
            Shape.create(type=PrimitiveShape.CUBOID,
                         size=[s, s, s],
                         position=position,
                         mass=mass,
                         color=[random() for _ in range(3)]))
        index = sample(range(len(position) - 1), 1)[0]
        sign = sample([1, -1], 1)[0]
        position[index] += sum(sizes) * 0.5 * sign
        s = sizes[-1]
        # Create cube with fix size
        objects.append(
            Shape.create(type=PrimitiveShape.CUBOID,
                         size=[s, s, s],
                         position=position,
                         mass=mass,
                         color=[random() for _ in range(3)]))
        handles = [o.get_handle() for o in objects]
        handle = sim.simGroupShapes(handles)
        self.cube = Shape(handle)
        self.graspable_objects = [
            self.cube,
        ]