Exemple #1
0
    def _set_action(self, action):
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
Exemple #2
0
    def _set_action(self, action):
        assert action.shape == (4, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
Exemple #3
0
    def _set_action(self, action):
        assert action.shape == (4, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]
        # rotation
        grip_mat = rotations.quat2mat(self.sim.data.mocap_quat)
        grip_v = np.squeeze(np.matmul(grip_mat, np.array([0, 1, 0])))
        grip_radius = (math.atan2(grip_v[0], grip_v[1]) +
                       2 * math.pi) % (2 * math.pi)
        if grip_radius > math.pi:
            grip_radius = (grip_radius - 2 * math.pi)
        angle_ctrl = grip_radius + action[3]
        rot_mat = np.array([[1, 0, 0],
                            [0, math.cos(angle_ctrl), -math.sin(angle_ctrl)],
                            [0, math.sin(angle_ctrl),
                             math.cos(angle_ctrl)]])
        axis_mat = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
        rot_ctrl = rotations.mat2quat(np.matmul(axis_mat, rot_mat))

        pos_ctrl *= 0.05  # limit maximum change in position
        gripper_ctrl = np.array([0, 0])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)

        utils.mocap_set_action(self.sim, action)

        if self.counter >= 2:
            for i in range(10):
                self.sim.step()
                pos_ctrl = np.array([0.0, 0.02, 0.0])
                gripper_ctrl = np.array([0, 0])
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
                self.sim.step()
Exemple #4
0
    def step(self, action):

        self.num_step += 1

        ## parsing of primitive actions
        delta_x, delta_y = action

        # cap deltas to be between -1, 1
        delta_x = max(-1, min(1, delta_x))
        delta_y = max(-1, min(1, delta_y))

        x, y = self.get_mocap_state()
        x += delta_x * 0.05
        y += delta_y * 0.05

        out_of_bound = (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6)

        if not out_of_bound:
            delta_pos = np.array([delta_x * 0.05, delta_y * 0.05, 0.0])
            delta_quat = np.array([0.0, 0.0, 1.0, 0.])
            delta = np.concatenate((delta_pos, delta_quat))
            mocap_set_action(self.sim, delta)
            self.do_simulation()

        ob = self._get_obs()

        total_reward, success = self.calc_reward(ob, True)

        ## getting state
        info = {"done": None}
        if success:
            done = True
            info["done"] = "goal reached"
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = self.copy_obs(ob)

        return ob, total_reward, done, info
    def step(self, action):

        self.num_step += 1

        ## parsing of primitive actions
        delta_x, delta_y, delta_z = action

        # cap the motions
        delta_x = max(-1, min(1, delta_x))
        delta_y = max(-1, min(1, delta_y))
        delta_z = max(-1, min(1, delta_z))

        x, y, z = self.get_mocap_state()

        new_x = max(0.3, min(0.8, x + delta_x * 0.05))
        new_y = max(0.0, min(0.6, y + delta_y * 0.05))
        new_z = max(0.11, min(0.25, z + delta_z * 0.05))

        delta_pos = np.array([new_x - x, new_y - y, new_z - z])
        delta_quat = np.array([0.0, 0.0, 1.0, 0.])
        delta = np.concatenate((delta_pos, delta_quat))
        mocap_set_action(self.sim, delta)
        self.do_simulation()

        ob = self._get_obs()
        total_reward = self.calc_reward(ob)

        ## getting state
        info = {"done": None}
        if total_reward == 0:
            done = True
            info["done"] = "goal reached"
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        info['absolute_ob'] = ob.copy()

        return ob, total_reward, done, info
    def _set_action(self, action):
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, rot_ctrl, gripper_ctrl = action[:3], action[3], action[4]
        #pos_ctrl[1:]=np.array([0, 0])
        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl *= 90.
        #rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        axis = np.array([1, 0, 0])

        rot_ctrl = quat_from_angle_and_axis(np.pi * rot_ctrl / 180, axis)
        rot_ctrl = rotations.quat_mul(np.array([1., 0., 1., 0.]), rot_ctrl)
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
    def _set_action(self, action):
        assert action.shape == (3,)
        self.counter += 1
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([-1, -1])
        assert gripper_ctrl.shape == (2,)

        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            action[2] = 0.56402952

            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)

        if self.counter >= 3:
        # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
            action = np.array([0,0,-0.05,1,0,1,0,1,1])
            utils.mocap_set_action(self.sim, action)
            for _ in range(5):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()
Exemple #8
0
    def _set_action(self, action):
        if (not action.shape == (8,)) and (not action.shape == (4,)):
            assert False
        action = action.copy()  # ensure that we don't change the action outside of this scope
        if action.shape == (4,):
            mocap_ctrl, gripper_ctrl = action[:3], action[3]
            pos_ctrl, rot_ctrl = mocap_ctrl, [1., 0., 1., 0.]
        elif action.shape == (8,):
            mocap_ctrl, gripper_ctrl = action[:7], action[7]
            pos_ctrl, rot_ctrl = mocap_ctrl[:3], mocap_ctrl[3:7]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = rot_ctrl
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
Exemple #9
0
    def _set_action(self, action):
        '''
        Sets the action to apply to the simulation
        '''
        assert action.shape == (self.n_actions, )
        # Ensure the action doesn't change outside of this scope
        action = action.copy()
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        # Limit the maximum change in position
        pos_ctrl *= 20
        # Fix roation of the hand, expressed as a quaterion
        rot_ctrl = action[4:]
        gripper_ctrl = np.array([gripper_ctrl])
        assert gripper_ctrl.shape == (1, )
        if self.block_gripper:
            gripper_ctrl = [0.0]
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply the action to the simulation
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
    def _set_action(self, action):
        assert action.shape == (3, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([0, 0])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)
        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            if self.use_close_loop:
                action[2] = 0.41644691
            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        if self.counter >= 3:
            for _ in range(10):
                self.sim.step()
                pos_ctrl = np.array([0.0, 0.02, 0.0])
                gripper_ctrl = np.array([0, 0])
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
                self.sim.step()
Exemple #11
0
    def _set_action(self, action):
        assert action.shape == (4, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.5  # limit maximum change in position
        rot_ctrl = [
            0., 0, 1, 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        gripper_pos = self.sim.data.get_site_xpos('robot0:grip')
        z_move = gripper_pos[2] + action[2]
        if z_move < 0.15:
            action[2] = 0.15 - gripper_pos[2]

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
Exemple #12
0
    def _set_action(self, action):
        assert action.shape == (3, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([1, 1])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)

        utils.mocap_set_action(self.sim, action)

        if self.counter >= 2:
            # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
            img = self.sim.render(width=500,
                                  height=500,
                                  camera_name="external_camera_1")
            self.sim.step()
            pos_ctrl = np.array([0.0, 0.0, 0.0])
            gripper_ctrl = np.array([-1, -1])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
            # logging
            # grip_pos = self.sim.data.get_site_xpos('robot0:grip')
            # obs_pos = self.sim.data.get_site_xpos('object0')
            # offset = obs_pos - grip_pos
            # print(offset, np.linalg.norm(offset, axis = -1))

            for _ in range(20):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()
            img = self.sim.render(width=500,
                                  height=500,
                                  camera_name="external_camera_1")

            for _ in range(10):
                pos_ctrl = np.array([0.0, 0.02, 0.0])
                gripper_ctrl = np.array([-1, -1])
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
                self.sim.step()
Exemple #13
0
    def _set_action(self, action):
        assert action.shape == (3, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([1, 1])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)

        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            if self.use_close_loop:
                action[2] = 0.46470766
            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        if self.counter >= 3:
            # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
            self.sim.step()
            pos_ctrl = np.array([0.0, 0.0, 0.0])
            gripper_ctrl = np.array([-1, -1])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
            # logging
            # grip_pos = self.sim.data.get_site_xpos('robot0:grip')
            # obs_pos = self.sim.data.get_site_xpos('object0')
            # offset = obs_pos - grip_pos
            # print(offset, np.linalg.norm(offset, axis = -1))

            for _ in range(20):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()

            pos_ctrl = np.array([0.0, 0.0, 0.2])
            gripper_ctrl = np.array([-1, -1])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
Exemple #14
0
 def set_action(self, action: np.ndarray) -> None:
     utils.mocap_set_action(self.mj_sim, action)
Exemple #15
0
 def _set_action(self, action):
     """
     Define the Action: x,y,z relative position of the grippe + open/close gripper
     """
     assert action.shape == (4,) # define the action space = x,y,z,open/close
     action = action.copy()
     eff_pos_control, gripper_control = action[:3], action[3]  # 0.05 open | 0 close
     #print("gripper_control = {}".format(action[3]))
     #eff_pos_control *= 0.05  # limit the maximum change in position
     #print('eff_pos_control = {}'.format(eff_pos_control))
     #eff_rot_control = [0., 0., 0., 1]  # in this case, we fix the rotation of the end-effector
     eff_rot_control = [1., 0., 1., 0.]
     gripper_control = np.array([gripper_control, -gripper_control]) # l_finger(-1,0) r_finger(0,1)
     assert gripper_control.shape == (2,)
     # if the gripper is blocked, open the gripper
     if self.block_gripper:
         gripper_control = np.zeros_like(gripper_control)
     action = np.concatenate([eff_pos_control, eff_rot_control, gripper_control])
     self.action_done_flag = False
     if self.mode == "robot":
         action = action.copy()
         action = action.astype(np.float32)
         #global counter
         #counter = counter + 1
         print('Set Action displacement = {}'.format(action[:3]))
         self.action_publisher.publish(action)
         self.action_done_flag = rospy.wait_for_message('/pyrobot/action_done', Bool).data
         if self.action_done_flag == True:
             rospy.loginfo('[STATUS] Action Done')
         else:
             rospy.loginfo('[STATUS] Action not feasbile and passed')
         """
         while(self.action_done_flag != True):
             n = n + 1
             print('n = {}'.format(n))
             rospy.loginfo('action done = {}'.format(self.action_done_flag))
             #self.action_done_flag = rospy.wait_for_message('/pyrobot/action_done', Bool).data
             # DEBUG:
             if self.action_done_flag == True:
                 pass
             else:
                 rospy.loginfo('Waiting Action to be Done ~')
                 end_time = timer()
                 time_passed = end_time - start_time
                 print('time_passed = {}'.format(time_passed))
                 if time_passed >= 5.:
                     self.action_done_flag = True
                     rospy.loginfo('Time Out and Pass ~~')
         """
         global counter
         counter = counter + 1
         print('counter = {}'.format(counter))
         # DEBUG:
         #rospy.loginfo('[DEBUG] Action Published')
         #rospy.loginfo(action)
     elif self.mode == "sim":
         # Apply actions to simulation
         # DEBUG:
         #print('action = {}'.format(action))
         #utils.ctrl_set_action(self.sim, action)
         print('[Set Action] action = {}'.format(action))
         utils.mocap_set_action(self.sim, action)
         """
    def _set_action(self, action):
        if self._use_real_robot:

            assert action.shape == (self.n_actions, )  # 6 mobile base
            action = action.copy(
            )  # ensure that we don't change the action outside of this scope
            if self.debug_print:
                print("_set_action:", action)
            pos_ctrl, base_ctrl, gripper_ctrl = action[:3], action[
                3:-1], action[-1]

            pos_ctrl *= 0.05  # limit maximum change in position
            base_ctrl *= 0.01

            rot_ctrl = [
                0, 0.707, 0.707, 0
            ]  # fixed rotation of the end effector, expressed as a quaternion

            if self.gripper_close:
                gripper_ctrl = -1.0
            else:
                gripper_ctrl = 1.0

            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            # action = np.concatenate([pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl])

            ee_pose = self.husky_ur5_robot.arm_get_ee_pose(self.use_arm)
            # arm_action = [ee_pose.pose.position.x + pos_ctrl[0],
            #               ee_pose.pose.position.y + pos_ctrl[1],
            #               ee_pose.pose.position.z + pos_ctrl[2],
            #               ee_pose.pose.orientation.w,
            #               ee_pose.pose.orientation.x,
            #               ee_pose.pose.orientation.y,
            #               ee_pose.pose.orientation.z]
            arm_action = pos_ctrl
            print("arm_action: ", arm_action)

            # Applay action to real robot
            # self.husky_ur5_robot.arm_set_ee_pose_relative(pos_ctrl)
            self.husky_ur5_robot.arm_set_ee_pose_relative(arm_action)
            self.husky_ur5_robot.base_velocity_cmd(base_ctrl)
            # self.husky_ur5_robot.base_go_to_relative(base_ctrl)
            if self.gripper_close:
                self.husky_ur5_robot.gripper_close(self.use_arm)
            else:
                self.husky_ur5_robot.gripper_open(self.use_arm)

        else:

            assert action.shape == (self.n_actions, )  # 6 mobile base
            action = action.copy(
            )  # ensure that we don't change the action outside of this scope
            if self.debug_print:
                print("_set_action:", action)
            pos_ctrl, base_ctrl, gripper_ctrl = action[:3], action[
                3:-1], action[-1]
            # pos_ctrl, gripper_ctrl = action[:3], action[3:]

            pos_ctrl *= 0.03  # limit maximum change in position
            base_ctrl *= 0.01
            # rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion

            rot_ctrl = [0, 0.707, 0.707, 0]  #(0 0 0)

            if self.gripper_close:
                gripper_ctrl = -1.0
            else:
                gripper_ctrl = 1.0
            gripper_ctrl = self.gripper_format_action(gripper_ctrl)
            # assert gripper_ctrl.shape == (2,)
            assert gripper_ctrl.shape == (self.gripper_actual_dof, )
            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            action = np.concatenate(
                [pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl])
            # action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

            # Apply action to simulation.
            utils.ctrl_set_action(self.sim,
                                  action)  # base control + gripper control
            utils.mocap_set_action(
                self.sim, action)  # arm control in cartesion (x, y, z)

            # Applay action to real robot
            gripper_cmd = 0
            if self.gripper_close:
                gripper_cmd = np.array([-1.0])
            else:
                gripper_cmd = np.array([1.0])
            action_ros = np.concatenate(
                [pos_ctrl, rot_ctrl, base_ctrl, gripper_cmd])
    def _set_action(self, action):
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]
        stiffness_ctrl = 0.0
        psv_stiffness_ctrl = 0.0
        par_stiffness_ctrl = 0.0

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion

        if action.shape[0] > 4:
            if action.shape[0] > 5:
                if action.shape[0] > 6:
                    par_stiffness_ctrl = 10.0 * action[6]

                    self.par_prev_stiffness += par_stiffness_ctrl
                    self.par_prev_stiffness = np.max(
                        [np.min([self.par_prev_stiffness, 50.0]), 0.0])
                    #                    self.psv_prev_stiffness -= self.par_prev_stiffness

                    self.sim.model.jnt_stiffness[self.sim.model.joint_name2id(
                        'robot0:r_gripper_finger_joint'
                    )] = self.par_prev_stiffness
                    self.sim.model.jnt_stiffness[self.sim.model.joint_name2id(
                        'robot0:l_gripper_finger_joint'
                    )] = self.par_prev_stiffness

                psv_stiffness_ctrl = (self.max_stiffness / 5.0) * action[5]

                self.psv_prev_stiffness += psv_stiffness_ctrl
                self.psv_prev_stiffness = np.max([
                    np.min([
                        self.psv_prev_stiffness,
                        self.max_stiffness - self.par_prev_stiffness
                    ]), self.max_stiffness / 25.0
                ])

                self.sim.model.actuator_gainprm[
                    self.sim.model.
                    actuator_name2id('robot0:l_gripper_finger_joint'),
                    0] = self.psv_prev_stiffness
                self.sim.model.actuator_gainprm[
                    self.sim.model.
                    actuator_name2id('robot0:r_gripper_finger_joint'),
                    0] = self.psv_prev_stiffness
                self.sim.model.actuator_biasprm[
                    self.sim.model.
                    actuator_name2id('robot0:l_gripper_finger_joint'),
                    1] = -self.psv_prev_stiffness
                self.sim.model.actuator_biasprm[
                    self.sim.model.
                    actuator_name2id('robot0:r_gripper_finger_joint'),
                    1] = -self.psv_prev_stiffness

            stiffness_ctrl = (self.max_stiffness / 5.0) * action[4]

            self.prev_stiffness += stiffness_ctrl
            self.prev_stiffness = np.max([
                np.min([
                    self.prev_stiffness,
                    self.par_prev_stiffness + self.psv_prev_stiffness
                ]), self.par_prev_stiffness
            ])

#        print("Series:{}, Parallel:{}, Active:{}".format(self.psv_prev_stiffness,self.par_prev_stiffness, self.prev_stiffness))

        gripper_ctrl = 1.0 * np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        prob = 0.1 if self.pert_type == 'delay' else -0.1
        if np.random.random() > prob:
            action = np.concatenate(
                [pos_ctrl, rot_ctrl, gripper_ctrl, [self.prev_stiffness]])
            self.previous_input = action[7:]
        else:
            try:
                action = np.concatenate(
                    [pos_ctrl, rot_ctrl, self.previous_input])
            except:
                action = np.concatenate([pos_ctrl, rot_ctrl, np.zeros(3)])
#        print("Gripper pose:{}, stiffness:{}".format(gripper_ctrl, self.prev_stiffness))
#        self.prev_stiffness = self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 0]

# Apply action to simulation.
        utils.ctrl_set_action(self.sim, action, self.stiffness_on)
        utils.mocap_set_action(self.sim, action)
Exemple #18
0
    def _set_action(self, action):
        assert action.shape == (4, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]
        grip_mat = rotations.quat2mat(self.sim.data.mocap_quat)
        grip_v = np.squeeze(np.matmul(grip_mat, np.array([0, 1, 0])))
        grip_radius = (math.atan2(grip_v[0], grip_v[1]) +
                       2 * math.pi) % (2 * math.pi)
        if grip_radius > math.pi:
            grip_radius = (grip_radius - 2 * math.pi)
        angle_ctrl = grip_radius + action[3]
        rot_mat = np.array([[1, 0, 0],
                            [0, math.cos(angle_ctrl), -math.sin(angle_ctrl)],
                            [0, math.sin(angle_ctrl),
                             math.cos(angle_ctrl)]])
        axis_mat = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
        rot_ctrl = rotations.mat2quat(np.matmul(axis_mat, rot_mat))

        pos_ctrl *= 0.05  # limit maximum change in position
        gripper_ctrl = np.array([1, 1])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)

        utils.mocap_set_action(self.sim, action)

        if self.counter >= 2:
            # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:

            self.sim.step()
            img = self.sim.render(width=500,
                                  height=500,
                                  camera_name="external_camera_1")
            cv2.imwrite("/checkpoint/jdong1021/grasp_sample1.png", img)
            pos_ctrl = np.array([0.0, 0.0, 0.0])
            gripper_ctrl = np.array([-1, -1])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
            # logging
            # grip_pos = self.sim.data.get_site_xpos('robot0:grip')
            # obs_pos = self.sim.data.get_site_xpos('object0')
            # offset = obs_pos - grip_pos
            # print(offset, np.linalg.norm(offset, axis = -1))

            for _ in range(20):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()

            img = self.sim.render(width=500,
                                  height=500,
                                  camera_name="external_camera_1")
            cv2.imwrite("/checkpoint/jdong1021/grasp_sample2.png", img)

            pos_ctrl = np.array([0.0, 0.0, 0.2])
            gripper_ctrl = np.array([-1, -1])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
    def step(self, action):

        self.num_step += 1

        ## parsing of primitive actions
        delta_x, delta_y, delta_z, gripper = action

        # cap the motions
        delta_x = max(-1, min(1, delta_x))
        delta_y = max(-1, min(1, delta_y))
        delta_z = max(-1, min(1, delta_z))

        x, y, z = self.get_mocap_state()
        x += delta_x * 0.05
        y += delta_y * 0.05
        z += delta_z * 0.05

        out_of_bound = (x < 0.3 or x > 0.8) or (y < 0.0
                                                or y > 0.6) or (z < 0.10
                                                                or z > 0.25)

        if not out_of_bound:
            delta_pos = np.array(
                [delta_x * 0.05, delta_y * 0.05, delta_z * 0.05])
            delta_quat = np.array([0.0, 0.0, 1.0, 0.])
            delta = np.concatenate((delta_pos, delta_quat))
            mocap_set_action(self.sim, delta)
            self.close_gripper(gripper)
            self.do_simulation()
        # else:
        #     print("out of bound as x:%.4f, y:%.4f, z:%.4f"%(x,y,z))

        ob = self._get_obs()
        total_reward = self.calc_reward(ob)

        box_loc = ob[self.space_dim:2 * self.space_dim]

        ## getting state
        info = {"done": None}
        if total_reward == 0:
            done = True
            info["done"] = "goal reached"
        elif (self.num_step > self.max_num_steps):
            done = True
            info["done"] = "max_steps_reached"
        else:
            done = False

        # check block loc
        obj_pos = ob[self.space_dim:2 * self.space_dim]

        block_out_of_bound = False
        if obj_pos.size == 2:
            x, y = obj_pos
            if (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6):
                block_out_of_bound = True
        else:
            x, y, z = obj_pos
            if (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < -0.1
                                                                or z > 0.25):
                block_out_of_bound = True

        if block_out_of_bound:
            done = True
            info['done'] = 'unstable simulation'
            total_reward -= (self.max_num_steps - self.num_step) + 2

        info['absolute_ob'] = ob.copy()

        return ob, total_reward, done, info