Example No. 1
    def ik_robot_eef_joint_cartesian_pose(self):
        """
        Calculates the current cartesian pose of the last joint of the ik robot with respect to the base frame, as
        a (pos, orn) tuple where orn is an x-y-z-w quaternion

        Returns:
            2-tuple:

                - (np.array) position
                - (np.array) orientation
        """
        eef_pos_in_world = np.array(p.getLinkState(self.ik_robot, self.bullet_ee_idx,
                                                   physicsClientId=self.bullet_server_id)[0])
        eef_orn_in_world = np.array(p.getLinkState(self.ik_robot, self.bullet_ee_idx,
                                                   physicsClientId=self.bullet_server_id)[1])
        eef_pose_in_world = T.pose2mat((eef_pos_in_world, eef_orn_in_world))

        base_pos_in_world = np.array(p.getBasePositionAndOrientation(self.ik_robot,
                                                                     physicsClientId=self.bullet_server_id)[0])
        base_orn_in_world = np.array(p.getBasePositionAndOrientation(self.ik_robot,
                                                                     physicsClientId=self.bullet_server_id)[1])
        base_pose_in_world = T.pose2mat((base_pos_in_world, base_orn_in_world))
        world_pose_in_base = T.pose_inv(base_pose_in_world)

        # Update reference to inverse orientation offset from pybullet base frame to world frame
        self.base_orn_offset_inv = T.quat2mat(T.quat_inverse(base_orn_in_world))

        # Update reference target orientation
        self.reference_target_orn = T.quat_multiply(self.reference_target_orn, base_orn_in_world)

        eef_pose_in_base = T.pose_in_A_to_pose_in_B(
            pose_A=eef_pose_in_world, pose_A_in_B=world_pose_in_base
        )

        return T.mat2pose(eef_pose_in_base)
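The method above is just a change of reference frame: the end-effector pose and the base pose are both known in the world frame, and composing the end-effector pose with the inverse of the base pose yields the pose in the base frame. A minimal standalone sketch of that composition with plain homogeneous transforms (hypothetical numbers, no pybullet or robosuite):

import numpy as np

def pose_inv(pose):
    # Invert a 4x4 homogeneous transform analytically.
    inv = np.eye(4)
    inv[:3, :3] = pose[:3, :3].T
    inv[:3, 3] = -inv[:3, :3] @ pose[:3, 3]
    return inv

# End-effector and base poses, both expressed in the world frame.
eef_in_world = np.eye(4)
eef_in_world[:3, 3] = [0.5, 0.0, 1.2]
base_in_world = np.eye(4)
base_in_world[:3, 3] = [0.0, 0.0, 0.9]

# eef_in_base = inv(base_in_world) @ eef_in_world, the same composition that
# T.pose_in_A_to_pose_in_B performs in the method above.
eef_in_base = pose_inv(base_in_world) @ eef_in_world
print(eef_in_base[:3, 3])  # -> [0.5, 0.0, 0.3]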
Example No. 2
def point_down(env):
    current_pos = env._right_hand_pos
    target_rot_X = T.rotation_matrix(0, np.array([1, 0, 0]), point=current_pos)
    target_rot_Y = T.rotation_matrix(math.pi,
                                     np.array([0, 1, 0]),
                                     point=current_pos)
    target_rot_Z = T.rotation_matrix(math.pi,
                                     np.array([0, 0, 1]),
                                     point=current_pos)
    target_rot = np.matmul(target_rot_Z, np.matmul(target_rot_Y, target_rot_X))
    target_quat = T.mat2quat(target_rot)

    done_task = False
    while not done_task:

        current_pos = env._right_hand_pos
        current_quat = env._right_hand_quat

        dquat = T.quat_slerp(current_quat, target_quat, 0.01)
        dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat))

        if np.abs(dquat[3] - 1.0) < 1e-4:
            done_task = True

        grasp = -1
        dpos = np.zeros(3)
        action = np.concatenate([dpos, dquat, [grasp]])
        obs, reward, done, info = env.step(action)

        if RENDER:
            env.render()
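The loop above declares the task done when the remaining delta quaternion is numerically the identity. Since a unit quaternion [x, y, z, w] encodes a rotation of angle 2*arccos(w), the 1e-4 threshold on the w component corresponds to roughly 0.03 rad of residual rotation; a quick check:

import numpy as np

w_threshold = 1.0 - 1e-4
print(2 * np.arccos(w_threshold))  # ~0.028 rad (~1.6 degrees) of rotation remaining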
    def cons_1(self, x):
        # First pose: constrain peg further than lPeg in z direction relative to hole
        p_peg = x[0:3]
        p_hole = x[3:6]
        q_hole = x[10:14]
        p_peg_in_hole_frame = np.matmul(T.quat2mat(T.quat_inverse(q_hole)),
                                        p_peg - p_hole)

        return p_peg_in_hole_frame[2]
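cons_1 expresses the world-frame offset from the hole to the peg in the hole's own frame: rotating a world-frame vector by the inverse of the hole orientation is the same as multiplying by the transpose of the hole's rotation matrix. A small sketch with hypothetical values:

import numpy as np

# Hole frame rotated 90 degrees about z relative to the world frame.
R_hole = np.array([[0., -1., 0.],
                   [1.,  0., 0.],
                   [0.,  0., 1.]])
offset_world = np.array([1.0, 0.0, 0.5])   # p_peg - p_hole in the world frame
offset_hole = R_hole.T @ offset_world      # same vector seen from the hole frame
print(offset_hole)                         # -> [0., -1., 0.5]; cons_1 returns the z component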
def denormalize_action(norm_action, base_pos, base_quat):
    action = np.clip(norm_action.copy(), -1, 1)
    for d in range(ranges.shape[0]):
        action[d] = 0.5 * (action[d] + 1) * (ranges[d, 1] -
                                             ranges[d, 0]) + ranges[d, 0]
    action[3] = action[3] - 2 if action[3] > 1 else action[3]

    cmd_quat = Quaternion(angle=action[3] * np.pi, axis=action[4:7])
    cmd_quat = np.array([cmd_quat.x, cmd_quat.y, cmd_quat.z, cmd_quat.w])
    quat = T.quat_multiply(T.quat_inverse(base_quat), cmd_quat)
    return np.concatenate((action[:3] - base_pos, quat, action[7:]))
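denormalize_action relies on a `ranges` array of per-dimension (low, high) bounds defined outside this snippet; the core of it is the affine map from [-1, 1] to [low, high]. A one-dimensional sketch of that rescaling (the range values here are made up):

import numpy as np

ranges = np.array([[0.2, 0.8]])   # one action dimension: low=0.2, high=0.8
a = np.array([0.0])               # normalized action in [-1, 1]
rescaled = 0.5 * (a + 1) * (ranges[:, 1] - ranges[:, 0]) + ranges[:, 0]
print(rescaled)                   # -> [0.5], the midpoint of the range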
    def cons_3(self, x):
        # Third pose: constrain peg less than lPeg/2 in z direction relative to hole,
        # also constrain peg x and y in hole frame to be below a tolerance,
        # and keep the same orientations as in pose 2
        last_ind_offset = 14  # index at which pose 2 info starts
        ind_offset = 28  # index at which pose 3 info starts
        p_peg = x[ind_offset + 0:ind_offset + 3]
        p_hole = x[ind_offset + 3:ind_offset + 6]
        # Use the quats from pose 2, assuming they stay the same for pose 3
        q_hole = x[last_ind_offset + 10:last_ind_offset + 14]
        p_peg_in_hole_frame = np.matmul(T.quat2mat(T.quat_inverse(q_hole)),
                                        p_peg - p_hole)

        return np.hstack(p_peg_in_hole_frame)
    def cons_2(self, x):
        # Second pose: constrain peg further than lPeg in z direction relative to hole
        # also constrain peg x and y in hole frame to be below a tolerance
        # also constrain rotation error
        ind_offset = 14  # ind at which to start looking for pose 2 info
        p_peg = x[ind_offset + 0:ind_offset + 3]
        p_hole = x[ind_offset + 3:ind_offset + 6]
        q_peg = x[ind_offset + 6:ind_offset + 10]
        q_hole = x[ind_offset + 10:ind_offset + 14]
        p_peg_in_hole_frame = np.matmul(T.quat2mat(T.quat_inverse(q_hole)),
                                        p_peg - p_hole)

        z_hole = np.matmul(T.quat2mat(q_hole),
                           np.array([0, 0, 1]))  # hole z axis in world frame
        z_peg = np.matmul(T.quat2mat(q_peg),
                          np.array([0, 0, 1]))  # peg z axis in world frame

        # Want the z axes to be antiparallel
        z_defect = np.linalg.norm(z_hole + z_peg)

        return np.hstack((p_peg_in_hole_frame, z_defect))
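The z_defect term in cons_2 measures how far the two z axes are from being antiparallel: the norm of their sum is 0 when they point in exactly opposite directions and grows as they tilt toward each other. For example:

import numpy as np

z_hole = np.array([0., 0., 1.])
z_peg = np.array([0., 0., -1.])               # perfectly antiparallel
print(np.linalg.norm(z_hole + z_peg))          # -> 0.0
z_peg_tilted = np.array([0.1, 0., -0.995])     # slightly tilted peg axis
print(np.linalg.norm(z_hole + z_peg_tilted))   # -> ~0.1, a nonzero defect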
    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982

        object_axang = T.quat2axisangle(object_quat)
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)
        # print('object axang to use ' + str(object_axang_touse))

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # gripper_quat_inv = T.quat_inverse(gripper_quat)
        # changing_wf = T.quat_multiply(gripper_quat_inv,self.gripper_init_quat)
        # changing_wf_axang = T.quat2axisangle(changing_wf)

        # print('changing wf axis ' +str(changing_wf_axang))

        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            # print(self.nut_p)
            if not self.object_below_hand:
                action = 20 * (object_pos[:2] - gripper_pos[:2])
            else:
                action = [0, 0]

            action = 20 * (object_pos[:2] - gripper_pos[:2])  # note: overwrites the xy action chosen in the branch above

            # frac = 0.2 # Rate @ which to rotate gripper about z.
            # ang_goal = frac*self.nut_p # Nut p is the nut's random intial pertubation about z.

            # if self.gripper_reoriented == 0:
            #     self.gripper_init_quat = gripper_quat
            # if self.gripper_reoriented < 5: # Gripper should be aligned with nut after 5 action steps
            #     action_angle= [0,0,ang_goal]

            #     #print('object ' + str(object_axang))
            #     #print('gripper ' + str(gripper_axang))

            #     init_inv = T.quat_inverse(self.gripper_init_quat)
            #     init_inv_mat = T.quat2mat(init_inv)

            #     rel = T.quat_multiply(gripper_quat,init_inv)
            #     rel_axang = T.quat2axisangle(rel)
            #     #print('rel_axang ' +str(rel_axang))

            #     rel_axang_WF = np.matmul(init_inv_mat,rel_axang)

            #     #print('rel_axang_WF ' +str(rel_axang_WF))

            #     if take_action:
            #         self.gripper_reoriented+=1
            # else: # After 5 action steps, don't rotate gripper any more
            #     action_angle=[0,0,0]

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]
            #action_angle = [0,0,0]
            #print('action angle ' +str(action_angle))

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
                    #self.gripper_reoriented = 5

        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982  # the z-coordinate of the table surface

        object_axang = T.quat2axisangle(object_quat)
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # reorient the gripper to match the nut faces and move above the nut
        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            if not self.object_below_hand:
                action = 20 * (object_pos[:2] - gripper_pos[:2])
            else:
                action = [0, 0]

            action = 20 * (object_pos[:2] - gripper_pos[:2])  # note: overwrites the xy action chosen in the branch above

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
        # close the gripper and pick the nut
        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
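A controller_action like the one above is typically queried once per environment step; a hypothetical rollout loop (the `env` and `controller` objects here are assumptions, not defined in this snippet) might look like:

obs = env.reset()
done = False
while not done:
    action = controller.controller_action(obs, take_action=True)
    obs, reward, done, info = env.step(action)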
    def __init__(self, *args, **kwargs):
        options = {}
        # Absolute pose control
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.cameraName = "frontview"
        skip_frame = 2
        self.peg_env = suite.make(
            "TwoArmPegInHole",
            robots=["IIWA", "IIWA"],
            **options,
            has_renderer=False,
            ignore_done=True,
            use_camera_obs=True,
            use_object_obs=True,
            camera_names=self.cameraName,
            camera_heights=512,
            camera_widths=512,
        )

        # Tolerances on position and rotation of peg relative to hole
        posTol = 0.005
        rotTol = 0.05

        observation = self.peg_env.reset()

        # observation["peg_to_hole"] is the vector FROM the hole TO the peg
        peg_pos0 = observation["hole_pos"] + observation["peg_to_hole"]
        self.peg_pos0 = peg_pos0
        self.hole_pos0 = observation["hole_pos"]

        # Positions of robots 0 and 1 relative to the peg and hole, expressed in the peg and hole frames; these remain constant.
        pRob0RelPeg = np.matmul(
            T.quat2mat(T.quat_inverse(observation["peg_quat"])),
            observation["robot0_eef_pos"] - (peg_pos0))
        pRob1RelHole = np.matmul(
            T.quat2mat(T.quat_inverse(observation["hole_quat"])),
            observation["robot1_eef_pos"] - observation["hole_pos"])
        qRob0RelPeg = T.quat_multiply(
            T.quat_inverse(observation["robot0_eef_quat"]),
            observation["peg_quat"])
        qRob1RelHole = T.quat_multiply(
            T.quat_inverse(observation["robot1_eef_quat"]),
            observation["hole_quat"])

        # Store geometric constants
        model = self.peg_env.model.get_model()
        pegDim = model.geom_size[15]
        rPeg = pegDim[0]
        lPeg = pegDim[2]
        distSlack = 2 * lPeg

        # Set up optimization problem.
        # Define 3 keyframes: peg higher than hole, peg centered above hole with hole facing up, and peg in hole.
        # One constraint for each keyframe, and one constraint for unit quaternions
        nonlinear_constraint_1 = NonlinearConstraint(self.cons_1,
                                                     distSlack,
                                                     np.inf,
                                                     jac='2-point',
                                                     hess=BFGS())
        nonlinear_constraint_2 = NonlinearConstraint(
            self.cons_2,
            np.array([-posTol, -posTol, distSlack, -rotTol]),
            np.array([posTol, posTol, np.inf, rotTol]),
            jac='2-point',
            hess=BFGS())
        nonlinear_constraint_3 = NonlinearConstraint(
            self.cons_3,
            np.array([-posTol, -posTol, distSlack]),
            np.array([posTol, posTol, np.inf]),
            jac='2-point',
            hess=BFGS())
        nonlinear_constraint_4 = NonlinearConstraint(self.cons_unit_quat,
                                                     np.array([1, 1, 1, 1]),
                                                     np.array([1, 1, 1, 1]),
                                                     jac='2-point',
                                                     hess=BFGS())

        # Initial guess for optimizer
        x0 = np.tile(
            np.hstack((peg_pos0, observation["hole_pos"],
                       observation["peg_quat"], observation["hole_quat"])), 3)
        # Drop the pose-3 quaternions from the last chunk (they reuse pose 2's), leaving 34 decision variables
        x0 = x0[0:34]

        # Solve optimization problem
        res = minimize(self.traj_obj,
                       x0,
                       method='trust-constr',
                       jac='2-point',
                       hess=BFGS(),
                       constraints=[
                           nonlinear_constraint_1, nonlinear_constraint_2,
                           nonlinear_constraint_3, nonlinear_constraint_4
                       ],
                       options={'verbose': 1})
        # 'xtol': 1e-6,
        x = res.x
        # Extract optimization results
        # x = [p_peg_1, p_hole_1, q_peg_1, q_hole_1, p_peg_2, ... q_peg_3, q_hole_3]
        ind_offset_1 = 14
        ind_offset_2 = 28

        p_peg_1 = x[0:3]
        p_hole_1 = x[3:6]
        p_peg_2 = x[ind_offset_1 + 0:ind_offset_1 + 3]
        p_hole_2 = x[ind_offset_1 + 3:ind_offset_1 + 6]
        p_peg_3 = x[ind_offset_2 + 0:ind_offset_2 + 3]
        p_hole_3 = x[ind_offset_2 + 3:ind_offset_2 + 6]

        q_peg_1 = x[6:10]
        q_hole_1 = x[10:14]
        q_peg_2 = x[ind_offset_1 + 6:ind_offset_1 + 10]
        q_hole_2 = x[ind_offset_1 + 10:ind_offset_1 + 14]

        # Use the same orientations as in pose 2
        q_peg_3 = q_peg_2
        q_hole_3 = q_hole_2

        # Robot rel world = peg rel world + robot rel peg
        # Robot rel peg in world frame = (q world frame rel peg frame) * (robot rel peg in peg frame)
        q_rob0_1 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_1)))
        q_rob1_1 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_1)))
        q_rob0_2 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_2)))
        q_rob1_2 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_2)))
        q_rob0_3 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_3)))
        q_rob1_3 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_3)))

        self.p_rob0_1 = p_peg_1 + np.matmul(T.quat2mat(q_peg_1), pRob0RelPeg)
        self.p_rob1_1 = p_hole_1 + np.matmul(T.quat2mat(q_hole_1),
                                             pRob1RelHole)
        self.p_rob0_2 = p_peg_2 + np.matmul(T.quat2mat(q_peg_2), pRob0RelPeg)
        self.p_rob1_2 = p_hole_2 + np.matmul(T.quat2mat(q_hole_2),
                                             pRob1RelHole)
        self.p_rob0_3 = p_peg_3 + np.matmul(T.quat2mat(q_peg_3), pRob0RelPeg)
        self.p_rob1_3 = p_hole_3 + np.matmul(T.quat2mat(q_hole_3),
                                             pRob1RelHole)

        self.axang_rob0_1 = T.quat2axisangle(q_rob0_1)
        self.axang_rob1_1 = T.quat2axisangle(q_rob1_1)
        self.axang_rob0_2 = T.quat2axisangle(q_rob0_2)
        self.axang_rob1_2 = T.quat2axisangle(q_rob1_2)
        self.axang_rob0_3 = T.quat2axisangle(q_rob0_3)
        self.axang_rob1_3 = T.quat2axisangle(q_rob1_3)

        self.max_episode_steps = 200
        # Gains for rotation and position error compensation rates
        self.kpp = 4
        self.kpr = 0.1

        # Initial pose Information
        self.rob0quat0 = observation["robot0_eef_quat"]
        self.rob1quat0 = observation["robot1_eef_quat"]
        self.rob0pos0 = observation["robot0_eef_pos"]
        self.rob1pos0 = observation["robot1_eef_pos"]
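The constraint functions and the slicing above all assume the same decision-vector layout: three keyframes of [p_peg (3), p_hole (3), q_peg (4), q_hole (4)], with the third keyframe's quaternions dropped because they reuse keyframe 2's. A short sketch of that bookkeeping:

import numpy as np

block = 14                                 # entries per full keyframe
n_x = 2 * block + 6                        # 34: two full keyframes plus the pose-3 positions
x = np.zeros(n_x)                          # stand-in for the optimizer's decision vector
p_peg_2 = x[block + 0:block + 3]           # same slice used in cons_2
q_hole_2 = x[block + 10:block + 14]
p_peg_3 = x[2 * block + 0:2 * block + 3]   # same slice used in cons_3
print(n_x)                                 # -> 34, matching x0 = x0[0:34] above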
Example No. 10
    def setup_inverse_kinematics(self, load_urdf=True):
        """
        This function is responsible for doing any setup for inverse kinematics.

        Inverse Kinematics maps end effector (EEF) poses to joint angles that are necessary to achieve those poses.

        Args:
            load_urdf (bool): specifies whether the robot urdf should be loaded into the sim. Useful flag that
                should be cleared in the case of multi-armed robots which might have multiple IK controller instances
                but should all reference the same (single) robot urdf within the bullet sim

        Raises:
            ValueError: [Invalid eef id]
        """

        # get paths to urdfs
        self.robot_urdf = pjoin(
            os.path.join(robosuite.models.assets_root, "bullet_data"),
            "{}_description/urdf/{}_arm.urdf".format(self.robot_name.lower(), self.robot_name.lower())
        )

        # import reference to the global pybullet server and load the urdfs
        from robosuite.controllers import get_pybullet_server
        if load_urdf:
            self.ik_robot = p.loadURDF(fileName=self.robot_urdf,
                                       useFixedBase=1,
                                       physicsClientId=self.bullet_server_id)
            # Add this to the pybullet server
            get_pybullet_server().bodies[self.ik_robot] = self.robot_name
        else:
            # We'll simply assume the most recent robot (robot with highest pybullet id) is the relevant robot and
            # mark this controller as belonging to that robot body
            self.ik_robot = max(get_pybullet_server().bodies)

        # load the number of joints from the bullet data
        self.num_bullet_joints = p.getNumJoints(self.ik_robot, physicsClientId=self.bullet_server_id)

        # Disable collisions between all the joints
        for joint in range(self.num_bullet_joints):
            p.setCollisionFilterGroupMask(
                bodyUniqueId=self.ik_robot,
                linkIndexA=joint,
                collisionFilterGroup=0,
                collisionFilterMask=0,
                physicsClientId=self.bullet_server_id
            )

        # TODO: Very ugly initialization - any way to automate this? Maybe move the hardcoded magic numbers to the robot model files?
        # TODO: Rotations for non-default grippers are not all supported -- e.g.: Robotiq140 Gripper whose coordinate frame
        #   is fully flipped about its x axis -- resulting in mirrored rotational behavior when trying to execute IK control

        # For now, hard code baxter bullet eef idx
        if self.robot_name == "Baxter":
            if "right" in self.eef_name:
                self.bullet_ee_idx = 27
                self.bullet_joint_indexes = [13, 14, 15, 16, 17, 19, 20]
                self.ik_command_indexes = np.arange(1, self.joint_dim + 1)
            elif "left" in self.eef_name:
                self.bullet_ee_idx = 45
                self.bullet_joint_indexes = [31, 32, 33, 34, 35, 37, 38]
                self.ik_command_indexes = np.arange(self.joint_dim + 1, self.joint_dim * 2 + 1)
            else:
                # Error with inputted id
                raise ValueError("Error loading ik controller for Baxter -- arm id's must contain 'right' or 'left'!")
        else:
            # Default assumes pybullet has same number of joints compared to mujoco sim
            self.bullet_ee_idx = self.num_bullet_joints - 1
            self.bullet_joint_indexes = np.arange(self.joint_dim)
            self.ik_command_indexes = np.arange(self.joint_dim)

        # Set rotation offsets (for mujoco eef -> pybullet eef) and rest poses
        self.rest_poses = list(self.initial_joint)
        eef_offset = np.eye(4)
        eef_offset[:3, :3] = T.quat2mat(T.quat_inverse(self.eef_rot_offset))

        self.rotation_offset = eef_offset

        # Simulation will update as fast as it can in real time, instead of waiting for
        # step commands like in the non-realtime case.
        p.setRealTimeSimulation(1, physicsClientId=self.bullet_server_id)
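Once the URDF is loaded and the end-effector link index is known, joint targets for a commanded end-effector pose can be obtained from pybullet's IK solver. A minimal sketch of such a call (the target variables are hypothetical, and a production controller would typically also pass joint limits, ranges, rest poses, and damping):

import pybullet as p

# ... inside the same controller, after setup_inverse_kinematics() has run:
joint_targets = p.calculateInverseKinematics(
    bodyUniqueId=self.ik_robot,
    endEffectorLinkIndex=self.bullet_ee_idx,
    targetPosition=target_pos,        # hypothetical desired eef position (world frame)
    targetOrientation=target_quat,    # hypothetical desired eef orientation (x-y-z-w)
    physicsClientId=self.bullet_server_id,
)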
Example No. 11
            target_rot_X = T.rotation_matrix(0,
                                             np.array([1, 0, 0]),
                                             point=target_pos)
            target_rot_Y = T.rotation_matrix(math.pi,
                                             np.array([0, 1, 0]),
                                             point=target_pos)
            target_rot_Z = T.rotation_matrix(math.pi + target_angle,
                                             np.array([0, 0, 1]),
                                             point=target_pos)
            target_rot = np.matmul(target_rot_Z,
                                   np.matmul(target_rot_Y, target_rot_X))
            target_quat = T.mat2quat(target_rot)

            dquat = T.quat_slerp(current_quat, target_quat, 0.01)
            dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat))

            action = np.concatenate([dpos, dquat, [grasp]])
            obs, reward, done, info = env.step(action)

            pos_traj.append(current_pos - table_top_center)
            angle_traj.append(T.mat2euler(T.quat2mat(current_quat)))
            # env.render()

        time = 0

        done_task = False
        while not done_task:
            time += 1
            if time > 2000:
                break