Exemple #1
0
    def load_model(self):
        """
        Loads robot and optionally add grippers.
        """
        # First, run the superclass method to load the relevant model
        super().load_model()

        # Verify that the loaded model is of the correct type for this robot
        if self.robot_model.arm_type != "single":
            raise TypeError(
                "Error loading robot model: Incompatible arm type specified for this robot. "
                "Requested model arm type: {}, robot arm type: {}".format(
                    self.robot_model.arm_type, type(self)))

        # Now, load the gripper if necessary
        if self.has_gripper:
            if self.gripper_type == "default":
                # Load the default gripper from the robot file
                self.gripper = gripper_factory(
                    self.robot_model.default_gripper, idn=self.idn)
            else:
                # Load user-specified gripper
                self.gripper = gripper_factory(self.gripper_type, idn=self.idn)
        else:
            # Load null gripper
            self.gripper = gripper_factory(None, idn=self.idn)
        # Grab eef rotation offset
        self.eef_rot_offset = T.quat_multiply(
            self.robot_model.hand_rotation_offset,
            self.gripper.rotation_offset)
        # Add gripper to this robot model
        self.robot_model.add_gripper(self.gripper)
    def ik_robot_eef_joint_cartesian_pose(self):
        """
        Calculates the current cartesian pose of the last joint of the ik robot with respect to the base frame as
        a (pos, orn) tuple where orn is a x-y-z-w quaternion

        Returns:
            2-tuple:

                - (np.array) position
                - (np.array) orientation
        """
        eef_pos_in_world = np.array(p.getLinkState(self.ik_robot, self.bullet_ee_idx,
                                                   physicsClientId=self.bullet_server_id)[0])
        eef_orn_in_world = np.array(p.getLinkState(self.ik_robot, self.bullet_ee_idx,
                                                   physicsClientId=self.bullet_server_id)[1])
        eef_pose_in_world = T.pose2mat((eef_pos_in_world, eef_orn_in_world))

        base_pos_in_world = np.array(p.getBasePositionAndOrientation(self.ik_robot,
                                                                     physicsClientId=self.bullet_server_id)[0])
        base_orn_in_world = np.array(p.getBasePositionAndOrientation(self.ik_robot,
                                                                     physicsClientId=self.bullet_server_id)[1])
        base_pose_in_world = T.pose2mat((base_pos_in_world, base_orn_in_world))
        world_pose_in_base = T.pose_inv(base_pose_in_world)

        # Update reference to inverse orientation offset from pybullet base frame to world frame
        self.base_orn_offset_inv = T.quat2mat(T.quat_inverse(base_orn_in_world))

        # Update reference target orientation
        self.reference_target_orn = T.quat_multiply(self.reference_target_orn, base_orn_in_world)

        eef_pose_in_base = T.pose_in_A_to_pose_in_B(
            pose_A=eef_pose_in_world, pose_A_in_B=world_pose_in_base
        )

        return T.mat2pose(eef_pose_in_base)
    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            object_placements = self.placement_initializer.sample()

            # Loop through all objects and reset their positions
            for obj_pos, obj_quat, obj in object_placements.values():
                # If prehensile, set the object normally
                if self.prehensile:
                    self.sim.data.set_joint_qpos(
                        obj.joints[0],
                        np.concatenate([np.array(obj_pos),
                                        np.array(obj_quat)]))
                # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping
                #   the object initially
                else:
                    eef_rot_quat = T.mat2quat(
                        T.euler2mat(
                            [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0]))
                    obj_quat = T.quat_multiply(obj_quat, eef_rot_quat)
                    for j in range(100):
                        # Set object in hand
                        self.sim.data.set_joint_qpos(
                            obj.joints[0],
                            np.concatenate(
                                [self._eef0_xpos,
                                 np.array(obj_quat)]))
                        # Close gripper (action = 1) and prevent arm from moving
                        if self.env_configuration == 'bimanual':
                            # Execute no-op action with gravity compensation
                            torques = np.concatenate([
                                self.robots[0].controller["right"].
                                torque_compensation, self.robots[0].
                                controller["left"].torque_compensation
                            ])
                            self.sim.data.ctrl[self.robots[
                                0]._ref_joint_actuator_indexes] = torques
                            # Execute gripper action
                            self.robots[0].grip_action(
                                gripper=self.robots[0].gripper["right"],
                                gripper_action=[1])
                        else:
                            # Execute no-op action with gravity compensation
                            self.sim.data.ctrl[self.robots[0]._ref_joint_actuator_indexes] =\
                                self.robots[0].controller.torque_compensation
                            self.sim.data.ctrl[self.robots[1]._ref_joint_actuator_indexes] = \
                                self.robots[1].controller.torque_compensation
                            # Execute gripper action
                            self.robots[0].grip_action(
                                gripper=self.robots[0].gripper,
                                gripper_action=[1])
                        # Take forward step
                        self.sim.step()
Exemple #4
0
def point_down(env):
    current_pos = env._right_hand_pos
    target_rot_X = T.rotation_matrix(0, np.array([1, 0, 0]), point=current_pos)
    target_rot_Y = T.rotation_matrix(math.pi,
                                     np.array([0, 1, 0]),
                                     point=current_pos)
    target_rot_Z = T.rotation_matrix(math.pi,
                                     np.array([0, 0, 1]),
                                     point=current_pos)
    target_rot = np.matmul(target_rot_Z, np.matmul(target_rot_Y, target_rot_X))
    target_quat = T.mat2quat(target_rot)

    done_task = False
    while not done_task:

        current_pos = env._right_hand_pos
        current_quat = env._right_hand_quat

        dquat = T.quat_slerp(current_quat, target_quat, 0.01)
        dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat))

        if np.abs(dquat[3] - 1.0) < 1e-4:
            done_task = True

        grasp = -1
        dpos = np.zeros(3)
        action = np.concatenate([dpos, dquat, [grasp]])
        obs, reward, done, info = env.step(action)

        if RENDER:
            env.render()
Exemple #5
0
 def _make_input(self, action, old_quat):
     """
     Helper function that returns a dictionary with keys dpos, rotation from a raw input
     array. The first three elements are taken to be displacement in position, and a
     quaternion indicating the change in rotation with respect to @old_quat.
     """
     return {
         "dpos": action[:3],
         # IK controller takes an absolute orientation in robot base frame
         "rotation": T.quat2mat(T.quat_multiply(old_quat, action[3:7])),
     }
def denormalize_action(norm_action, base_pos, base_quat):
    action = np.clip(norm_action.copy(), -1, 1)
    for d in range(ranges.shape[0]):
        action[d] = 0.5 * (action[d] + 1) * (ranges[d, 1] -
                                             ranges[d, 0]) + ranges[d, 0]
    action[3] = action[3] - 2 if action[3] > 1 else action[3]

    cmd_quat = Quaternion(angle=action[3] * np.pi, axis=action[4:7])
    cmd_quat = np.array([cmd_quat.x, cmd_quat.y, cmd_quat.z, cmd_quat.w])
    quat = T.quat_multiply(T.quat_inverse(base_quat), cmd_quat)
    return np.concatenate((action[:3] - base_pos, quat, action[7:]))
Exemple #7
0
    def _make_input(self, action, old_quat):
        """
        Helper function that returns a dictionary with keys dpos, rotation from a raw input
        array. The first three elements are taken to be displacement in position, and a
        quaternion indicating the change in rotation with respect to @old_quat. Additionally clips @action as well

        Args:
            action (np.array) should have form: [dx, dy, dz, ax, ay, az] (orientation in
                scaled axis-angle form)
            old_quat (np.array) the old target quaternion that will be updated with the relative change in @action
        """
        # Clip action appropriately
        dpos, rotation = self._clip_ik_input(action[:3], action[3:])

        # Update reference targets
        self.reference_target_pos += dpos * self.user_sensitivity
        self.reference_target_orn = T.quat_multiply(old_quat, rotation)

        return {"dpos": dpos * self.user_sensitivity, "rotation": T.quat2mat(rotation)}
    def load_model(self):
        """
        Loads robot and optionally add grippers.
        """
        # First, run the superclass method to load the relevant model
        super().load_model()

        # Verify that the loaded model is of the correct type for this robot
        if self.robot_model.arm_type != "bimanual":
            raise TypeError(
                "Error loading robot model: Incompatible arm type specified for this robot. "
                "Requested model arm type: {}, robot arm type: {}".format(
                    self.robot_model.arm_type, type(self)))

        # Now, load the gripper if necessary
        for arm in self.arms:
            if self.has_gripper[arm]:
                if self.gripper_type[arm] == 'default':
                    # Load the default gripper from the robot file
                    self.gripper[arm] = gripper_factory(
                        self.robot_model.gripper[arm],
                        idn="_".join((str(self.idn), arm)))
                else:
                    # Load user-specified gripper
                    self.gripper[arm] = gripper_factory(self.gripper_type[arm],
                                                        idn="_".join(
                                                            (str(self.idn),
                                                             arm)))
            else:
                # Load null gripper
                self.gripper[arm] = gripper_factory(None,
                                                    idn="_".join(
                                                        (str(self.idn), arm)))
            # Grab eef rotation offset
            self.eef_rot_offset[arm] = T.quat_multiply(
                self.robot_model.hand_rotation_offset[arm],
                self.gripper[arm].rotation_offset)
            # Use gripper visualization if necessary
            if not self.gripper_visualization[arm]:
                self.gripper[arm].hide_visualization()
            self.robot_model.add_gripper(self.gripper[arm],
                                         self.robot_model.eef_name[arm])
    def sample(self, fixtures=None, reference=None, on_top=True):
        """
        Uniformly sample relative to this sampler's reference_pos or @reference (if specified).

        Args:
            fixtures (dict): dictionary of current object placements in the scene as well as any other relevant
                obstacles that should not be in contact with newly sampled objects. Used to make sure newly
                generated placements are valid. Should be object names mapped to (pos, quat, MujocoObject)

            reference (str or 3-tuple or None): if provided, sample relative placement. Can either be a string, which
                corresponds to an existing object found in @fixtures, or a direct (x,y,z) value. If None, will sample
                relative to this sampler's `'reference_pos'` value.

            on_top (bool): if True, sample placement on top of the reference object. This corresponds to a sampled
                z-offset of the current sampled object's bottom_offset + the reference object's top_offset
                (if specified)

        Return:
            dict: dictionary of all object placements, mapping object_names to (pos, quat, obj), including the
                placements specified in @fixtures. Note quat is in (w,x,y,z) form

        Raises:
            RandomizationError: [Cannot place all objects]
            AssertionError: [Reference object name does not exist, invalid inputs]
        """
        # Standardize inputs
        placed_objects = {} if fixtures is None else copy(fixtures)
        if reference is None:
            base_offset = self.reference_pos
        elif type(reference) is str:
            assert reference in placed_objects, "Invalid reference received. Current options are: {}, requested: {}"\
                .format(placed_objects.keys(), reference)
            ref_pos, _, ref_obj = placed_objects[reference]
            base_offset = np.array(ref_pos)
            if on_top:
                base_offset += np.array((0, 0, ref_obj.top_offset[-1]))
        else:
            base_offset = np.array(reference)
            assert base_offset.shape[0] == 3, "Invalid reference received. Should be (x,y,z) 3-tuple, but got: {}"\
                .format(base_offset)

        # Sample pos and quat for all objects assigned to this sampler
        for obj in self.mujoco_objects:
            # First make sure the currently sampled object hasn't already been sampled
            assert obj.name not in placed_objects, "Object '{}' has already been sampled!".format(
                obj.name)

            horizontal_radius = obj.horizontal_radius
            bottom_offset = obj.bottom_offset
            success = False
            for i in range(5000):  # 5000 retries
                object_x = self._sample_x(horizontal_radius) + base_offset[0]
                object_y = self._sample_y(horizontal_radius) + base_offset[1]
                object_z = self.z_offset + base_offset[2]
                if on_top:
                    object_z -= bottom_offset[-1]

                # objects cannot overlap
                location_valid = True
                if self.ensure_valid_placement:
                    for (x, y, z), _, other_obj in placed_objects.values():
                        if (np.linalg.norm((object_x - x, object_y - y)) <=
                                other_obj.horizontal_radius + horizontal_radius
                            ) and (object_z - z <= other_obj.top_offset[-1] -
                                   bottom_offset[-1]):
                            location_valid = False
                            break

                if location_valid:
                    # random rotation
                    quat = self._sample_quat()

                    # multiply this quat by the object's initial rotation if it has the attribute specified
                    if hasattr(obj, "init_quat"):
                        quat = quat_multiply(quat, obj.init_quat)

                    # location is valid, put the object down
                    pos = (object_x, object_y, object_z)
                    placed_objects[obj.name] = (pos, quat, obj)
                    success = True
                    break

            if not success:
                raise RandomizationError("Cannot place all objects ):")

        return placed_objects
    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982

        object_axang = T.quat2axisangle(object_quat)
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)
        # print('object axang to use ' + str(object_axang_touse))

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # gripper_quat_inv = T.quat_inverse(gripper_quat)
        # changing_wf = T.quat_multiply(gripper_quat_inv,self.gripper_init_quat)
        # changing_wf_axang = T.quat2axisangle(changing_wf)

        # print('changing wf axis ' +str(changing_wf_axang))

        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            # print(self.nut_p)
            if not self.object_below_hand:
                action = 20 * (object_pos[:2] - gripper_pos[:2])
            else:
                action = [0, 0]

            action = 20 * (object_pos[:2] - gripper_pos[:2])

            # frac = 0.2 # Rate @ which to rotate gripper about z.
            # ang_goal = frac*self.nut_p # Nut p is the nut's random intial pertubation about z.

            # if self.gripper_reoriented == 0:
            #     self.gripper_init_quat = gripper_quat
            # if self.gripper_reoriented < 5: # Gripper should be aligned with nut after 5 action steps
            #     action_angle= [0,0,ang_goal]

            #     #print('object ' + str(object_axang))
            #     #print('gripper ' + str(gripper_axang))

            #     init_inv = T.quat_inverse(self.gripper_init_quat)
            #     init_inv_mat = T.quat2mat(init_inv)

            #     rel = T.quat_multiply(gripper_quat,init_inv)
            #     rel_axang = T.quat2axisangle(rel)
            #     #print('rel_axang ' +str(rel_axang))

            #     rel_axang_WF = np.matmul(init_inv_mat,rel_axang)

            #     #print('rel_axang_WF ' +str(rel_axang_WF))

            #     if take_action:
            #         self.gripper_reoriented+=1
            # else: # After 5 action steps, don't rotate gripper any more
            #     action_angle=[0,0,0]

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]
            #action_angle = [0,0,0]
            #print('action angle ' +str(action_angle))

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
                    #self.gripper_reoriented = 5

        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
    def sample(self,
               fixtures=None,
               return_placements=False,
               reference_object_name=None,
               sample_on_top=False):
        """
        Uniformly sample on a surface (not necessarily table surface).

        Args:
            fixtures (dict): current dictionary of object placements in the scene. Used to make sure
                generated placements are valid.

            return_placements (bool): if True, return the updated dictionary
                of object placements.

            reference_object_name (str): if provided, sample placement relative to this object's
                placement (which must be provided in @fixtures).

            sample_on_top (bool): if True, sample placement on top of the reference object.

        Return:
            2-tuple or 3-tuple:

                - (list) list of placed object positions

                - (list) list of placed object quaternions

                - (dict) if @return_placements is True, returns a dictionary of all
                    object placements, including the ones placed by this sampler.

        Raises:
            RandomizationError: [Cannot place all objects]
            AssertionError: [Reference object name does not exist]
        """
        pos_arr = []
        quat_arr = []

        if fixtures is None:
            placed_objects = {}
        else:
            placed_objects = deepcopy(fixtures)

        # compute reference position
        base_offset = self.table_top_offset
        if reference_object_name is not None:
            assert reference_object_name in placed_objects
            reference_pos, reference_mjcf = placed_objects[
                reference_object_name]
            base_offset[:2] = reference_pos[:2]
            if sample_on_top:
                base_offset[-1] = reference_pos[
                    -1] + reference_mjcf.get_top_offset()[-1]  # set surface z

        index = 0
        for obj_name, obj_mjcf in self.mujoco_objects.items():
            horizontal_radius = obj_mjcf.get_horizontal_radius()
            bottom_offset = obj_mjcf.get_bottom_offset()
            success = False
            for i in range(5000):  # 5000 retries
                object_x = self.sample_x(horizontal_radius) + base_offset[0]
                object_y = self.sample_y(horizontal_radius) + base_offset[1]
                object_z = base_offset[2] + self.z_offset - bottom_offset[-1]

                # objects cannot overlap
                location_valid = True
                for (x, y, z), other_obj_mjcf in placed_objects.values():
                    if (np.linalg.norm([object_x - x, object_y - y], 2) <=
                            other_obj_mjcf.get_horizontal_radius() +
                            horizontal_radius) and (
                                object_z - z <=
                                other_obj_mjcf.get_top_offset()[-1] -
                                bottom_offset[-1]):
                        location_valid = False
                        break

                if location_valid:
                    # location is valid, put the object down
                    pos = (object_x, object_y, object_z)
                    placed_objects[obj_name] = (pos, obj_mjcf)

                    # random z-rotation
                    quat = self.sample_quat()

                    # multiply this quat by the object's initial rotation if it has the attribute specified
                    if hasattr(obj_mjcf, "init_quat"):
                        quat = quat_multiply(quat, obj_mjcf.init_quat)

                    quat_arr.append(quat)
                    pos_arr.append(pos)
                    success = True
                    break

            if not success:
                raise RandomizationError(
                    "Cannot place all objects on the desk")
            index += 1

        if return_placements:
            return pos_arr, quat_arr, placed_objects
        return pos_arr, quat_arr
    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982  # the z-coordinate of the table surface

        object_axang = T.quat2axisangle(object_quat)
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # reorient the gripper to match the nut faces and move above the nut
        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            if not self.object_below_hand:
                action = 20 * (object_pos[:2] - gripper_pos[:2])
            else:
                action = [0, 0]

            action = 20 * (object_pos[:2] - gripper_pos[:2])

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
        # close the gripper and pick the nut
        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
    def __init__(self, *args, **kwargs):
        options = {}
        # Absolute pose control
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.cameraName = "frontview"
        skip_frame = 2
        self.peg_env = suite.make(
            "TwoArmPegInHole",
            robots=["IIWA", "IIWA"],
            **options,
            has_renderer=False,
            ignore_done=True,
            use_camera_obs=True,
            use_object_obs=True,
            camera_names=self.cameraName,
            camera_heights=512,
            camera_widths=512,
        )

        # Tolerances on position and rotation of peg relative to hole
        posTol = 0.005
        rotTol = 0.05

        observation = self.peg_env.reset()

        # observation["peg_to_hole"] is the vector FROM the hole TO the peg
        peg_pos0 = observation["hole_pos"] + observation["peg_to_hole"]
        self.peg_pos0 = peg_pos0
        self.hole_pos0 = observation["hole_pos"]

        # Positions of robots 0 and 1 rel peg and hole, in peg and hole frames.  Constant forever.
        pRob0RelPeg = np.matmul(
            T.quat2mat(T.quat_inverse(observation["peg_quat"])),
            observation["robot0_eef_pos"] - (peg_pos0))
        pRob1RelHole = np.matmul(
            T.quat2mat(T.quat_inverse(observation["hole_quat"])),
            observation["robot1_eef_pos"] - observation["hole_pos"])
        qRob0RelPeg = T.quat_multiply(
            T.quat_inverse(observation["robot0_eef_quat"]),
            observation["peg_quat"])
        qRob1RelHole = T.quat_multiply(
            T.quat_inverse(observation["robot1_eef_quat"]),
            observation["hole_quat"])

        # Store geometric constants
        model = self.peg_env.model.get_model()
        pegDim = model.geom_size[15]
        rPeg = pegDim[0]
        lPeg = pegDim[2]
        distSlack = 2 * lPeg

        # Set up optimization problem.
        # Define 3 keyframes: peg higher than hole, peg centered above hole with hole facing up, and peg in hole.
        # One constraint for each keyframe, and one constraint for unit quaternions
        nonlinear_constraint_1 = NonlinearConstraint(self.cons_1,
                                                     distSlack,
                                                     np.inf,
                                                     jac='2-point',
                                                     hess=BFGS())
        nonlinear_constraint_2 = NonlinearConstraint(
            self.cons_2,
            np.array([-posTol, -posTol, distSlack, -rotTol]),
            np.array([posTol, posTol, np.inf, rotTol]),
            jac='2-point',
            hess=BFGS())
        nonlinear_constraint_3 = NonlinearConstraint(
            self.cons_3,
            np.array([-posTol, -posTol, distSlack]),
            np.array([posTol, posTol, np.inf]),
            jac='2-point',
            hess=BFGS())
        nonlinear_constraint_4 = NonlinearConstraint(self.cons_unit_quat,
                                                     np.array([1, 1, 1, 1]),
                                                     np.array([1, 1, 1, 1]),
                                                     jac='2-point',
                                                     hess=BFGS())

        # Initial guess for optimizer
        x0 = np.tile(
            np.hstack((peg_pos0, observation["hole_pos"],
                       observation["peg_quat"], observation["hole_quat"])), 3)
        # Cut out quat from last chunk
        x0 = x0[0:34]

        # Solve optimization problem
        res = minimize(self.traj_obj,
                       x0,
                       method='trust-constr',
                       jac='2-point',
                       hess=BFGS(),
                       constraints=[
                           nonlinear_constraint_1, nonlinear_constraint_2,
                           nonlinear_constraint_3, nonlinear_constraint_4
                       ],
                       options={'verbose': 1})
        # 'xtol': 1e-6,
        x = res.x
        # Extract optimization results
        # x = [p_peg_1, p_hole_1, q_peg_1, q_hole_1, p_peg_2, ... q_peg_3, q_hole_3]
        ind_offset_1 = 14
        ind_offset_2 = 28

        p_peg_1 = x[0:3]
        p_hole_1 = x[3:6]
        p_peg_2 = x[ind_offset_1 + 0:ind_offset_1 + 3]
        p_hole_2 = x[ind_offset_1 + 3:ind_offset_1 + 6]
        p_peg_3 = x[ind_offset_2 + 0:ind_offset_2 + 3]
        p_hole_3 = x[ind_offset_2 + 3:ind_offset_2 + 6]

        q_peg_1 = x[6:10]
        q_hole_1 = x[10:14]
        q_peg_2 = x[ind_offset_1 + 6:ind_offset_1 + 10]
        q_hole_2 = x[ind_offset_1 + 10:ind_offset_1 + 14]

        # Use the same orientations as in pose 2
        q_peg_3 = q_peg_2
        q_hole_3 = q_hole_2

        # Robot rel world = peg rel world + robot rel peg
        # Robot rel peg in world frame = (q world frame rel peg frame) * (robot rel peg in peg frame)
        q_rob0_1 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_1)))
        q_rob1_1 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_1)))
        q_rob0_2 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_2)))
        q_rob1_2 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_2)))
        q_rob0_3 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_3)))
        q_rob1_3 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_3)))

        self.p_rob0_1 = p_peg_1 + np.matmul(T.quat2mat(q_peg_1), pRob0RelPeg)
        self.p_rob1_1 = p_hole_1 + np.matmul(T.quat2mat(q_hole_1),
                                             pRob1RelHole)
        self.p_rob0_2 = p_peg_2 + np.matmul(T.quat2mat(q_peg_2), pRob0RelPeg)
        self.p_rob1_2 = p_hole_2 + np.matmul(T.quat2mat(q_hole_2),
                                             pRob1RelHole)
        self.p_rob0_3 = p_peg_3 + np.matmul(T.quat2mat(q_peg_3), pRob0RelPeg)
        self.p_rob1_3 = p_hole_3 + np.matmul(T.quat2mat(q_hole_3),
                                             pRob1RelHole)

        self.axang_rob0_1 = T.quat2axisangle(q_rob0_1)
        self.axang_rob1_1 = T.quat2axisangle(q_rob1_1)
        self.axang_rob0_2 = T.quat2axisangle(q_rob0_2)
        self.axang_rob1_2 = T.quat2axisangle(q_rob1_2)
        self.axang_rob0_3 = T.quat2axisangle(q_rob0_3)
        self.axang_rob1_3 = T.quat2axisangle(q_rob1_3)

        self.max_episode_steps = 200
        # Gains for rotation and position error compensation rates
        self.kpp = 4
        self.kpr = 0.1

        # Initial pose Information
        self.rob0quat0 = observation["robot0_eef_quat"]
        self.rob1quat0 = observation["robot1_eef_quat"]
        self.rob0pos0 = observation["robot0_eef_pos"]
        self.rob1pos0 = observation["robot1_eef_pos"]
            target_rot_X = T.rotation_matrix(0,
                                             np.array([1, 0, 0]),
                                             point=target_pos)
            target_rot_Y = T.rotation_matrix(math.pi,
                                             np.array([0, 1, 0]),
                                             point=target_pos)
            target_rot_Z = T.rotation_matrix(math.pi + target_angle,
                                             np.array([0, 0, 1]),
                                             point=target_pos)
            target_rot = np.matmul(target_rot_Z,
                                   np.matmul(target_rot_Y, target_rot_X))
            target_quat = T.mat2quat(target_rot)

            dquat = T.quat_slerp(current_quat, target_quat, 0.01)
            dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat))

            action = np.concatenate([dpos, dquat, [grasp]])
            obs, reward, done, info = env.step(action)

            pos_traj.append(current_pos - table_top_center)
            angle_traj.append(T.mat2euler(T.quat2mat(current_quat)))
            # env.render()

        time = 0

        done_task = False
        while not done_task:
            time += 1
            if time > 2000:
                break