def _set_action(self, action):
        assert action.shape == (3,)
        self.counter += 1
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([-1, -1])
        assert gripper_ctrl.shape == (2,)

        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            action[2] = 0.56402952

            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)

        if self.counter >= 3:
        # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
            action = np.array([0,0,-0.05,1,0,1,0,1,1])
            utils.mocap_set_action(self.sim, action)
            for _ in range(5):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()
Exemple #2
0
    def _set_action(self, action):
        assert action.shape == (3, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([1, 1])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)

        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            if self.use_close_loop:
                action[2] = 0.46470766
            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)

        utils.mocap_set_action(self.sim, action)

        if self.counter >= 2:
            # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
            self.sim.step()
            pos_ctrl = np.array([0.0, 0.0, 0.0])
            gripper_ctrl = np.array([-1, -1])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
            # logging
            # grip_pos = self.sim.data.get_site_xpos('robot0:grip')
            # obs_pos = self.sim.data.get_site_xpos('object0')
            # offset = obs_pos - grip_pos
            # print(offset, np.linalg.norm(offset, axis = -1))

            for _ in range(20):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()

            for _ in range(10):
                pos_ctrl = np.array([0.0, 0.02, 0.0])
                gripper_ctrl = np.array([-1, -1])
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
                self.sim.step()
    def _set_action(self, action):
        assert action.shape == (3, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([0, 0])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)
        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            if self.use_close_loop:
                action[2] = 0.41644691
            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        if self.counter >= 3:
            for _ in range(10):
                self.sim.step()
                pos_ctrl = np.array([0.0, 0.02, 0.0])
                gripper_ctrl = np.array([0, 0])
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
                self.sim.step()
    def _reset_sim(self):
        self.sim.set_state(self.initial_state)

        if self.random_obj:
            min_color_diff = 0
            while min_color_diff < 0.1:
                rgba = np.random.uniform(0, 0.5, size=3)
                if self.train_random:
                    rgba[2] = 1
                elif self.test_random:
                    rgba[1] = 1
                else:
                    rgba *= 2
                color_diff = np.abs(self.sim.model.geom_rgba.copy()[:-1, :3] -
                                    rgba)
                min_color_diff = min(np.sum(color_diff, axis=1))
            self.sim.model.geom_rgba[-1][:3] = rgba

        # Randomize start position of hole.
        if self.goal_type == "fixed":
            offset = np.array([0.02, 0.02])
        else:
            if self.limit_dir:
                if self.train_random:
                    offset = np.array([
                        self.np_random.uniform(0, self.obj_range),
                        self.np_random.uniform(-self.obj_range, self.obj_range)
                    ])
                elif self.test_random:
                    offset = np.array([
                        self.np_random.uniform(-self.obj_range, 0),
                        self.np_random.uniform(-self.obj_range, self.obj_range)
                    ])
            else:
                offset = self.np_random.uniform(-self.obj_range,
                                                self.obj_range,
                                                size=2)
            norm = np.linalg.norm(offset, axis=-1)
            if norm < 0.05:
                offset = offset / norm * 0.05
        hole_qpos = self.sim.data.get_joint_qpos('table_top:joint')
        assert hole_qpos.shape == (7, )
        hole_qpos[0] = hole_qpos[0] + offset[0]
        hole_qpos[1] = hole_qpos[1] + offset[1]
        self.sim.data.set_joint_qpos('table_top:joint', hole_qpos)

        # Randomize start position of object.
        offset = np.array([0.1, 0.1])

        object_xpos = self.initial_gripper_xpos[:2] + offset
        object_qpos = self.sim.data.get_joint_qpos('object0:joint')
        assert object_qpos.shape == (7, )
        object_qpos[:2] = object_xpos
        self.sim.data.set_joint_qpos('object0:joint', object_qpos)

        self.sim.forward()

        # move gripper to grasp box
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([1, 1])
        pos_ctrl = np.array([0, 0, 0])
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        for _ in range(10):
            utils.ctrl_set_action(self.sim, action)
            self.sim.step()

        box_pose = self.sim.data.get_site_xpos('handle0').copy()
        pos_ctrl = box_pose.copy()
        pos_ctrl[2] = box_pose[2] + 0.2

        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        utils.mocap_set_action_abs(self.sim, action)

        box_pose = self.sim.data.get_site_xpos('handle0').copy()
        pos_ctrl = box_pose.copy()
        pos_ctrl[2] = box_pose[2]

        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        utils.mocap_set_action_abs(self.sim, action)

        action = np.array([0, 0, 0, 1, 0, 1, 0, -1, -1])
        for _ in range(20):
            utils.ctrl_set_action(self.sim, action)
            self.sim.step()

        pos_ctrl = self.initial_gripper_xpos.copy()
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        utils.mocap_set_action_abs(self.sim, action)

        return True