Ejemplo n.º 1
0
def main():
    """Interactive demo: drive a HandPickAndPlace env with a shared,
    globally-writable action vector, rendering every step.

    NOTE(review): relies on module-level names defined elsewhere in the file
    (gym, np, Thread, it, add_selection_logger, render_pose, action_thread,
    selected_action).
    """
    # env = gym.make('HandPickAndPlaceDense-v0')
    env = gym.make(
        'HandPickAndPlace-v0',
        ignore_rotation_ctrl=True,
        ignore_target_rotation=True,
        success_on_grasp_only=True,
        randomize_initial_arm_pos=True,
        randomize_initial_object_pos=True,
        # grasp_state=True,
        # grasp_state_reset_p=0.5,
        object_cage=True,
        object_id='teapot'
    )
    obs = env.reset()

    env.render()
    sim = env.unwrapped.sim
    # Print info about geoms/bodies the user clicks in the viewer.
    add_selection_logger(env.unwrapped.viewer, sim)
    print('nconmax:', sim.model.nconmax)
    print('obs.shape:', obs['observation'].shape)

    # selected_action is (presumably) written by action_thread; the thread is
    # currently not started (p.start() is commented out).
    global selected_action
    p = Thread(target=action_thread)
    # p.start()
    selected_action = np.zeros(27)

    for i in it.count():

        for j in range(6):

            env.reset()
            # for val in np.linspace(-1, 1, 60):
            while True:  # NOTE(review): no break -- outer i/j loops never advance

                env.render()
                action = selected_action.copy()
                # Scale down the arm part (last 7 slots) and zero it in the
                # shared vector, so each input acts like a one-shot impulse.
                action[-7:] *= 0.2
                selected_action[-7:] *= 0.0

                render_pose(env.unwrapped.get_table_surface_pose(), env.unwrapped.viewer, unique_id=535)

                rew, done = env.step(action)[1:3]
Ejemplo n.º 2
0
    def _move_arms(self,
                   *,
                   left_target: np.ndarray,
                   right_target: np.ndarray,
                   left_yaw=0.0,
                   right_yaw=0.0,
                   pos_threshold=0.02,
                   rot_threshold=0.1,
                   k=2.0,
                   max_steps=100,
                   count_stable_steps=False,
                   targets_relative_to=None,
                   left_grp_config=-1.0,
                   right_grp_config=-1.0):
        """Servo both grippers to Cartesian targets via IK plus P-control.

        Steps the underlying env until the largest position error across both
        arms is below ``pos_threshold`` and the largest rotation error is
        below ``rot_threshold``, or ``max_steps`` steps have elapsed.

        :param left_target: target position (x, y, z) for the left gripper.
        :param right_target: target position for the right gripper.
        :param left_yaw: desired left gripper yaw (rad).
        :param right_yaw: desired right gripper yaw (rad).
        :param pos_threshold: position convergence tolerance.
        :param rot_threshold: rotation convergence tolerance (rad).
        :param k: proportional gain forwarded to ``self._controller``.
        :param max_steps: maximum number of env steps to take.
        :param count_stable_steps: if True, return the number of consecutive
            steps the object stayed still w.r.t. the grasp center while off
            the table, instead of the final position error.
        :param targets_relative_to: optional callable returning a reference
            pose; targets are transformed by it before use.
        :param left_grp_config: left gripper open/close command (action[7]).
        :param right_grp_config: right gripper command (action[15]).
        :return: ``stable_steps`` when ``count_stable_steps`` is set,
            otherwise the last measured maximum position error.
        """

        targets = {'l': left_target, 'r': right_target}
        yaws = {'l': left_yaw, 'r': right_yaw}
        stable_steps = 0
        prev_rel_pos = np.zeros(3)
        u = np.zeros(self.sim_env.action_space.shape)
        prev_err_l = np.zeros(7)
        prev_err_r = np.zeros(7)
        max_pos_err = -np.inf

        for i in range(max_steps):

            grasp_center_pos = np.zeros(3)
            max_rot_err = -np.inf
            max_pos_err = -np.inf

            for arm_i, arm in enumerate(('l', 'r')):

                curr_pose = self.get_gripper_pose(arm)
                curr_q = self.get_arm_config(arm)

                # u_masked is a numpy *view* into u: writing it below fills
                # the arm's 7 joint slots of the shared action vector.
                if arm == 'l':
                    pitch = np.pi - 0.9
                    u_masked = u[:7]
                    prev_err = prev_err_l
                else:
                    pitch = np.pi - 0.9
                    u_masked = u[8:15]
                    prev_err = prev_err_r

                if callable(targets_relative_to):
                    reference = targets_relative_to()
                    target_pos = tf.apply_tf(targets[arm], reference)[:3]
                else:
                    target_pos = targets[arm]

                # Build the full target pose: yaw about z, then tilt by a
                # fixed pitch so the gripper faces the work surface.
                target_pose = np.r_[target_pos,
                                    tf.rotations.euler2quat(np.r_[0., 0.,
                                                                  yaws[arm]])]
                target_pose = tf.apply_tf(
                    np.r_[0., 0., 0.,
                          tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                    target_pose)

                grasp_center_pos += curr_pose[:3]
                max_pos_err = max(
                    max_pos_err,
                    np.abs(curr_pose[:3] - target_pose[:3]).max())
                max_rot_err = max(
                    max_rot_err,
                    tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))

                # IK on the pose delta, then P-control in joint space.
                target_q = self.sim_env.mocap_ik(target_pose - curr_pose, arm)
                u_masked[:] = self._controller(curr_q - target_q, prev_err, k)

                if self.render_substeps:
                    tf.render_pose(target_pos.copy(),
                                   self.viewer,
                                   label=f"{arm}_p",
                                   unique_label=True)
                    tf.render_pose(target_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}_t",
                                   unique_label=True)
                    tf.render_pose(curr_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}",
                                   unique_label=True)

            # Midpoint of the two gripper positions.
            grasp_center_pos /= 2.0

            u[7] = left_grp_config
            u[15] = right_grp_config
            u = np.clip(u, self.sim_env.action_space.low,
                        self.sim_env.action_space.high)
            self.sim_env.step(u)

            if self.render_substeps:
                tf.render_pose(grasp_center_pos,
                               self.viewer,
                               label="grasp_center",
                               unique_id=5554)
                self.render(keep_markers=True)

            if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
                break

            if count_stable_steps:
                obj_pos = self.get_object_pos()
                rel_pos = obj_pos - grasp_center_pos
                # "Still" = object barely moved relative to the grasp center.
                # NOTE(review): prev_rel_pos is initialized to zeros, never
                # None, so the None check here is vacuous.
                still = prev_rel_pos is not None and np.all(
                    np.abs(rel_pos - prev_rel_pos) < 0.002)
                obj_above_table = len(
                    self.sim_env.get_object_contact_points(
                        other_body='table')) == 0
                if still and obj_above_table:
                    stable_steps += 1
                elif i > 10:
                    break
                prev_rel_pos = rel_pos

        if count_stable_steps:
            return stable_steps

        return max_pos_err
Ejemplo n.º 3
0
    def _move_arms_mocap(self,
                         *,
                         left_target: np.ndarray,
                         right_target: np.ndarray,
                         left_yaw=0.0,
                         right_yaw=0.0,
                         pos_threshold=0.02,
                         rot_threshold=0.1,
                         k=1.0,
                         max_steps=1,
                         left_grp_config=-1.0,
                         right_grp_config=-1.0):
        """Move both grippers toward targets by driving MuJoCo mocap bodies.

        Unlike ``_move_arms``, the gripper poses are commanded through the
        mocap welds (enabled via ``eq_active``) rather than joint-space
        control; the action vector only carries the gripper open/close
        commands.

        NOTE(review): with the default ``max_steps=1`` the error terms are
        never computed (guarded by ``max_steps > 1``) and stay ``-inf``, so
        the convergence test at the bottom trivially passes; exactly one
        simulation step is taken.

        Parameters mirror ``_move_arms``; ``k`` scales the mocap pose delta.
        """

        targets = {'l': left_target, 'r': right_target}
        yaws = {'l': left_yaw, 'r': right_yaw}
        # Enable all weld constraints so the mocap bodies drag the grippers.
        self.sim.model.eq_active[:] = 1

        for i in range(max_steps):

            grasp_center_pos = np.zeros(3)
            max_rot_err = -np.inf
            max_pos_err = -np.inf

            # Tilt the grippers more as the object rises above the table.
            d_above_table = self.get_object_pos(
            )[2] - self.sim_env._object_z_offset
            grp_xrot = 0.9 + d_above_table * 2.0

            mocap_a = np.zeros((self.sim.model.nmocap, 7))

            for arm_i, arm in enumerate(('l', 'r')):

                curr_pose = self.get_gripper_pose(arm)
                pitch = np.pi - grp_xrot

                # Target pose: yaw about z, then tilt by the height-dependent
                # pitch computed above.
                target_pos = targets[arm]
                target_pose = np.r_[target_pos,
                                    tf.rotations.euler2quat(np.r_[0., 0.,
                                                                  yaws[arm]])]
                target_pose = tf.apply_tf(
                    np.r_[0., 0., 0.,
                          tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                    target_pose)

                grasp_center_pos += curr_pose[:3]
                if max_steps > 1:
                    max_pos_err = max(
                        max_pos_err,
                        np.abs(curr_pose[:3] - target_pose[:3]).max())
                    max_rot_err = max(
                        max_rot_err,
                        tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))

                # Mocap action = desired pose delta for this arm's mocap body.
                mocap_a[arm_i] = target_pose - curr_pose

                if self.viewer is not None and self.render_poses:
                    tf.render_pose(target_pos.copy(),
                                   self.viewer,
                                   label=f"{arm}_p",
                                   unique_label=True)
                    tf.render_pose(target_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}_t",
                                   unique_label=True)
                    tf.render_pose(curr_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}",
                                   unique_label=True)

            # Midpoint of the two gripper positions.
            grasp_center_pos /= 2.0

            # set config of grippers
            u = np.zeros(self.sim_env.action_space.shape)
            u[7] = left_grp_config
            u[15] = right_grp_config
            self.sim_env._set_action(u)

            # set pose of grippers
            _mocap_set_action(self.sim, mocap_a * k)

            # take simulation step
            self.sim.step()
            self.sim_env._step_callback()

            if self.viewer is not None:
                if self.render_poses:
                    tf.render_pose(grasp_center_pos,
                                   self.viewer,
                                   label="grasp_center",
                                   unique_id=5554)
                self.render(keep_markers=True)

            if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
                break
Ejemplo n.º 4
0
def main():
    """Run the scripted YumiConstrainedAgent on YumiConstrained-v2 forever,
    tracking steps-to-success and the min/max goal coordinates reached.

    NOTE(review): relies on module-level names (gym, np, it,
    add_selection_logger, render_pose, YumiConstrainedAgent).
    """

    env = gym.make('YumiConstrained-v2',
                   reward_type='sparse',
                   render_poses=False,
                   has_rotating_platform=False,
                   has_button=False,
                   has_object_box=False,
                   object_id="fetch_sphere")
    raw_env = env.unwrapped
    sim = raw_env.sim
    env.render()
    add_selection_logger(raw_env.viewer, sim)

    agent = YumiConstrainedAgent(env)

    done = True
    obs = None
    steps_to_success = []
    n_steps = 0

    # reachability[0] = per-axis minimum of successfully reached goals,
    # reachability[1] = per-axis maximum.
    reachability = np.zeros((2, 3))
    reachability[0] = np.inf
    reachability[1] = -np.inf
    unreachable_eps = 0
    tot_eps = 0

    for i in it.count():
        if done:
            obs = env.reset()
            n_steps = 0
            tot_eps += 1

        # center_pos = obs['observation'][:3]
        # achieved_goal = obs['achieved_goal']
        # err = achieved_goal - center_pos
        # # u = np.zeros(env.action_space.shape)
        # # u[1:4] = err * 10.0

        # NOTE(review): this sampled/zeroed action is dead -- it is
        # immediately overwritten by agent.predict below (sample() still
        # advances the action-space RNG, so removing it would shift seeds).
        u = env.action_space.sample()
        u[0] = -1 if i // 8 % 2 == 0 else 1
        u[1:] = 0.

        u = agent.predict(obs)

        render_pose(env.unwrapped.get_table_surface_pose(),
                    env.unwrapped.viewer,
                    unique_id=535)

        obs, rew, done, _ = env.step(u)
        # done = False
        n_steps += 1
        env.render()

        # Sparse reward: 0.0 means the goal was achieved this step.
        if rew == 0.0:
            steps_to_success.append(n_steps)
            arr = np.asarray(steps_to_success)
            # print('min', arr.min(), 'max', arr.max(), 'avg', arr.mean(), 'std', arr.std())
            done = True

            goal = obs['desired_goal'].copy()
            reachability[0] = np.minimum(reachability[0], goal)
            reachability[1] = np.maximum(reachability[1], goal)

            # print(reachability)
            # print(unreachable_eps / tot_eps)
            # print()

        elif done:
            unreachable_eps += 1
Ejemplo n.º 5
0
    def predict(self, obs, **kwargs):
        """Return one action for the bimanual grasp-and-place state machine.

        Phases: 0 = hover above the object, 1 = descend onto it,
        2 = gradually close the grippers, 3 = carry toward the per-arm
        target sites ('target0:left' / 'target0:right').

        NOTE(review): the observation slices below ([:7], [16:23], [32:35],
        [35:38], [44:47], [47:50]) assume a specific layout of
        obs['observation'] (arm joint configs, gripper positions, object
        offsets) -- confirm against the env's _get_obs.

        :param obs: goal-env observation dict with 'observation' and
            'desired_goal'.
        :return: clipped action vector sized to the env's action space.
        """

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            # Goal changed: restart the phase machine.
            self._reset(new_goal)
            # TFDebugger.reset()
        # TFDebugger.step(self._raw_env.viewer)

        # Current gripper poses (pos + quat) from the named MuJoCo sites.
        curr_grp_poses = {
            a:
            np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                  rotations.mat2quat(
                      self._raw_env.sim.data.
                      get_site_xmat(f'gripper_{a}_center')), ]
            for a in ('l', 'r')
        }

        pos_errors = []
        for arm in ('l', 'r'):
            if arm == 'l':
                solver = self._ikp_solver_l
                jac_solver = self._jac_solver_l
                joint_lims = self._raw_env.arm_l_joint_lims

                achieved_pos = obs['observation'][32:35]
                obj_l_rel_pos = obs['observation'][44:47]
                obj_pos = obj_l_rel_pos + achieved_pos

                # Left gripper approaches tilted by (pi - 0.9) about x.
                target_pose = np.r_[obj_pos, np.pi - 0.9, 0.01, 0.01]
                target_pose = np.r_[target_pose[:3],
                                    rotations.euler2quat(target_pose[3:])]

                if self._phase == 3:
                    target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                        'target0:left').copy()
                    q = rotations.mat2quat(
                        self._raw_env.sim.data.get_site_xmat('target0:left'))
                    target_pose[3:] = rotations.quat_mul(target_pose[3:], q)

                # u_masked is a view into u: left arm joint slots.
                prev_err = self._prev_err_l
                curr_q = obs['observation'][:7]
                u_masked = u[:7]
            elif arm == 'r':
                solver = self._ikp_solver_r
                jac_solver = self._jac_solver_r
                joint_lims = self._raw_env.arm_r_joint_lims

                achieved_pos = obs['observation'][35:38]
                obj_r_rel_pos = obs['observation'][47:50]
                obj_pos = obj_r_rel_pos + achieved_pos

                target_pose = np.r_[obj_pos, -np.pi + 0.9, 0.01, np.pi]
                target_pose = np.r_[target_pose[:3],
                                    rotations.euler2quat(target_pose[3:])]

                if self._phase == 3:
                    target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                        'target0:right').copy()
                    q = rotations.mat2quat(
                        self._raw_env.sim.data.get_site_xmat('target0:right'))
                    target_pose[3:] = rotations.quat_mul(q, target_pose[3:])

                prev_err = self._prev_err_r
                curr_q = obs['observation'][16:23]
                u_masked = u[8:15]
            else:
                # Unreachable (arm is always 'l' or 'r'); kept defensively.
                continue

            # Gripper open/close commands (slots 7 and 15) per phase.
            if self._phase == 0:
                u[7] = -1.0
                u[15] = -1.0
                target_pose[2] += 0.1
            elif self._phase == 1:
                u[7] = -1.0
                u[15] = -1.0
                target_pose[2] -= 0.002
            elif self._phase == 2:
                # Close gradually over ~6 steps.
                u[7] = -1 + self._phase_steps / 3.
                u[15] = -1 + self._phase_steps / 3.
            elif self._phase == 3:
                u[7] = 1.0
                u[15] = 1.0

            if self._raw_env.viewer is not None:
                tf.render_pose(target_pose.copy(), self._raw_env.viewer)

            curr_pose = curr_grp_poses[arm].copy()
            err_pose = curr_pose - target_pose
            err_pos = np.linalg.norm(err_pose[:3])
            pos_errors.append(err_pos)

            if self.use_mocap_ctrl:
                if self._phase < 1:
                    controller_k = 3.0
                else:
                    controller_k = 2.5
                err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
                target_q = self._raw_env.mocap_ik(-err_pose, arm)
            else:
                controller_k = 0.1
                err_rot = 0.0
                target_pose[:3] -= self._base_offset
                if self.use_qp_solver:
                    if not self.check_joint_limits:
                        joint_lims = None
                    curr_pose[:3] -= self._base_offset
                    target_q = self._position_ik_qp(curr_pose, target_pose,
                                                    curr_q, jac_solver,
                                                    joint_lims)
                else:
                    target_q = self._position_ik(target_pose, curr_q, solver)

            if target_q is not None:
                err_q = curr_q - target_q
                u_masked[:] = self._controller(err_q, prev_err, controller_k)

        # Phase transitions. NOTE(review): err_rot holds the value from the
        # last arm processed (the right arm).
        self._phase_steps += 1
        if self._phase == 0 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.05:
            self._phase = 1
            self._phase_steps = 0
        elif self._phase == 1 and np.all(
                np.array(pos_errors) < 0.007) and err_rot < 0.05:
            self._phase = 2
            self._phase_steps = 0
        elif self._phase == 2:
            if self._phase_steps > 6:
                self._phase = 3
                self._phase_steps = 0
                # Remember the left-to-right gripper transform at grasp time.
                self._locked_l_to_r_tf = tf.get_tf(curr_grp_poses['r'],
                                                   curr_grp_poses['l'])

        u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
        return u
Ejemplo n.º 6
0
    def predict(self, obs, **kwargs):
        """Return one action for a bimanual squeeze-grasp state machine.

        The grippers approach the object from opposite sides (offset by the
        object radius), then squeeze and lift it. Phases: 0 = hover above
        with lateral clearance, 1 = descend keeping clearance, 2 = move in
        to the object sides, 3 = squeeze/lift (tighter offsets).

        NOTE(review): the observation slices ([:7], [16:23], [44:51]) assume
        a specific obs['observation'] layout -- confirm against the env.

        :param obs: goal-env observation dict.
        :return: clipped action vector sized to the env's action space.
        """

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            # Goal changed: restart the phase machine.
            self.reset(new_goal)

        obj_radius = self._object_size[0]
        obj_achieved_alt = obs['achieved_goal'].item()
        obj_achieved_pose = obs['observation'][44:51]
        # Guard against an all-zero (invalid) quaternion: use identity.
        if np.all(obj_achieved_pose[3:] == 0):
            obj_achieved_pose[3] = 1.0
        # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O")

        # Tilt grippers more as the object rises (altitude-dependent pitch).
        grp_xrot = 0.9 + obj_achieved_alt * 2.0

        # Current gripper poses (pos + quat) from the named MuJoCo sites.
        curr_grp_poses = {
            a:
            np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                  rotations.mat2quat(
                      self._raw_env.sim.data.
                      get_site_xmat(f'gripper_{a}_center')), ]
            for a in ('l', 'r')
        }

        pos_errors = []
        for arm in ('l', 'r'):

            if arm == 'l':
                # Left gripper target: object pose offset by +radius along y.
                transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.]
                if self._phase == 3:
                    # Squeeze slightly inside the radius and lift.
                    transf[1] *= 0.9
                    transf[2] = 0.05
                pose = tf.apply_tf(transf, obj_achieved_pose)
                # tf.render_pose(pose, self._raw_env.viewer, label="L")

                grp_target_pos = pose[:3]
                grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01]
                target_pose = np.r_[grp_target_pos,
                                    rotations.euler2quat(grp_target_rot)]

                # u_masked is a view into u: left arm joint slots.
                prev_err = self._prev_err_l
                curr_q = obs['observation'][:7]
                u_masked = u[:7]

            elif arm == 'r':
                # Right gripper target: offset by -radius along y.
                transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.]
                if self._phase == 3:
                    transf[1] *= 0.9
                    transf[2] = 0.05
                pose = tf.apply_tf(transf, obj_achieved_pose)
                # tf.render_pose(pose, self._raw_env.viewer, label="R")

                grp_target_pos = pose[:3]
                grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi]
                target_pose = np.r_[grp_target_pos,
                                    rotations.euler2quat(grp_target_rot)]

                prev_err = self._prev_err_r
                curr_q = obs['observation'][16:23]
                u_masked = u[8:15]
            else:
                # Unreachable (arm is always 'l' or 'r'); kept defensively.
                continue

            # Grippers stay closed throughout (squeeze grasp, no fingers).
            u[7] = -1.0
            u[15] = -1.0

            # Per-phase target adjustments (height / lateral clearance).
            if self._phase == 0:
                target_pose[2] += 0.1
                target_pose[1] += 0.05 * np.sign(target_pose[1])
            elif self._phase == 1:
                target_pose[2] += 0.01
                target_pose[1] += 0.05 * np.sign(target_pose[1])
            elif self._phase == 2:
                target_pose[2] += 0.01
            elif self._phase == 3:
                target_pose[2] += 0.00

            if self._raw_env.viewer is not None:
                tf.render_pose(target_pose.copy(), self._raw_env.viewer)

            curr_pose = curr_grp_poses[arm].copy()
            err_pose = curr_pose - target_pose
            err_pos = np.linalg.norm(err_pose[:3])
            pos_errors.append(err_pos)

            # Mocap IK on the pose delta, then P-control in joint space.
            controller_k = 2.0
            err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
            target_q = self._raw_env.mocap_ik(-err_pose, arm)

            err_q = curr_q - target_q
            u_masked[:] = self._controller(err_q, prev_err, controller_k)

        # Phase transitions. NOTE(review): err_rot holds the value from the
        # last arm processed (the right arm).
        self._phase_steps += 1
        if self._phase == 0 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.1:
            self._phase = 1
            self._phase_steps = 0
        elif self._phase == 1 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.1:
            self._phase = 2
            self._phase_steps = 0
        elif self._phase == 2:
            if self._phase_steps > 30:
                self._phase = 3
                self._phase_steps = 0

        u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
        return u
Ejemplo n.º 7
0
    def predict(self, obs, **kwargs):
        """Return one action for a single-gripper pick-and-move policy with
        optional pre-manipulation stages.

        Pre-phases (< 2): press the env's button if the object is far away,
        or push the rotating platform so the object comes within reach.
        Then: phase 2 = reach the object, phase 3 = close the gripper until
        enough contact points, phase 4 = carry toward the goal (falls back
        to phase 0 if the grasp is lost).

        Action layout (assumed from usage): u[0] = gripper command,
        u[1:4] = Cartesian velocity -- TODO confirm against the env.

        :param obs: goal-env observation dict.
        :return: action vector (not clipped here).
        """

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            # Goal changed: restart the phase machine.
            self.reset(new_goal)

        grasp_center_pos = obs['observation'][:3]
        object_pos = obs['observation'][18:21]
        object_rel_pos = obs['observation'][24:27]
        c_points = self._raw_env.sim_env.get_object_contact_points()

        # button_tf = tf.get_tf(
        #     np.r_[self._raw_env.sim.data.get_geom_xpos('button_geom'), 1., 0., 0., 0.],
        #     self._raw_env.get_table_surface_pose()
        # )
        # print('button_tf', button_tf)

        if self._raw_env.sim_env.has_button and self._phase < 2:

            # Object far away: reach for the button to bring it closer.
            if np.linalg.norm(object_pos) > 0.55:
                button_pos = self._raw_env.sim.data.get_geom_xpos(
                    'button_geom')
                tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                               self._raw_env.viewer)
                err = button_pos - grasp_center_pos
                u[0] = -1.
                u[1:4] = err * 5.0
            else:
                # Lift off the button for a few steps, then start grasping.
                u[0] = 0.
                u[3] = 0.5
                self._phase_steps += 1
                if self._phase_steps > 5:
                    self._phase = 2
                    self._phase_steps = 0

        elif self._raw_env.sim_env.has_rotating_platform:

            if np.linalg.norm(object_pos[:2]) > 0.12:  # FIXME
                # Pose of the platform's far end (pos + quat from site).
                far_end_pose = np.r_[
                    self._raw_env.sim.data.
                    get_site_xpos('rotating_platform:far_end'),
                    tf.rotations.mat2quat(
                        self._raw_env.sim.data.
                        get_site_xmat('rotating_platform:far_end')), ]

                close_end_pose = tf.apply_tf(
                    np.r_[-0.5, -0.02, 0., 1., 0., 0., 0.], far_end_pose)
                tf.render_pose(close_end_pose, self._raw_env.viewer)

                push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
                # push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.], close_end_pose)
                tf.render_pose(push_target, self._raw_env.viewer)
                err = close_end_pose[:3] - grasp_center_pos

                # Phase 0: move the gripper onto the platform's close end.
                if self._phase == 0:
                    if np.linalg.norm(err) > 0.01:
                        u[0] = 1.
                        u[1:4] = err * 5.0
                    else:
                        self._phase += 1
                        self._phase_steps = 0

                # Phase 1: push the platform toward the fixed target.
                if self._phase == 1:
                    u[0] = 1.
                    u[1:4] = (push_target[:3] - grasp_center_pos) * 0.5
                    self._phase_steps += 1
                    if self._phase_steps > 80:
                        self._phase += 1
                        self._phase_steps = 0

            elif self._phase < 2:
                self._phase = 2
                self._phase_steps = 0

        elif self._phase < 2:
            # No pre-manipulation needed: go straight to grasping.
            self._phase = 2
            self._phase_steps = 0

        # Phase 2: reach the object.
        if self._phase == 2:
            if np.linalg.norm(object_rel_pos) > 0.01:
                u[0] = 0.0
                u[1:4] = object_rel_pos * 5.0
            else:
                self._phase += 1
                self._phase_steps = 0

        # Phase 3: close the gripper until at least 3 contact points.
        if self._phase == 3:
            if len(c_points) < 3:
                u[0] = -self._phase_steps / 10.0
                self._phase_steps += 1
            else:
                self._phase += 1
                self._phase_steps = 0

        # Phase 4: carry toward the goal while contact persists.
        if self._phase == 4:
            if len(c_points) > 2:
                u[0] = -1.0
                u[1:4] = (new_goal - object_pos) * 2.0
            else:
                # Grasp lost: start over.
                self._phase = 0
                self._phase_steps = 0
        return u
Ejemplo n.º 8
0
    def predict(self, obs, **kwargs):
        """Return a 4D pushing action for the current observation.

        Re-plans whenever the desired goal changes. If the env has a button
        and the object is out of reach, the gripper first presses the button;
        afterwards it positions itself behind the object along the goal
        direction (phase 0 hovers, phase 1 drops in, phase 2 pushes) and
        shoves the object toward the goal.
        """
        desired = obs['desired_goal']
        if self._goal is None or np.any(desired != self._goal):
            # New goal: restart the phase machine.
            self._goal = desired.copy()
            self._phase = -1
            self._last_disp = None

        grp_pos = obs['observation'][:3]
        object_pos = obs['observation'][3:6]

        raw_env = self._env.unwrapped
        table_tf = raw_env.get_table_surface_pose()

        if self._phase < 0:
            if raw_env.has_button and np.linalg.norm(
                    object_pos[:2] - table_tf[:2]) > 0.38:
                # Object out of reach: steer the gripper onto the button.
                button_pos = raw_env.sim.data.get_geom_xpos('button_geom')
                tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                               raw_env.viewer)
                press = np.zeros(4)
                press[3] = -1.
                press[:3] = (button_pos - grp_pos) * 5.0
                return press
            self._phase = 0

        # Displacement still needed to bring the object to the goal (xy).
        goal_disp = self._goal[:2] - object_pos[:2]

        # Freeze the push direction once close to the goal to avoid jitter.
        if self._last_disp is None or np.linalg.norm(goal_disp) > 0.05:
            self._last_disp = goal_disp.copy()
        push_dir = self._last_disp / np.linalg.norm(self._last_disp)

        # Stand slightly behind the object, opposite the push direction.
        gripper_target = object_pos - np.r_[push_dir, 0.] * 0.06
        if self._phase == 0:
            gripper_target[2] = 0.5  # hover above first
        elif self._phase == 2:
            gripper_target[:2] += np.clip(goal_disp * 1.7, -0.08, 0.08)

        reach_err = gripper_target - grp_pos

        action = np.zeros(4)
        action[-1] = -1.0  # keep the gripper closed

        if np.linalg.norm(goal_disp) < 0.01:
            # Object already at the goal: hold still.
            return action

        action[:3] = reach_err * 8.

        if np.linalg.norm(reach_err) < 0.01:
            # Gripper in place: advance hover -> descend -> push.
            self._phase = {0: 1, 1: 2}.get(self._phase, self._phase)

        return action
Ejemplo n.º 9
0
    def predict(self, obs, **kwargs):
        """Return a 4D fetch-style pick-and-place action.

        Adapted from OpenAI baselines' scripted fetch data generation (see
        link below). Pre-phases (< 2) handle env extras: pressing a button
        or pushing a rotating platform to bring the object within reach.
        Then: phase 2 = raise above the table, 3 = hover above the object,
        4 = descend and grip, 5 = carry to the goal, 6 = hold.

        Action layout: [dx, dy, dz, gripper] -- u[3] < 0 closes the gripper.

        :param obs: goal-env observation dict.
        :return: 4-element action (ndarray or list depending on branch).
        """

        # Modified from original implementation in OpenAI's baselines:
        # https://github.com/openai/baselines/blob/master/baselines/her/experiment/data_generation/fetch_data_generation.py

        goal = obs['desired_goal']
        if self._goal is None or np.any(goal != self._goal):
            # New goal: restart the phase machine.
            self._goal = goal.copy()
            self._phase = -1
            self._phase_steps = 0

        grasp_center_pos = obs['observation'][:3]
        object_pos = obs['observation'][3:6]
        object_rel_pos = obs['observation'][6:9]

        object_oriented_goal = object_rel_pos.copy()
        object_oriented_goal[
            2] += 0.03  # first make the gripper go slightly above the object

        raw_env = self._env.unwrapped
        table_tf = raw_env.get_table_surface_pose()

        if raw_env.has_button and self._phase < 2:
            u = np.zeros(4)
            # Object far from the table center: press the button first.
            if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.38:
                button_pos = raw_env.sim.data.get_geom_xpos('button_geom')
                tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                               raw_env.viewer)
                err = button_pos - grasp_center_pos
                u[3] = -1.
                u[:3] = err * 5.0
                return u
            else:
                # Lift off the button for a few steps, then start picking.
                u[3] = 0.
                u[2] = 0.5
                self._phase_steps += 1
                if self._phase_steps > 5:
                    self._phase = 2
                    self._phase_steps = 0
                else:
                    return u

        elif raw_env.has_rotating_platform:

            if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.22:  # FIXME
                # Pose of the platform's far end (pos + quat from site).
                far_end_pose = np.r_[
                    raw_env.sim.data.get_site_xpos('rotating_platform:far_end'
                                                   ),
                    tf.rotations.mat2quat(
                        raw_env.sim.data.
                        get_site_xmat('rotating_platform:far_end')), ]

                close_end_pose = tf.apply_tf(
                    np.r_[-0.5, -0.09, 0., 1., 0., 0., 0.], far_end_pose)
                tf.render_pose(close_end_pose,
                               raw_env.viewer,
                               label="close_end")

                # push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
                # push_target = tf.apply_tf(table_tf, push_target)
                push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.],
                                          close_end_pose)
                tf.render_pose(push_target, raw_env.viewer)

                # Approach from slightly above on the very first step.
                if self._phase == -1:
                    close_end_pose[2] += 0.05

                err = close_end_pose[:3] - grasp_center_pos
                u = np.zeros(4)

                # Pre-phase 0: move the gripper onto the platform's close end.
                if self._phase < 1:
                    if np.linalg.norm(err) > 0.015:
                        u[3] = -1.
                        u[:3] = err * 20.0
                    else:
                        self._phase += 1
                        self._phase_steps = 0

                # Pre-phase 1: push the platform for a fixed number of steps.
                if self._phase == 1:
                    u[3] = -1.
                    u[:3] = (push_target[:3] - grasp_center_pos) * 10
                    self._phase_steps += 1
                    if self._phase_steps > 80:
                        self._phase += 1
                        self._phase_steps = 0

                return u

            elif self._phase < 2:
                self._phase = 2
                self._phase_steps = 0

        elif self._phase < 2:
            # No pre-manipulation needed: go straight to picking.
            self._phase = 2
            self._phase_steps = 0

        # Phase 2: lift the gripper clear of the table surface.
        if np.abs(table_tf[2] -
                  grasp_center_pos[2]) < 0.07 and self._phase == 2:
            action = [0, 0, 0.4, 0.05]
            return action
        elif self._phase == 2:
            self._phase = 3
            self._phase_steps = 0

        # Phase 3: hover slightly above the object, gripper open (0.05).
        if np.linalg.norm(object_oriented_goal) >= 0.005 and self._phase == 3:
            action = [0, 0, 0, 0]
            object_oriented_goal = object_rel_pos.copy()
            object_oriented_goal[2] += 0.03
            for i in range(len(object_oriented_goal)):
                action[i] = object_oriented_goal[i] * 6
            action[len(action) - 1] = 0.05
            return action
        elif self._phase == 3:
            self._phase = 4
            self._phase_steps = 0

        # Phase 4: descend onto the object while closing the gripper.
        if np.linalg.norm(object_rel_pos) >= 0.005 and self._phase == 4:
            action = [0, 0, 0, 0]
            for i in range(len(object_rel_pos)):
                action[i] = object_rel_pos[i] * 6
            action[len(action) - 1] = -0.2
            return action
        elif self._phase == 4:
            self._phase = 5
            self._phase_steps = 0

        # Phase 5: carry the grasped object toward the goal.
        if np.linalg.norm(goal - object_pos) >= 0.01 and self._phase == 5:
            action = [0, 0, 0, 0]
            for i in range(len(goal - object_pos)):
                action[i] = (goal - object_pos)[i] * 6
            action[len(action) - 1] = -0.2
            return action
        elif self._phase == 5:
            self._phase = 6
            self._phase_steps = 0

        # Phase 6: goal reached -- just keep holding the object.
        if self._phase == 6:
            action = [0, 0, 0, 0]
            action[len(action) - 1] = -0.2  # keep the gripper closed
            return action
Ejemplo n.º 10
0
    def _step(self, action: np.ndarray):
        """Run one high-level pick step; return ``(obs, reward, done, info)``.

        The clipped action encodes five per-finger position offsets relative
        to the object (first 15 entries, scaled to +/-0.03 per axis) and —
        for the PICK_AND_PLACE task — a normalized forearm target (remaining
        entries) that is mapped into ``self.sim_env.forearm_bounds``.  The
        step executes three sub-motions (open-hand pregrasp approach, finger
        closing, arm move) and rewards arm stability, plus a goal-distance
        term for PICK_AND_PLACE.
        """
        # Clip into the declared action space before decoding.
        action = np.clip(action, self.action_space.low, self.action_space.high)

        # First 5*3 entries: finger offsets in the object frame, scaled to 3 cm.
        finger_offsets = action[:15].reshape(-1, 3) * 0.03

        # Remaining entries: normalized arm target (PICK_AND_PLACE only).
        if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
            arm_target = np.zeros(3)
        elif self.task == HandSteppedTask.PICK_AND_PLACE:
            arm_target = action[15:]
        else:
            raise NotImplementedError

        # Map the normalized target into the forearm workspace box.
        bounds = np.array(self.sim_env.forearm_bounds).T
        if self.render_substeps:
            tf.render_box(self.viewer, bounds=bounds)
        arm_target *= np.abs(bounds[:, 1] - bounds[:, 0]) / 2.0
        arm_target += bounds.mean(axis=1)

        # "Done" when the object has fallen (z < 0.37), evaluated from the
        # pose *before* this step's motions.
        object_pose = self.sim_env._get_object_pose()
        fell_off = object_pose[2] < 0.37

        # Finger targets in world coordinates (identity orientation quaternion).
        finger_targets = np.array([
            tf.apply_tf(np.r_[offset, 1., 0., 0., 0.], object_pose)
            for offset in finger_offsets
        ])

        # Disable all equality constraints except the first.
        self.sim.model.eq_active[1:] = 0

        if self.render_substeps:
            tf.render_pose(arm_target.copy(),
                           self.sim_env.viewer,
                           label='arm_t')

        # Pregrasp palm target: centroid of the finger targets, shifted by a
        # fixed offset expressed in the centroid's own frame.
        palm_center = finger_targets[:, :3].mean(axis=0)
        palm_pregrasp = tf.apply_tf(
            np.r_[-0.01, 0., 0.015, 1., 0., 0., 0.],
            np.r_[palm_center, 1., 0., 0., 0.])[:3]

        # Approach with an open hand unless the robot already touches the object.
        if len(self.sim_env.get_object_contact_points(
                other_body='robot')) == 0:
            open_hand = np.r_[0., -.5, -np.ones(18)]
            open_hand[15:] = (-1., -0.5, 1., -1., 0)
            self._move_arm(palm_pregrasp, hand_action=open_hand)

        # Close the fingers onto their targets, then move the arm and count
        # how many steps the grasp stayed stable.
        self._move_fingers(finger_targets, max_steps=30)
        stable_steps = self._move_arm(arm_target, count_stable_steps=True)

        done = fell_off
        obs = self._get_obs()

        if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
            info = dict()
            reward = stable_steps
        elif self.task == HandSteppedTask.PICK_AND_PLACE:
            info = {
                'is_success':
                self.sim_env._is_success(obs['achieved_goal'], self.goal)
            }
            dist_to_goal = np.linalg.norm(self.goal[:3] -
                                          self.sim_env._get_object_pose()[:3])
            # Smoothly saturating distance reward; range [0, 100].
            dist_reward = 100 / (
                1 + dist_to_goal * _smooth_step(dist_to_goal) * 100)
            alpha = 0.25
            reward = dist_reward * alpha + stable_steps * (1 - alpha)
        else:
            raise NotImplementedError

        return obs, reward, done, info
Ejemplo n.º 11
0
    def predict(self, obs: dict, **kwargs) -> np.ndarray:
        """Scripted grasping policy: map a goal-env observation to an action.

        Reads ``obs['achieved_goal']`` (object position in its first three
        components) and ``obs['desired_goal']`` and returns an action for
        ``self._env``, clipped to its action space.  Behaviour is selected by
        ``self._strategy``:

        * 0    -- approach with a preset hand shape, close once the object
                  rests on the palm or the approach stalls, then servo the
                  arm toward the desired goal;
        * 1, 2 -- like 0 but with a different approach offset and
                  finger-closing configurations;
        * 3    -- a four-phase state machine (``self._phase`` 0..3) that moves
                  through a pregrasp pose to a grasp pose before closing and
                  moving to the goal, with exploration noise added.

        Internal state (``self._goal``, ``self._phase``, ``self._grasp_steps``,
        ``self._prev_d``, ``self._hand_ctrl``) is re-initialized via
        ``self._reset`` whenever the desired goal changes.  Returns an
        all-zero action once the object has dropped (z < 0.38).
        """

        # New episode / new goal: re-initialize the internal policy state.
        if self._goal is None or np.any(self._goal != obs['desired_goal']):
            self._reset(obs)

        # Exploration-noise scales; strategy 3 overrides these below.
        wrist_noise = 0.
        arm_pos_noise = 0.
        fingers_noise = 0.

        strategy = self._strategy
        action = np.zeros(self._env.action_space.shape)
        obj_pos = obs['achieved_goal'][:3]
        # Vector from the grasp center (rotation ignored) to the object.
        d = obj_pos - self._env.unwrapped._get_grasp_center_pose(
            no_rot=True)[:3]
        reached = np.linalg.norm(d) < 0.05
        on_palm = False
        dropped = obj_pos[2] < 0.38  # object fell -- give up for this episode

        if dropped:
            return action * 0.0

        # Decide whether the object already rests on the palm by counting
        # contact points that involve a "palm" body.
        if reached:
            contacts = self._env.unwrapped.get_object_contact_points()
            palm_contacts = len([
                x for x in contacts
                if 'palm' in x['body1'] or 'palm' in x['body2']
            ])
            on_palm = palm_contacts > 0

        if strategy == 0:
            # Approach with a preset open-hand shape, then close on contact.
            wrist_ctrl = -1.0
            self._hand_ctrl[:] = -1.0

            self._hand_ctrl[[0, 3]] = 1.0
            self._hand_ctrl[[6, 9]] = -1.0

            # Last five entries: presumably the thumb/wrist coupling
            # configuration -- TODO confirm against the env's actuator order.
            self._hand_ctrl[13:] = (-1., -0.5, 1., -1., 0)

            d += np.r_[0., -0.030, 0.0]  # fixed approach-point offset
            # "still" = approach vector barely changed since the last call,
            # i.e. the approach has stalled; threshold relaxes while grasping.
            still = np.linalg.norm(d - self._prev_d) < 0.002
            if self._grasp_steps > 10:
                still = np.linalg.norm(d - self._prev_d) < 0.005
            self._prev_d = d.copy()

            arm_pos_ctrl = d * 1.0
            if on_palm or still:
                # Close the hand around the object and hold the arm still.
                self._hand_ctrl[:] = 1.0
                self._hand_ctrl[[0, 3]] = 1.0
                self._hand_ctrl[[6, 9]] = -1.0
                self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)

                arm_pos_ctrl *= 0.0
                self._grasp_steps += 1
                if self._grasp_steps > 10:
                    # Grasp assumed stable: servo the arm toward the goal.
                    d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                    arm_pos_ctrl = d * 0.5
            else:
                self._grasp_steps = 0

        elif strategy in [1, 2]:
            # Same approach-then-close scheme as strategy 0, with a different
            # approach offset and per-strategy finger configurations.
            d += np.r_[0., -0.035, 0.025]
            reached = np.linalg.norm(d) < 0.02
            if self._grasp_steps > 10:
                reached = np.linalg.norm(d) < 0.04  # relax once closing began

            wrist_ctrl = 0.0
            self._hand_ctrl[:] = -1.0
            self._hand_ctrl[13:] = (-1., 1., 1., -1., -1.)
            arm_pos_ctrl = d * 1.0
            if reached:
                arm_pos_ctrl *= 0.0
                self._grasp_steps += 1

                if strategy == 1:
                    self._hand_ctrl[13:] = (-0.5, 1., 1., -1., -1.)
                    self._hand_ctrl[4] = 0.6
                    self._hand_ctrl[5] = 0.5

                if strategy == 2:
                    self._hand_ctrl[13:] = (-0.1, 1., 1., -1., -1.)
                    self._hand_ctrl[6] = -0.7
                    self._hand_ctrl[7] = 0.6
                    self._hand_ctrl[8] = 0.5

                if self._grasp_steps > 10:
                    # Grasp assumed stable: servo toward the goal.
                    d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                    arm_pos_ctrl = d * 0.5
            else:
                self._grasp_steps = 0

        elif strategy == 3:
            # Phased grasp: 0) align above pregrasp (xy only), 1) reach
            # pregrasp, 2) reach grasp pose, 3) close hand and move to goal.
            wrist_ctrl = -0.5
            d = np.zeros(3)
            self._hand_ctrl[:] = -1.0
            self._hand_ctrl[13:] = (0.2, -0.2, 1., -1., 1.)

            obj_pose = self._env.unwrapped._get_object_pose()
            thdistal_pos = self._env.unwrapped.sim.data.get_body_xpos(
                'robot0:thdistal')
            # Grasp/pregrasp poses are fixed offsets in the object frame.
            grasp_pose = tf.apply_tf(
                np.r_[0.015, -0.10, 0.075, 1., 0., 0., 0.], obj_pose)
            pregrasp_pose = tf.apply_tf(np.r_[-0.08, 0., 0., 1., 0., 0., 0.],
                                        grasp_pose)

            # Debug visualization only; no effect on the returned action.
            viewer = self._env.unwrapped.viewer
            if viewer is not None:
                tf.render_pose(obj_pose, viewer, size=0.4)
                tf.render_pose(grasp_pose, viewer, size=0.2)
                tf.render_pose(pregrasp_pose, viewer, size=0.2)

            k = 2.0  # proportional gain for the arm position control
            d_thresh = 0.008
            wrist_noise = 0.1
            arm_pos_noise = 0.01
            fingers_noise = 0.2

            if self._phase == 0:
                # Move above the pregrasp point, ignoring height.
                d = pregrasp_pose[:3] - thdistal_pos
                d[2] = 0.0
                arm_pos_noise = 0.05
                d_thresh = 0.02
            elif self._phase == 1:
                d = pregrasp_pose[:3] - thdistal_pos
                arm_pos_noise = 0.01
                d_thresh = 0.01
            elif self._phase == 2:
                d = grasp_pose[:3] - thdistal_pos
                fingers_noise = 0.05
            elif self._phase == 3:
                # Close the hand; after a few steps, servo toward the goal.
                fingers_noise = 0.05
                self._hand_ctrl[:] = 1.0
                self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)
                self._grasp_steps += 1
                if self._grasp_steps > 5:
                    d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]

            # Advance to the next phase once the current target is reached.
            if self._phase < 3 and np.linalg.norm(d) < d_thresh:
                self._phase += 1
                self._grasp_steps = 0

            arm_pos_ctrl = d * k

        else:
            raise NotImplementedError

        # Pack the action -- [1]: wrist, [2:-7]: hand/fingers, [-7:-4]: arm
        # position -- each with Gaussian exploration noise, then clip to the
        # action space.
        action[1] = wrist_ctrl + np.random.randn() * wrist_noise
        action[-7:-4] = arm_pos_ctrl + np.random.randn(
            *arm_pos_ctrl.shape) * arm_pos_noise
        action[2:-7] = self._hand_ctrl + np.random.randn(
            *self._hand_ctrl.shape) * fingers_noise
        action = np.clip(action, self._env.action_space.low,
                         self._env.action_space.high)
        return action