예제 #1
0
    def _align_goal(self):

        tf_to_goal = tf.get_tf(np.r_[self._goal, 1., 0., 0., 0.],
                               self.s_table_tf)
        t_goal_pose = tf.apply_tf(tf_to_goal, self.t_table_tf)

        self.teacher_env.unwrapped.goal[:3] = t_goal_pose[:3]

        if self._env.unwrapped.sim_env.has_button:
            pos = self._env.unwrapped.sim_env._object_xy_pos_to_sync.copy()
            self.teacher_env.unwrapped.sync_object_init_pos(pos,
                                                            wrt_table=True,
                                                            now=False)

        elif not self._env.unwrapped.sim_env.has_rotating_platform:
            tf_to_obj = tf.get_tf(self._env.unwrapped.get_object_pose(),
                                  self.s_table_tf)
            t_obj_pose = tf.apply_tf(tf_to_obj, self.t_table_tf)

            joint_name = self.teacher_obj_name + ":joint"
            object_pos = self.teacher_env.unwrapped.sim.data.get_joint_qpos(
                joint_name).copy()
            object_pos[:2] = t_obj_pose[:2]
            self.teacher_env.unwrapped.sim.data.set_joint_qpos(
                joint_name, object_pos)
            self.teacher_env.unwrapped.sim.forward()

        self.teacher_agent.reset()
예제 #2
0
    def _sync_goals(*, t_env, s_env, **kwargs_):
        tf_to_goal = tf.get_tf(np.r_[s_env.goal, 1., 0., 0., 0.], s_table_tf)
        t_goal_pose = tf.apply_tf(tf_to_goal, t_table_tf)

        t_env.goal = np.r_[t_goal_pose[:3], np.zeros(4)]

        tf_to_obj = tf.get_tf(s_env.get_object_pose(), s_table_tf)
        t_obj_pose = tf.apply_tf(tf_to_obj, t_table_tf)

        object_pos = t_env.sim.data.get_joint_qpos('object:joint').copy()
        object_pos[:2] = t_obj_pose[:2]
        t_env.sim.data.set_joint_qpos('object:joint', object_pos)
        t_env.sim.forward()

        return dict(s_obs=s_env._get_obs(), t_obs=t_env._get_obs())
예제 #3
0
 def sync_goal(self, goal: np.ndarray, wrt_table=False):
     goal = goal.copy()
     if wrt_table:
         if goal.size == 3:
             goal = np.r_[goal, 1., 0., 0., 0.]
         goal = tf.apply_tf(goal, self.get_table_surface_pose())
         self.goal = goal[:3]
     else:
         self.goal = goal[:3]
예제 #4
0
파일: yumi_env.py 프로젝트: contactrika/gym
    def sync_object_init_pos(self,
                             pos: np.ndarray,
                             wrt_table=False,
                             now=False):
        assert pos.size == 2
        if wrt_table:
            pose = tf.apply_tf(np.r_[pos, 0., 1., 0., 0., 0.],
                               self.get_table_surface_pose())
            self._object_xy_pos_to_sync = pose[:2]
        else:
            self._object_xy_pos_to_sync = pos.copy()

        if now:
            object_qpos = self.sim.data.get_joint_qpos('object0:joint').copy()
            object_qpos[:2] = self._object_xy_pos_to_sync
            self.sim.data.set_joint_qpos('object0:joint', object_qpos)
            self.sim.forward()
예제 #5
0
    def step(self, action: np.ndarray):
        def failure():
            return self._get_obs(), 0.0, True, dict()

        action = np.clip(action, self.action_space.low, self.action_space.high)
        action = action.reshape(-1, 4)
        grippers_pos_wrt_obj = action[:, :3] * self.get_task_space()
        grippers_conf = action[:, 3]

        obj_pose = self.get_object_pose()
        grippers_pos_targets = np.array([
            tf.apply_tf(np.r_[transf, 1., 0., 0., 0.], obj_pose)[:3]
            for transf in grippers_pos_wrt_obj
        ])

        vec = grippers_pos_targets[0, :2] - grippers_pos_targets[1, :2]
        grasp_radius = np.linalg.norm(vec, ord=2) / 2.0
        right_yaw = np.arctan2(vec[1], vec[0]) + np.pi / 2
        left_yaw = np.arctan2(-vec[1], -vec[0]) + np.pi / 2

        left_target_pose = np.r_[grippers_pos_targets[0],
                                 tf.rotations.euler2quat(np.r_[0, 0,
                                                               left_yaw])]

        right_target_pose = np.r_[grippers_pos_targets[1],
                                  tf.rotations.euler2quat(np.r_[0, 0,
                                                                right_yaw])]

        # - move arms to pregrasp1 pose
        # targets are farther and higher wrt object center
        max_pos_err = self._move_arms(
            left_target=tf.apply_tf(np.r_[0., 0.05, 0.1],
                                    left_target_pose)[:3],
            left_yaw=left_yaw,
            right_target=tf.apply_tf(np.r_[0., 0.05, 0.1],
                                     right_target_pose)[:3],
            right_yaw=right_yaw,
            left_grp_config=grippers_conf[0],
            right_grp_config=grippers_conf[1],
            max_steps=120,
        )

        if max_pos_err > 0.05 or self.is_object_unreachable():
            return failure()

        # - move arms to pregrasp2 pose
        # targets are farther and but aligned to object center
        max_pos_err = self._move_arms(
            left_target=tf.apply_tf(np.r_[0., 0.05, 0.01],
                                    left_target_pose)[:3],
            left_yaw=left_yaw,
            right_target=tf.apply_tf(np.r_[0., 0.05, 0.01],
                                     right_target_pose)[:3],
            right_yaw=right_yaw,
            left_grp_config=grippers_conf[0],
            right_grp_config=grippers_conf[1],
            max_steps=50,
        )

        if max_pos_err > 0.05 or self.is_object_unreachable():
            return failure()

        # - move arms to grasp pose
        # targets are as specified by the agent
        self._move_arms(
            left_target=tf.apply_tf(np.r_[0., 0., 0.01], left_target_pose)[:3],
            left_yaw=left_yaw,
            right_target=tf.apply_tf(np.r_[0., 0., 0.01],
                                     right_target_pose)[:3],
            right_yaw=right_yaw,
            left_grp_config=grippers_conf[0],
            right_grp_config=grippers_conf[1],
            max_steps=40,
        )

        if self.is_object_unreachable():
            return failure()

        # - lift
        # targets are as specified by the agent but a bit higher to go up
        stable_steps = self._move_arms(
            left_target=np.r_[0., grasp_radius * 0.9, 0.05],
            left_yaw=left_yaw,
            right_target=np.r_[0., -grasp_radius * 0.9, 0.05],
            right_yaw=right_yaw,
            count_stable_steps=True,
            targets_relative_to=self.get_object_pos,
            left_grp_config=grippers_conf[0],
            right_grp_config=grippers_conf[1],
            max_steps=200,
        )

        obs = self._get_obs()
        reward = float(stable_steps)
        done = self.is_object_unreachable()
        info = dict()
        return obs, reward, done, info
예제 #6
0
    def _move_arms(self,
                   *,
                   left_target: np.ndarray,
                   right_target: np.ndarray,
                   left_yaw=0.0,
                   right_yaw=0.0,
                   pos_threshold=0.02,
                   rot_threshold=0.1,
                   k=2.0,
                   max_steps=100,
                   count_stable_steps=False,
                   targets_relative_to=None,
                   left_grp_config=-1.0,
                   right_grp_config=-1.0):

        targets = {'l': left_target, 'r': right_target}
        yaws = {'l': left_yaw, 'r': right_yaw}
        stable_steps = 0
        prev_rel_pos = np.zeros(3)
        u = np.zeros(self.sim_env.action_space.shape)
        prev_err_l = np.zeros(7)
        prev_err_r = np.zeros(7)
        max_pos_err = -np.inf

        for i in range(max_steps):

            grasp_center_pos = np.zeros(3)
            max_rot_err = -np.inf
            max_pos_err = -np.inf

            for arm_i, arm in enumerate(('l', 'r')):

                curr_pose = self.get_gripper_pose(arm)
                curr_q = self.get_arm_config(arm)

                if arm == 'l':
                    pitch = np.pi - 0.9
                    u_masked = u[:7]
                    prev_err = prev_err_l
                else:
                    pitch = np.pi - 0.9
                    u_masked = u[8:15]
                    prev_err = prev_err_r

                if callable(targets_relative_to):
                    reference = targets_relative_to()
                    target_pos = tf.apply_tf(targets[arm], reference)[:3]
                else:
                    target_pos = targets[arm]

                target_pose = np.r_[target_pos,
                                    tf.rotations.euler2quat(np.r_[0., 0.,
                                                                  yaws[arm]])]
                target_pose = tf.apply_tf(
                    np.r_[0., 0., 0.,
                          tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                    target_pose)

                grasp_center_pos += curr_pose[:3]
                max_pos_err = max(
                    max_pos_err,
                    np.abs(curr_pose[:3] - target_pose[:3]).max())
                max_rot_err = max(
                    max_rot_err,
                    tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))

                target_q = self.sim_env.mocap_ik(target_pose - curr_pose, arm)
                u_masked[:] = self._controller(curr_q - target_q, prev_err, k)

                if self.render_substeps:
                    tf.render_pose(target_pos.copy(),
                                   self.viewer,
                                   label=f"{arm}_p",
                                   unique_label=True)
                    tf.render_pose(target_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}_t",
                                   unique_label=True)
                    tf.render_pose(curr_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}",
                                   unique_label=True)

            grasp_center_pos /= 2.0

            u[7] = left_grp_config
            u[15] = right_grp_config
            u = np.clip(u, self.sim_env.action_space.low,
                        self.sim_env.action_space.high)
            self.sim_env.step(u)

            if self.render_substeps:
                tf.render_pose(grasp_center_pos,
                               self.viewer,
                               label="grasp_center",
                               unique_id=5554)
                self.render(keep_markers=True)

            if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
                break

            if count_stable_steps:
                obj_pos = self.get_object_pos()
                rel_pos = obj_pos - grasp_center_pos
                still = prev_rel_pos is not None and np.all(
                    np.abs(rel_pos - prev_rel_pos) < 0.002)
                obj_above_table = len(
                    self.sim_env.get_object_contact_points(
                        other_body='table')) == 0
                if still and obj_above_table:
                    stable_steps += 1
                elif i > 10:
                    break
                prev_rel_pos = rel_pos

        if count_stable_steps:
            return stable_steps

        return max_pos_err
예제 #7
0
    def _move_arms_mocap(self,
                         *,
                         left_target: np.ndarray,
                         right_target: np.ndarray,
                         left_yaw=0.0,
                         right_yaw=0.0,
                         pos_threshold=0.02,
                         rot_threshold=0.1,
                         k=1.0,
                         max_steps=1,
                         left_grp_config=-1.0,
                         right_grp_config=-1.0):

        targets = {'l': left_target, 'r': right_target}
        yaws = {'l': left_yaw, 'r': right_yaw}
        self.sim.model.eq_active[:] = 1

        for i in range(max_steps):

            grasp_center_pos = np.zeros(3)
            max_rot_err = -np.inf
            max_pos_err = -np.inf

            d_above_table = self.get_object_pos(
            )[2] - self.sim_env._object_z_offset
            grp_xrot = 0.9 + d_above_table * 2.0

            mocap_a = np.zeros((self.sim.model.nmocap, 7))

            for arm_i, arm in enumerate(('l', 'r')):

                curr_pose = self.get_gripper_pose(arm)
                pitch = np.pi - grp_xrot

                target_pos = targets[arm]
                target_pose = np.r_[target_pos,
                                    tf.rotations.euler2quat(np.r_[0., 0.,
                                                                  yaws[arm]])]
                target_pose = tf.apply_tf(
                    np.r_[0., 0., 0.,
                          tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                    target_pose)

                grasp_center_pos += curr_pose[:3]
                if max_steps > 1:
                    max_pos_err = max(
                        max_pos_err,
                        np.abs(curr_pose[:3] - target_pose[:3]).max())
                    max_rot_err = max(
                        max_rot_err,
                        tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))

                mocap_a[arm_i] = target_pose - curr_pose

                if self.viewer is not None and self.render_poses:
                    tf.render_pose(target_pos.copy(),
                                   self.viewer,
                                   label=f"{arm}_p",
                                   unique_label=True)
                    tf.render_pose(target_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}_t",
                                   unique_label=True)
                    tf.render_pose(curr_pose.copy(),
                                   self.viewer,
                                   label=f"{arm}",
                                   unique_label=True)

            grasp_center_pos /= 2.0

            # set config of grippers
            u = np.zeros(self.sim_env.action_space.shape)
            u[7] = left_grp_config
            u[15] = right_grp_config
            self.sim_env._set_action(u)

            # set pose of grippers
            _mocap_set_action(self.sim, mocap_a * k)

            # take simulation step
            self.sim.step()
            self.sim_env._step_callback()

            if self.viewer is not None:
                if self.render_poses:
                    tf.render_pose(grasp_center_pos,
                                   self.viewer,
                                   label="grasp_center",
                                   unique_id=5554)
                self.render(keep_markers=True)

            if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
                break
예제 #8
0
    def step(self, action: np.ndarray):

        action = np.clip(action, self.action_space.low, self.action_space.high)

        (dist_between_grippers, grasp_center_pos_delta,
         grasp_center_quat_delta, l_fingers_ctrl,
         r_fingers_ctrl) = self._unpack_action(action)

        if self.mocap_ctrl:
            pos_k = 0.1
            dist_range = [0.05, 0.20]
        else:
            pos_k = 0.2
            dist_range = [0.05, 0.30]

        dist_between_grippers = np.interp(dist_between_grippers, [-1, 1],
                                          dist_range)

        curr_grasp_center_pose = self.get_grasp_center_pos(
        )  # TODO: Use pose rather than position only
        target_grasp_center_pose = np.r_[0., 0., 0., 1., 0., 0., 0.]

        target_grasp_center_pose[:
                                 3] = curr_grasp_center_pose[:
                                                             3] + grasp_center_pos_delta * pos_k
        if self.rotation_ctrl_enabled:
            # target_grasp_center_pose[3:] = curr_grasp_center_pose[3:] + grasp_center_quat_delta * 0.1
            raise NotImplementedError

        grippers_pos_targets = np.array([
            tf.apply_tf(
                np.r_[0., dist_between_grippers / 2.0, 0., 1., 0., 0., 0.],
                target_grasp_center_pose)[:3],
            tf.apply_tf(
                np.r_[0., -dist_between_grippers / 2.0, 0., 1., 0., 0., 0.],
                target_grasp_center_pose)[:3]
        ])

        vec = grippers_pos_targets[0, :2] - grippers_pos_targets[1, :2]
        right_yaw = np.arctan2(vec[1], vec[0]) + np.pi / 2
        left_yaw = np.arctan2(-vec[1], -vec[0]) + np.pi / 2

        grasp_radius = np.linalg.norm(vec, ord=2) / 2.0
        assert np.allclose(grasp_radius, dist_between_grippers / 2.0)

        if self.mocap_ctrl:
            self._move_arms_mocap(
                left_target=grippers_pos_targets[0],
                left_yaw=left_yaw,
                right_target=grippers_pos_targets[1],
                right_yaw=right_yaw,
                left_grp_config=l_fingers_ctrl,
                right_grp_config=r_fingers_ctrl,
                max_steps=1,
            )
        else:
            self._move_arms(
                left_target=grippers_pos_targets[0],
                left_yaw=left_yaw,
                right_target=grippers_pos_targets[1],
                right_yaw=right_yaw,
                left_grp_config=l_fingers_ctrl,
                right_grp_config=r_fingers_ctrl,
                max_steps=5,
            )

        obs = self._get_obs()
        done = False  # self.is_object_unreachable()
        info = {
            'is_success': self._is_success(obs['achieved_goal'], self.goal)
        }
        reward = self.compute_reward(obs['achieved_goal'], self.goal, info)
        return obs, reward, done, info
예제 #9
0
    def predict(self, obs, **kwargs):

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            self.reset(new_goal)

        obj_radius = self._object_size[0]
        obj_achieved_alt = obs['achieved_goal'].item()
        obj_achieved_pose = obs['observation'][44:51]
        if np.all(obj_achieved_pose[3:] == 0):
            obj_achieved_pose[3] = 1.0
        # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O")

        grp_xrot = 0.9 + obj_achieved_alt * 2.0

        curr_grp_poses = {
            a:
            np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                  rotations.mat2quat(
                      self._raw_env.sim.data.
                      get_site_xmat(f'gripper_{a}_center')), ]
            for a in ('l', 'r')
        }

        pos_errors = []
        for arm in ('l', 'r'):

            if arm == 'l':
                transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.]
                if self._phase == 3:
                    transf[1] *= 0.9
                    transf[2] = 0.05
                pose = tf.apply_tf(transf, obj_achieved_pose)
                # tf.render_pose(pose, self._raw_env.viewer, label="L")

                grp_target_pos = pose[:3]
                grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01]
                target_pose = np.r_[grp_target_pos,
                                    rotations.euler2quat(grp_target_rot)]

                prev_err = self._prev_err_l
                curr_q = obs['observation'][:7]
                u_masked = u[:7]

            elif arm == 'r':
                transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.]
                if self._phase == 3:
                    transf[1] *= 0.9
                    transf[2] = 0.05
                pose = tf.apply_tf(transf, obj_achieved_pose)
                # tf.render_pose(pose, self._raw_env.viewer, label="R")

                grp_target_pos = pose[:3]
                grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi]
                target_pose = np.r_[grp_target_pos,
                                    rotations.euler2quat(grp_target_rot)]

                prev_err = self._prev_err_r
                curr_q = obs['observation'][16:23]
                u_masked = u[8:15]
            else:
                continue

            u[7] = -1.0
            u[15] = -1.0

            if self._phase == 0:
                target_pose[2] += 0.1
                target_pose[1] += 0.05 * np.sign(target_pose[1])
            elif self._phase == 1:
                target_pose[2] += 0.01
                target_pose[1] += 0.05 * np.sign(target_pose[1])
            elif self._phase == 2:
                target_pose[2] += 0.01
            elif self._phase == 3:
                target_pose[2] += 0.00

            if self._raw_env.viewer is not None:
                tf.render_pose(target_pose.copy(), self._raw_env.viewer)

            curr_pose = curr_grp_poses[arm].copy()
            err_pose = curr_pose - target_pose
            err_pos = np.linalg.norm(err_pose[:3])
            pos_errors.append(err_pos)

            controller_k = 2.0
            err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
            target_q = self._raw_env.mocap_ik(-err_pose, arm)

            err_q = curr_q - target_q
            u_masked[:] = self._controller(err_q, prev_err, controller_k)

        self._phase_steps += 1
        if self._phase == 0 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.1:
            self._phase = 1
            self._phase_steps = 0
        elif self._phase == 1 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.1:
            self._phase = 2
            self._phase_steps = 0
        elif self._phase == 2:
            if self._phase_steps > 30:
                self._phase = 3
                self._phase_steps = 0

        u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
        return u
예제 #10
0
    def predict(self, obs, **kwargs):

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            self.reset(new_goal)

        grasp_center_pos = obs['observation'][:3]
        object_pos = obs['observation'][18:21]
        object_rel_pos = obs['observation'][24:27]
        c_points = self._raw_env.sim_env.get_object_contact_points()

        # button_tf = tf.get_tf(
        #     np.r_[self._raw_env.sim.data.get_geom_xpos('button_geom'), 1., 0., 0., 0.],
        #     self._raw_env.get_table_surface_pose()
        # )
        # print('button_tf', button_tf)

        if self._raw_env.sim_env.has_button and self._phase < 2:

            if np.linalg.norm(object_pos) > 0.55:
                button_pos = self._raw_env.sim.data.get_geom_xpos(
                    'button_geom')
                tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                               self._raw_env.viewer)
                err = button_pos - grasp_center_pos
                u[0] = -1.
                u[1:4] = err * 5.0
            else:
                u[0] = 0.
                u[3] = 0.5
                self._phase_steps += 1
                if self._phase_steps > 5:
                    self._phase = 2
                    self._phase_steps = 0

        elif self._raw_env.sim_env.has_rotating_platform:

            if np.linalg.norm(object_pos[:2]) > 0.12:  # FIXME
                far_end_pose = np.r_[
                    self._raw_env.sim.data.
                    get_site_xpos('rotating_platform:far_end'),
                    tf.rotations.mat2quat(
                        self._raw_env.sim.data.
                        get_site_xmat('rotating_platform:far_end')), ]

                close_end_pose = tf.apply_tf(
                    np.r_[-0.5, -0.02, 0., 1., 0., 0., 0.], far_end_pose)
                tf.render_pose(close_end_pose, self._raw_env.viewer)

                push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
                # push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.], close_end_pose)
                tf.render_pose(push_target, self._raw_env.viewer)
                err = close_end_pose[:3] - grasp_center_pos

                if self._phase == 0:
                    if np.linalg.norm(err) > 0.01:
                        u[0] = 1.
                        u[1:4] = err * 5.0
                    else:
                        self._phase += 1
                        self._phase_steps = 0

                if self._phase == 1:
                    u[0] = 1.
                    u[1:4] = (push_target[:3] - grasp_center_pos) * 0.5
                    self._phase_steps += 1
                    if self._phase_steps > 80:
                        self._phase += 1
                        self._phase_steps = 0

            elif self._phase < 2:
                self._phase = 2
                self._phase_steps = 0

        elif self._phase < 2:
            self._phase = 2
            self._phase_steps = 0

        if self._phase == 2:
            if np.linalg.norm(object_rel_pos) > 0.01:
                u[0] = 0.0
                u[1:4] = object_rel_pos * 5.0
            else:
                self._phase += 1
                self._phase_steps = 0

        if self._phase == 3:
            if len(c_points) < 3:
                u[0] = -self._phase_steps / 10.0
                self._phase_steps += 1
            else:
                self._phase += 1
                self._phase_steps = 0

        if self._phase == 4:
            if len(c_points) > 2:
                u[0] = -1.0
                u[1:4] = (new_goal - object_pos) * 2.0
            else:
                self._phase = 0
                self._phase_steps = 0
        return u
예제 #11
0
파일: fetch.py 프로젝트: contactrika/gym
    def predict(self, obs, **kwargs):

        # Modified from original implementation in OpenAI's baselines:
        # https://github.com/openai/baselines/blob/master/baselines/her/experiment/data_generation/fetch_data_generation.py

        goal = obs['desired_goal']
        if self._goal is None or np.any(goal != self._goal):
            self._goal = goal.copy()
            self._phase = -1
            self._phase_steps = 0

        grasp_center_pos = obs['observation'][:3]
        object_pos = obs['observation'][3:6]
        object_rel_pos = obs['observation'][6:9]

        object_oriented_goal = object_rel_pos.copy()
        object_oriented_goal[
            2] += 0.03  # first make the gripper go slightly above the object

        raw_env = self._env.unwrapped
        table_tf = raw_env.get_table_surface_pose()

        if raw_env.has_button and self._phase < 2:
            u = np.zeros(4)
            if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.38:
                button_pos = raw_env.sim.data.get_geom_xpos('button_geom')
                tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                               raw_env.viewer)
                err = button_pos - grasp_center_pos
                u[3] = -1.
                u[:3] = err * 5.0
                return u
            else:
                u[3] = 0.
                u[2] = 0.5
                self._phase_steps += 1
                if self._phase_steps > 5:
                    self._phase = 2
                    self._phase_steps = 0
                else:
                    return u

        elif raw_env.has_rotating_platform:

            if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.22:  # FIXME
                far_end_pose = np.r_[
                    raw_env.sim.data.get_site_xpos('rotating_platform:far_end'
                                                   ),
                    tf.rotations.mat2quat(
                        raw_env.sim.data.
                        get_site_xmat('rotating_platform:far_end')), ]

                close_end_pose = tf.apply_tf(
                    np.r_[-0.5, -0.09, 0., 1., 0., 0., 0.], far_end_pose)
                tf.render_pose(close_end_pose,
                               raw_env.viewer,
                               label="close_end")

                # push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
                # push_target = tf.apply_tf(table_tf, push_target)
                push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.],
                                          close_end_pose)
                tf.render_pose(push_target, raw_env.viewer)

                if self._phase == -1:
                    close_end_pose[2] += 0.05

                err = close_end_pose[:3] - grasp_center_pos
                u = np.zeros(4)

                if self._phase < 1:
                    if np.linalg.norm(err) > 0.015:
                        u[3] = -1.
                        u[:3] = err * 20.0
                    else:
                        self._phase += 1
                        self._phase_steps = 0

                if self._phase == 1:
                    u[3] = -1.
                    u[:3] = (push_target[:3] - grasp_center_pos) * 10
                    self._phase_steps += 1
                    if self._phase_steps > 80:
                        self._phase += 1
                        self._phase_steps = 0

                return u

            elif self._phase < 2:
                self._phase = 2
                self._phase_steps = 0

        elif self._phase < 2:
            self._phase = 2
            self._phase_steps = 0

        if np.abs(table_tf[2] -
                  grasp_center_pos[2]) < 0.07 and self._phase == 2:
            action = [0, 0, 0.4, 0.05]
            return action
        elif self._phase == 2:
            self._phase = 3
            self._phase_steps = 0

        if np.linalg.norm(object_oriented_goal) >= 0.005 and self._phase == 3:
            action = [0, 0, 0, 0]
            object_oriented_goal = object_rel_pos.copy()
            object_oriented_goal[2] += 0.03
            for i in range(len(object_oriented_goal)):
                action[i] = object_oriented_goal[i] * 6
            action[len(action) - 1] = 0.05
            return action
        elif self._phase == 3:
            self._phase = 4
            self._phase_steps = 0

        if np.linalg.norm(object_rel_pos) >= 0.005 and self._phase == 4:
            action = [0, 0, 0, 0]
            for i in range(len(object_rel_pos)):
                action[i] = object_rel_pos[i] * 6
            action[len(action) - 1] = -0.2
            return action
        elif self._phase == 4:
            self._phase = 5
            self._phase_steps = 0

        if np.linalg.norm(goal - object_pos) >= 0.01 and self._phase == 5:
            action = [0, 0, 0, 0]
            for i in range(len(goal - object_pos)):
                action[i] = (goal - object_pos)[i] * 6
            action[len(action) - 1] = -0.2
            return action
        elif self._phase == 5:
            self._phase = 6
            self._phase_steps = 0

        if self._phase == 6:
            action = [0, 0, 0, 0]
            action[len(action) - 1] = -0.2  # keep the gripper closed
            return action
예제 #12
0
    def _step(self, action: np.ndarray):
        action = np.clip(action, self.action_space.low, self.action_space.high)
        fingers_pos_wrt_obj = action[:(3 * 5)].reshape(
            -1, 3) * np.r_[0.03, 0.03, 0.03]

        if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
            arm_pos_wrt_world = np.zeros(3)
        elif self.task == HandSteppedTask.PICK_AND_PLACE:
            arm_pos_wrt_world = action[(3 * 5):]
        else:
            raise NotImplementedError

        arm_bounds = np.array(self.sim_env.forearm_bounds).T
        if self.render_substeps:
            tf.render_box(self.viewer, bounds=arm_bounds)

        arm_pos_wrt_world *= np.abs(arm_bounds[:, 1] - arm_bounds[:, 0]) / 2.0
        arm_pos_wrt_world += arm_bounds.mean(axis=1)

        obj_pose = self.sim_env._get_object_pose()
        obj_on_ground = obj_pose[2] < 0.37

        fingers_pos_targets = np.array([
            tf.apply_tf(np.r_[transf, 1., 0., 0., 0.], obj_pose)
            for transf in fingers_pos_wrt_obj
        ])

        self.sim.model.eq_active[1:] = 0
        # self.sim.data.mocap_pos[1:] = fingers_pos_targets[:, :3]

        if self.render_substeps:
            tf.render_pose(arm_pos_wrt_world.copy(),
                           self.sim_env.viewer,
                           label='arm_t')
            # for i, f in enumerate(fingers_pos_targets):
            #     tf.render_pose(f, self.sim_env.viewer, label=f'f_{i}')

        pregrasp_palm_target = fingers_pos_targets[:, :3].mean(axis=0)
        pregrasp_palm_target = np.r_[pregrasp_palm_target, 1., 0., 0., 0.]
        pregrasp_palm_target = tf.apply_tf(
            np.r_[-0.01, 0., 0.015, 1., 0., 0., 0.], pregrasp_palm_target)[:3]

        # move hand
        if len(self.sim_env.get_object_contact_points(
                other_body='robot')) == 0:
            hand_action = np.r_[0., -.5, -np.ones(18)]
            hand_action[15:] = (-1., -0.5, 1., -1., 0)
            self._move_arm(pregrasp_palm_target, hand_action=hand_action)

        # move fingers
        self._move_fingers(fingers_pos_targets, max_steps=30)

        # move arm
        stable_steps = self._move_arm(arm_pos_wrt_world,
                                      count_stable_steps=True)

        done = obj_on_ground
        obs = self._get_obs()

        if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
            info = dict()
            reward = stable_steps
        elif self.task == HandSteppedTask.PICK_AND_PLACE:
            info = {
                'is_success':
                self.sim_env._is_success(obs['achieved_goal'], self.goal)
            }
            dist_to_goal = np.linalg.norm(self.goal[:3] -
                                          self.sim_env._get_object_pose()[:3])
            dist_reward = 100 / (
                1 + dist_to_goal * _smooth_step(dist_to_goal) * 100
            )  # range: [0, 100]
            alpha = 0.25
            reward = dist_reward * alpha + stable_steps * (1 - alpha)
        else:
            raise NotImplementedError

        return obs, reward, done, info
예제 #13
0
파일: shadow_hand.py 프로젝트: carlo-/gym
    def predict(self, obs, **kwargs):

        if self._goal is None or np.any(self._goal != obs['desired_goal']):
            self._reset(obs)

        wrist_noise = 0.
        arm_pos_noise = 0.
        fingers_noise = 0.

        strategy = self._strategy
        action = np.zeros(self._env.action_space.shape)
        obj_pos = obs['achieved_goal'][:3]
        d = obj_pos - self._env.unwrapped._get_grasp_center_pose(
            no_rot=True)[:3]
        reached = np.linalg.norm(d) < 0.05
        on_palm = False
        dropped = obj_pos[2] < 0.38

        if dropped:
            return action * 0.0

        if reached:
            contacts = self._env.unwrapped.get_object_contact_points()
            palm_contacts = len([
                x for x in contacts
                if 'palm' in x['body1'] or 'palm' in x['body2']
            ])
            on_palm = palm_contacts > 0

        if strategy == 0:
            wrist_ctrl = -1.0
            self._hand_ctrl[:] = -1.0

            self._hand_ctrl[[0, 3]] = 1.0
            self._hand_ctrl[[6, 9]] = -1.0

            self._hand_ctrl[13:] = (-1., -0.5, 1., -1., 0)

            d += np.r_[0., -0.030, 0.0]
            still = np.linalg.norm(d - self._prev_d) < 0.002
            if self._grasp_steps > 10:
                still = np.linalg.norm(d - self._prev_d) < 0.005
            self._prev_d = d.copy()

            arm_pos_ctrl = d * 1.0
            if on_palm or still:

                self._hand_ctrl[:] = 1.0
                self._hand_ctrl[[0, 3]] = 1.0
                self._hand_ctrl[[6, 9]] = -1.0
                self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)

                arm_pos_ctrl *= 0.0
                self._grasp_steps += 1
                if self._grasp_steps > 10:
                    d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                    arm_pos_ctrl = d * 0.5
            else:
                self._grasp_steps = 0

        elif strategy in [1, 2]:

            d += np.r_[0., -0.035, 0.025]
            reached = np.linalg.norm(d) < 0.02
            if self._grasp_steps > 10:
                reached = np.linalg.norm(d) < 0.04

            wrist_ctrl = 0.0
            self._hand_ctrl[:] = -1.0
            self._hand_ctrl[13:] = (-1., 1., 1., -1., -1.)
            arm_pos_ctrl = d * 1.0
            if reached:
                arm_pos_ctrl *= 0.0
                self._grasp_steps += 1

                if strategy == 1:
                    self._hand_ctrl[13:] = (-0.5, 1., 1., -1., -1.)
                    self._hand_ctrl[4] = 0.6
                    self._hand_ctrl[5] = 0.5

                if strategy == 2:
                    self._hand_ctrl[13:] = (-0.1, 1., 1., -1., -1.)
                    self._hand_ctrl[6] = -0.7
                    self._hand_ctrl[7] = 0.6
                    self._hand_ctrl[8] = 0.5

                if self._grasp_steps > 10:
                    d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                    arm_pos_ctrl = d * 0.5
            else:
                self._grasp_steps = 0

        elif strategy == 3:
            wrist_ctrl = -0.5
            d = np.zeros(3)
            self._hand_ctrl[:] = -1.0
            self._hand_ctrl[13:] = (0.2, -0.2, 1., -1., 1.)

            obj_pose = self._env.unwrapped._get_object_pose()
            thdistal_pos = self._env.unwrapped.sim.data.get_body_xpos(
                'robot0:thdistal')
            grasp_pose = tf.apply_tf(
                np.r_[0.015, -0.10, 0.075, 1., 0., 0., 0.], obj_pose)
            pregrasp_pose = tf.apply_tf(np.r_[-0.08, 0., 0., 1., 0., 0., 0.],
                                        grasp_pose)

            viewer = self._env.unwrapped.viewer
            if viewer is not None:
                tf.render_pose(obj_pose, viewer, size=0.4)
                tf.render_pose(grasp_pose, viewer, size=0.2)
                tf.render_pose(pregrasp_pose, viewer, size=0.2)

            k = 2.0
            d_thresh = 0.008
            wrist_noise = 0.1
            arm_pos_noise = 0.01
            fingers_noise = 0.2

            if self._phase == 0:
                d = pregrasp_pose[:3] - thdistal_pos
                d[2] = 0.0
                arm_pos_noise = 0.05
                d_thresh = 0.02
            elif self._phase == 1:
                d = pregrasp_pose[:3] - thdistal_pos
                arm_pos_noise = 0.01
                d_thresh = 0.01
            elif self._phase == 2:
                d = grasp_pose[:3] - thdistal_pos
                fingers_noise = 0.05
            elif self._phase == 3:
                fingers_noise = 0.05
                self._hand_ctrl[:] = 1.0
                self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)
                self._grasp_steps += 1
                if self._grasp_steps > 5:
                    d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]

            if self._phase < 3 and np.linalg.norm(d) < d_thresh:
                self._phase += 1
                self._grasp_steps = 0

            arm_pos_ctrl = d * k

        else:
            raise NotImplementedError

        action[1] = wrist_ctrl + np.random.randn() * wrist_noise
        action[-7:-4] = arm_pos_ctrl + np.random.randn(
            *arm_pos_ctrl.shape) * arm_pos_noise
        action[2:-7] = self._hand_ctrl + np.random.randn(
            *self._hand_ctrl.shape) * fingers_noise
        action = np.clip(action, self._env.action_space.low,
                         self._env.action_space.high)
        return action