class NutAssemblyDenseHand(gym.Env):
    """
        TODO: change obs and reward setting
        NutAssemblyHand:
            'FetchPickAndPlace-v1' with a perfect controller
            Action taken as:
                pi_theta(s) = pi(s) + f_theta(s)
        Parameters:
        -----------
        kp: float
            Scaling factor for position control
    """
    def __init__(self, kp: float = 20, *args, **kwargs):
        options = {}
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.env = GymWrapper(
            suite.make(
                "NutAssemblyRound",  # Nut Assembly task with the round peg
                robots="IIWA",  # use IIWA robot
                **options,  # controller options
                use_object_obs=True,  # provide object state observations
                use_camera_obs=False,  # do not use pixel observations
                has_offscreen_renderer=False,  # not needed since not using pixel obs
                has_renderer=False,  # no on-screen rendering
                reward_shaping=False,  # sparse task-completion reward
                control_freq=20,  # control should happen fast enough so that simulation looks smooth
            ))

        self.max_episode_steps = 500
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_type = 'sparse'
        self.distance_threshold = 0.065

    def step(self, residual_action: np.ndarray):
        # Scripted base action pi(s) computed from the previous observation
        controller_action = np.array(
            self.controller_action(self.last_observation))
        if (controller_action > 1).any() or (controller_action < -1).any():
            print(controller_action)  # warn when the scripted controller saturates
        # Residual composition pi_theta(s) = pi(s) + f_theta(s), clipped to the valid range
        action = np.add(controller_action, residual_action)
        action = np.clip(action, -1, 1)
        ob, reward, done, info = self.env.step(action)
        ob = self.env.env._get_observation()
        observation = {}
        observation['observation'] = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat']))
        observation['desired_goal'] = np.array(
            self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation['achieved_goal'] = ob['RoundNut0_pos']
        self.last_observation = observation.copy()
        info['is_success'] = reward  # the sparse reward is 1.0 only on success, so it doubles as the success flag
        return observation, reward, done, info

    def reset(self):
        self.env.reset()  # reset according to task defaults
        ob = self.env.env._get_observation()
        observation = {}
        observation['observation'] = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat']))
        observation['desired_goal'] = np.array(
            self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation['achieved_goal'] = ob['RoundNut0_pos']
        self.last_observation = observation.copy()
        self.object_in_hand = False  # nut grasped by the gripper
        self.object_below_hand = False  # gripper centered above the nut
        self.gripper_reoriented = 0  # 0..5: progress of the gripper yaw alignment
        return observation

    def seed(self, seed: int = 0):
        self.np_random, seed = seeding.np_random(seed)
        return self.env.seed(seed=seed)

    def render(self, mode: str = "human", *args, **kwargs):
        return self.env.render()

    def close(self):
        return self.env.close()

    def goal_distance(self, achieved_goal, desired_goal):
        # axis=-1 handles both single goals and batches of goals (e.g. HER relabeling)
        return np.linalg.norm(achieved_goal - desired_goal, axis=-1)

    def compute_reward(self, achieved_goal, goal, info):
        # Compute distance between goal and the achieved goal.
        d = self.goal_distance(achieved_goal, goal)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        """Scripted controller pi(s): center the gripper over the nut, align its
        yaw, grasp, lift, carry the nut over the round peg, and release."""
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982  # table surface height in the world frame (meters)

        object_axang = T.quat2axisangle(object_quat)
        # Wrap the nut's yaw into a 45-degree (2*pi/8) interval before aligning the gripper
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)

        if self.gripper_reoriented == 0:
            # Remember the gripper's initial orientation on the first call
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        # Gripper rotation relative to its initial orientation, as axis-angle
        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        if not self.object_below_hand or self.gripper_reoriented < 5:
            # Stage 1: center the gripper above the nut and align its yaw.
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            action = 20 * (object_pos[:2] - gripper_pos[:2])

            # Rotate the gripper about z toward the nut's (wrapped) yaw.
            action_angle = 0.2 * (np.array(object_axang_touse) - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5  # gripper is aligned with the nut

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm(object_pos[:2] - gripper_pos[:2]) < 0.01:
                if take_action:
                    self.object_below_hand = True

        elif not self.object_in_hand:  # Stage 2: descend onto the nut and grasp it
            action = [0, 0, -1, 0, 0, 0, -1]  # move down with the gripper open
            if abs(object_pos[2] - gripper_pos[2]) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]  # close the gripper on the nut
                if take_action:
                    self.object_in_hand = True

        else:  # Stage 3: lift the nut, carry it over the peg, then release
            action = [0, 0, 1, 0, 0, 0, 1]  # move up while keeping the gripper closed
            if object_pos[2] - z_table > 0.1:  # nut has been lifted high enough
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm(goal_pos[:2] - object_pos[:2]) < 0.0225:
                    # Drop the nut once it is close enough to the peg
                    action = [0, 0, 0, 0, 0, 0, -1]

        action = np.clip(action, -1, 1)
        return action
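

# Hedged usage sketch (not from the original module): how a learned residual
# f_theta(s) might be rolled out on top of the scripted controller pi(s).
# `residual_policy` is a hypothetical callable mapping the flat observation
# vector to a 7-dim residual action; passing None runs the scripted controller
# alone (zero residual).
def _demo_residual_rollout(residual_policy=None, n_steps: int = 500):
    env = NutAssemblyDenseHand()
    obs = env.reset()
    for _ in range(n_steps):
        if residual_policy is None:
            residual = np.zeros(env.action_space.shape)  # pure scripted controller
        else:
            residual = residual_policy(obs['observation'])  # f_theta(s)
        # env.step adds the scripted action pi(s) internally and clips to [-1, 1]
        obs, reward, done, info = env.step(residual)
        if done or info['is_success']:
            break
    env.close()
    return info['is_success']
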
class NutAssemblyDense(gym.Env):
    """
        NutAssemblyDense:
            robosuite 'NutAssemblyRound' task with dense (shaped) rewards and no
            scripted controller. Can be used for learning from scratch.
    """
    def __init__(self, horizon=500, *args, **kwargs):
        options = {}
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.env = GymWrapper(
            suite.make(
                "NutAssemblyRound",  # Nut Assembly task with the round peg
                robots="IIWA",  # use IIWA robot
                **options,  # controller options
                use_object_obs=True,  # provide object state observations
                use_camera_obs=False,  # do not use pixel observations
                has_offscreen_renderer=False,  # not needed since not using pixel obs
                has_renderer=False,  # no on-screen rendering
                reward_shaping=True,  # use dense (shaped) rewards
                reward_scale=1.0,
                control_freq=20,  # control should happen fast enough so that simulation looks smooth
                horizon=horizon,  # number of timesteps before the episode ends
            ))
        self.max_episode_steps = horizon
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_type = 'sparse'
        self.distance_threshold = 0.065

    def step(self, action):
        ob, reward, done, info = self.env.step(action)
        ob = self.env.env._get_observation()  # dict observation from the underlying robosuite env
        peg_pos = np.array(self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat'], peg_pos))
        info['is_success'] = self.env.env._check_success()
        return observation, reward, done, info

    def reset(self):
        ob = self.env.reset()
        ob = self.env.env._get_observation()
        peg_pos = np.array(self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat'], peg_pos))
        return observation

    def seed(self, seed=0):
        self.np_random, seed = seeding.np_random(seed)
        return self.env.seed(seed=seed)

    def render(self, mode="human", *args, **kwargs):
        return self.env.render()

    def close(self):
        return self.env.close()

    def goal_distance(self, achieved_goal, desired_goal):
        return np.linalg.norm(achieved_goal - desired_goal)

    def get_success(self, object_pos, goal):
        # Compute distance between goal and the achieved goal.
        d = self.goal_distance(object_pos, goal)
        return (d < self.distance_threshold).astype(np.float32)
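

# Hedged usage sketch (not from the original module): a sanity-check rollout of
# NutAssemblyDense with uniform random actions. Assumes numpy is imported as
# `np` at the top of this module, as the classes above already require.
if __name__ == "__main__":
    env = NutAssemblyDense(horizon=200)
    obs = env.reset()
    episode_return = 0.0
    for _ in range(env.max_episode_steps):
        action = np.random.uniform(-1.0, 1.0, size=env.action_space.shape)
        obs, reward, done, info = env.step(action)
        episode_return += reward
        if done:
            break
    print("episode return:", episode_return, "success:", bool(info['is_success']))
    env.close()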