Example #1
    def reset(self):
        # Disable the weld constraints so the wrapped env can reset freely
        self.sim.model.eq_active[:] = 0
        self.sim_env.reset()
        # Re-align the mocap body with the body it is welded to, then re-enable the welds
        reset_mocap2body_xpos(self.sim)
        self.sim.model.eq_active[:] = 1
        self.sim.step()
        return self._get_obs()
Example #2
    def __init__(self, task=0, control_method="position_control", sparse_reward=False, for_her=False, *args, **kwargs):
        self._task = task
        self.onehot = np.zeros(len(TASKS))
        self.onehot[self._task] = 1
        self._step = 0
        self._start_pos = np.array([0.65, 0.0, 0.15])
        self._sparse_reward = sparse_reward
        self.reward_type = "sparse" if sparse_reward else "dense"
        self._max_episode_steps = 30  # used by HER
        self._for_her = for_her

        super().__init__(control_method=control_method, *args, **kwargs)

        self._distance_threshold = 0.03
        reset_mocap2body_xpos(self.sim)

        self.env_setup(self._initial_qpos)

        self.init_qpos = self.sim.data.qpos
        self.init_qvel = self.sim.data.qvel
        self.init_qacc = self.sim.data.qacc
        self.init_ctrl = self.sim.data.ctrl

        self.init_box_height = 0.

        self._gripper_pos = 0.

        self._goal = np.array(TASKS[task][:3])
        self.set_position(self._start_pos)
        print("Instantiating TaskPickAndPlaceEnv (task = %i, control_mode = %s)" % (self._task, self._control_method))
Example #3
    def step(self, action):
        """
        Perform one step with action.

        :param action: the action to be performed, (x, y, z) only for pos_ctrl
        :return: next_obs, reward, done, info
        """
        action = np.clip(action, self.action_space.low, self.action_space.high)
        if self._control_method == 'torque_control':
            self.forward_dynamics(action)
        elif self._control_method == 'position_control':
            assert action.shape == (3, )
            action = action.copy()
            action *= 0.1  # limit the action
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_pos[:] = self.sim.data.mocap_pos + action
            self.sim.step()
        else:
            raise NotImplementedError

        obs = self.get_current_obs()
        next_obs = obs['observation']
        achieved_goal = obs['achieved_goal']
        goal = obs['desired_goal']
        reward = self._compute_reward(achieved_goal, goal)
        done = (self._goal_distance(achieved_goal, goal) <
                self._distance_threshold)
        return Step(next_obs, reward, done)
Example #4
    def set_position(self, pos):
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
        self.sim.data.set_mocap_pos('mocap', pos)
        for _ in range(100):
            self.sim.step()
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
            self.sim.data.set_mocap_pos('mocap', pos)
        self.sim.forward()
Example #5
def _mocap_set_action(sim, action):
    # Originally from gym.envs.robotics.utils
    if sim.model.nmocap > 0:
        pos_delta = action[:, :3]
        quat_delta = action[:, 3:]
        reset_mocap2body_xpos(sim)
        sim.data.mocap_pos[:] += pos_delta
        sim.data.mocap_quat[:] += quat_delta
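
Every snippet on this page relies on reset_mocap2body_xpos from gym.envs.robotics.utils to snap a mocap body back onto the body it is welded to before moving it. For reference, a sketch of that helper follows; it is paraphrased from the gym robotics utilities and may differ slightly from the version shipped with a given gym fork, so treat it as illustrative rather than authoritative.

import mujoco_py


def reset_mocap2body_xpos(sim):
    """Align each mocap body with the pose of the body it is welded to (sketch)."""
    if (sim.model.eq_type is None or sim.model.eq_obj1id is None
            or sim.model.eq_obj2id is None):
        return
    for eq_type, obj1_id, obj2_id in zip(sim.model.eq_type,
                                         sim.model.eq_obj1id,
                                         sim.model.eq_obj2id):
        if eq_type != mujoco_py.const.EQ_WELD:
            continue
        # One side of the weld is the mocap body, the other is the robot body.
        mocap_id = sim.model.body_mocapid[obj1_id]
        if mocap_id != -1:
            body_idx = obj2_id
        else:
            mocap_id = sim.model.body_mocapid[obj2_id]
            body_idx = obj1_id
        assert mocap_id != -1
        # Copy the welded body's world pose into the mocap buffers.
        sim.data.mocap_pos[mocap_id][:] = sim.data.body_xpos[body_idx]
        sim.data.mocap_quat[mocap_id][:] = sim.data.body_xquat[body_idx]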
Example #6
    def set_gripper_position(self, position):
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
        self.sim.data.set_mocap_pos('mocap', position)
        for _ in range(100):
            self.sim.step()
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
            self.sim.data.set_mocap_pos('mocap', position)
Example #7
    def step(self, action):
        # Does not update the MuJoCo env entirely, just the Robot state
        reset_mocap2body_xpos(self._env.sim)
        self._env.sim.data.mocap_pos[0, :3] = self._env.sim.data.mocap_pos[0, :3] + action[:3]
        self._env.sim.data.mocap_quat[:] = np.array([0, 0, 1, 0])
        self.gripper_state = action[3]
        for _ in range(5):
            self._env.sim.step()
        self._env.sim.forward()
Example #8
    def step(self, action):
        if self._control_method == "torque_control":
            self.forward_dynamics(action)
            self.sim.forward()
        else:
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_pos[
                0, :3] = self.sim.data.mocap_pos[0, :3] + action[:3]
            self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
            self.set_gripper_state(action[3])
            for _ in range(1):
                self.sim.step()
            self.sim.forward()

        self._step += 1

        obs = self.get_obs()
        self._achieved_goal = obs.get('achieved_goal')
        self._desired_goal = obs.get('desired_goal')

        info = {
            "l": self._step,
            "grasped": obs["has_object"],
            "gripper_state": obs["gripper_state"],
            "gripper_position": obs["gripper_pos"],
            "object_position": obs["object_pos"],
            "is_success": self._is_success,
            "joints": obs["joints"]
        }

        r = self.compute_reward(achieved_goal=obs.get('achieved_goal'),
                                desired_goal=obs.get('desired_goal'),
                                info=info)

        self._is_success = self._success_fn(self, self._achieved_goal,
                                            self._desired_goal, info)
        done = False
        if self._is_success:
            r = self._completion_bonus
            done = True

        info["r"] = r
        info["d"] = done

        return obs, r, done, info
Example #9
    def step(self, action):
        # no clipping / rescaling of actions
        if self._control_method == "torque_control":
            self.forward_dynamics(action)
            self.sim.forward()
        else:
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_pos[:] = self.sim.data.mocap_pos + action
            for _ in range(50):
                self.sim.step()
            self._step += 1

        achieved_goal = self.position

        obs = self.get_obs()

        achieved_dist = self._goal_distance(achieved_goal, self._goal)
        # TODO sparse reward
        reward = 1. - achieved_dist / self._goal_distance(
            self._start_pos, self._goal)

        done = (achieved_dist < self._distance_threshold)

        if done:
            reward = 20.

        info = {
            "episode": {
                "l": self._step,
                "r": reward,
                "d": done,
                "position": self.position,
                "task": np.copy(self.onehot)
            }
        }
        # just take gripper position as observation
        return Step(obs, reward, False, **info)
Example #10
def set_gripper_position_in_sim(env, new_gripper_pos, old_gripper_pos):
    '''
    Set a new XY position for the gripper.
    '''
    residual_pos = new_gripper_pos - old_gripper_pos
    reset_mocap2body_xpos(env.env.sim)
    mocap_pos = env.env.sim.data.get_mocap_pos('robot0:mocap').copy()
    mocap_pos[0:2] = mocap_pos[0:2] + residual_pos
    env.env.sim.data.set_mocap_pos('robot0:mocap', mocap_pos)
    env.env.sim.forward()
    for _ in range(2):
        env.env.sim.step()
    return True
Example #11
    def _move_arm(self,
                  grasp_center_target: np.ndarray,
                  hand_action: np.ndarray = None,
                  threshold=0.01,
                  k=0.1,
                  max_steps=100,
                  count_stable_steps=False):
        if hand_action is not None:
            hand_action = hand_action.copy()
        stable_steps = 0
        prev_rel_pos = np.zeros(3)
        reset_mocap2body_xpos(self.sim)

        for i in range(max_steps):
            grasp_center_pos = self.sim_env._get_grasp_center_pose(
                no_rot=True)[:3]
            d = grasp_center_target - grasp_center_pos
            if np.linalg.norm(d) < threshold and not count_stable_steps:
                break
            # set hand action
            if hand_action is not None:
                hand_env.HandEnv._set_action(self.sim_env, hand_action)
            self.sim.data.mocap_pos[0] += d * k
            self.sim.step()

            if count_stable_steps:
                obj_pos = self.sim_env._get_object_pose()[:3]
                rel_pos = obj_pos - grasp_center_pos
                still = prev_rel_pos is not None and np.all(
                    np.abs(rel_pos - prev_rel_pos) < 0.002)
                obj_above_table = len(
                    self.sim_env.get_object_contact_points(
                        other_body='table')) == 0
                if still and obj_above_table:
                    stable_steps += 1
                elif i > 10:
                    break
                prev_rel_pos = rel_pos

            if self.render_substeps:
                self.render(keep_markers=True)

        if count_stable_steps:
            return stable_steps
Example #12
    def step(self, action):
        if self._control_method == "torque_control":
            self.forward_dynamics(action)
            self.sim.forward()
        else:
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_pos[
                0, :3] = self.sim.data.mocap_pos[0, :3] + action[:3]
            self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
            self.set_gripper_state(action[3])
            for _ in range(1):
                self.sim.step()
            self.sim.forward()

        self._step += 1

        obs = self.get_obs()
        self._achieved_goal = obs.get('achieved_goal')
        self._desired_goal = obs.get('desired_goal')

        info = {
            "l": self._step,
            "grasped": obs["has_object"],
            "gripper_state": obs["gripper_state"],
            "gripper_position": obs["gripper_pos"],
            "object_position": obs["object_pos"],
            "is_success": self._is_success
        }

        r = self.compute_reward(achieved_goal=obs.get('achieved_goal'),
                                desired_goal=obs.get('desired_goal'),
                                info=info)

        self._is_success = self._success_fn(self, self._achieved_goal,
                                            self._desired_goal, info)
        done = False
        if self._is_success:
            r = self._completion_bonus
            done = True

        info["r"] = r
        info["d"] = done

        return obs, r, done, info
Example #13
    def _env_setup(self):
        utils.reset_mocap_welds(self.sim)
        utils.reset_mocap2body_xpos(self.sim)
        lim1_id = self.sim.model.site_name2id('limit0')
        lim2_id = self.sim.model.site_name2id('limit1')
        lim3_id = self.sim.model.site_name2id('limit2')
        lim4_id = self.sim.model.site_name2id('limit3')

        self.sim.model.site_pos[lim1_id] = self.origin + \
            np.array([-self.maxdist, -self.maxdist, 0])
        self.sim.model.site_pos[lim2_id] = self.origin + \
            np.array([self.maxdist, -self.maxdist, 0])
        self.sim.model.site_pos[lim3_id] = self.origin + \
            np.array([-self.maxdist, self.maxdist, 0])
        self.sim.model.site_pos[lim4_id] = self.origin + \
            np.array([self.maxdist, self.maxdist, 0])

        for _ in range(10):
            self._take_substeps()
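
Example #13 also calls utils.reset_mocap_welds before reset_mocap2body_xpos. That companion helper clears the relative pose stored in each weld equality constraint so that, once the positions are copied over, the mocap body and the welded body coincide exactly. A rough sketch, again paraphrased from the gym robotics utilities and not guaranteed to match every version:

import mujoco_py
import numpy as np


def reset_mocap_welds(sim):
    """Reset the mocap weld constraints used to actuate the arm (sketch)."""
    if sim.model.nmocap > 0 and sim.model.eq_data is not None:
        for i in range(sim.model.eq_data.shape[0]):
            if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
                # Identity relative pose: zero offset plus a unit quaternion.
                sim.model.eq_data[i, :] = np.array([0., 0., 0., 1., 0., 0., 0.])
    sim.forward()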
Example #14
    def __init__(self,
                 task=0,
                 control_method="position_control",
                 *args,
                 **kwargs):
        self._task = task
        self.onehot = np.zeros(len(TASKS))
        self.onehot[self._task] = 1
        self._step = 0
        self._start_pos = np.array([0.8, 0.0, 0.15])
        super().__init__(control_method=control_method, *args, **kwargs)
        self._distance_threshold = 0.03
        reset_mocap2body_xpos(self.sim)

        self.init_qpos = self.sim.data.qpos
        self.init_qvel = self.sim.data.qvel
        self.init_qacc = self.sim.data.qacc
        self.init_ctrl = self.sim.data.ctrl

        self._goal = np.array(TASKS[task])
        self.set_position(self._start_pos)
        print("Instantiating TaskReacherEnv (task = %i, control_mode = %s)" %
              (self._task, self._control_method))
Example #15
    def mocap_control(self, action):
        reset_mocap2body_xpos(self.sim)
        self.sim.model.eq_active[:] = 1
        _mocap_set_action(self.sim, action)
        self.sim.step()
        self.sim.model.eq_active[:] = 0
Example #16
    def step(self, action):
        if self._for_her:
            # HER policies emit actions in [-1, 1]; rescale them to the action space.
            action = (action + 1.) / 2.
            action = action * (self.action_space.high - self.action_space.low) + self.action_space.low
            action = np.clip(action, self.action_space.low, self.action_space.high)

        action = np.array(action).flatten()
        if self._control_method == "torque_control":
            self.forward_dynamics(action)
            self.sim.forward()
        else:
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_pos[0, :3] = self.sim.data.mocap_pos[0, :3] + action[:3]
            self.set_gripper_state(action[3])
            for _ in range(1):
                self.sim.step()
            self.sim.forward()

        self._step += 1

        grasped = self._grasp()  # TODO fix grasp recognition (always returns False)
        object_pos = self.sim.data.get_site_xpos('object0')

        desired_goal = object_pos

        if self._for_her:
            if not grasped:
                achieved_goal = self.position
            else:
                # print("Grasped!")
                achieved_goal = np.squeeze(object_pos.copy())
                desired_goal = self.goal
        else:
            achieved_goal = self.position

        obs = self.get_obs()

        # penalize x/y movement of object
        penalize_2d_motion = np.linalg.norm(object_pos[:2] - np.array(TASKS[self._task][:2]))
        # reward positive change in z direction (up)
        lifting = object_pos[2] - self.init_box_height

        achieved_dist = self._goal_distance(achieved_goal, desired_goal)

        done = grasped and lifting > 0.005

        if self._for_her:
            reward = self.compute_reward(achieved_goal, desired_goal, {})
        else:
            if self._sparse_reward:
                reward = 0. - penalize_2d_motion + lifting + float(grasped) * 0.3
            else:
                reward = (.3 + lifting * 30.) * float(grasped) + 1. - achieved_dist / self._goal_distance(
                    self._start_pos, object_pos) - penalize_2d_motion

        if done:
            reward = 20.

        if self._for_her:
            info = {
                "l": self._step,
                "r": reward,
                "d": done,
                "grasped": grasped,
                "is_success": grasped and lifting > 0.005
            }
        else:
            info = {
                "episode": {
                    "l": self._step,
                    "r": reward,
                    "d": done,
                    "grasped": grasped,
                    "position": self.position,
                    "task": np.copy(self.onehot)
                }
            }

        # just take gripper position as observation
        return Step(obs, reward, done, **info)
Example #17
    def _set_arm_pose(self, pose: np.ndarray):
        assert pose.size == 7 or pose.size == 3
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_pos[0, :] = np.clip(pose[:3], *self.forearm_bounds)
        if pose.size == 7:
            self.sim.data.mocap_quat[0, :] = pose[3:]
Example #18
    def step(self, action):
        assert action.shape == self.action_space.shape

        # Note: you MUST copy the action if you modify it
        a = action.copy()

        # Clip to action space
        a *= self._action_scale
        a = np.clip(a, self.action_space.low, self.action_space.high)
        if self._control_method == "torque_control":
            self.forward_dynamics(a)
            self.sim.forward()
        elif self._control_method == "task_space_control":
            reset_mocap2body_xpos(self.sim)
            self.sim.data.mocap_pos[
                0, :3] = self.sim.data.mocap_pos[0, :3] + a[:3]
            self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
            self.set_gripper_state(a[3])
            for _ in range(5):
                self.sim.step()
            self.sim.forward()
        elif self._control_method == "position_control":
            curr_pos = self.joint_positions

            next_pos = np.clip(a + curr_pos[:],
                               self.joint_position_space.low[:],
                               self.joint_position_space.high[:])
            self.sim.data.ctrl[:] = next_pos[:]
            self.sim.forward()
            for _ in range(5):
                self.sim.step()
        else:
            raise NotImplementedError
        self._step += 1

        obs = self.get_obs()

        # collision checking is expensive so cache the value
        in_collision = self.in_collision

        info = {
            "l": self._step,
            "grasped": obs["has_object"],
            "gripper_state": obs["gripper_state"],
            "gripper_position": obs["gripper_pos"],
            "object_position": obs["object_pos"],
            "is_success": self._is_success,
            "in_collision": in_collision,
        }

        r = self.compute_reward(achieved_goal=self._achieved_goal,
                                desired_goal=self._desired_goal,
                                info=info)

        self._is_success = self._success_fn(self, self._achieved_goal,
                                            self._desired_goal, info)
        done = self._is_success and not self._never_done

        # collision detection
        if in_collision:
            r -= self._collision_penalty
            if self._terminate_on_collision:
                done = True

        if self._is_success:
            r = self._completion_bonus

        info["r"] = r
        info["d"] = done

        return obs, r, done, info
Example #19
    def reset(self):
        utils.reset_mocap_welds(self.mj_sim)
        utils.reset_mocap2body_xpos(self.mj_sim)
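
Taken together, these examples share one pattern for Cartesian control through a mocap body: re-align the mocap body with the end effector, offset the mocap target by the desired displacement, pin the gripper orientation, and step the simulation a few times so the weld constraint pulls the arm toward the target. The sketch below only distills that common pattern; the names sim, delta_pos and n_substeps, and the downward-facing quaternion [0, 1, 0, 0], are assumptions borrowed from the examples above rather than the API of any particular environment.

import numpy as np
from gym.envs.robotics.utils import reset_mocap2body_xpos


def mocap_servo_step(sim, delta_pos, n_substeps=5):
    """Nudge the welded end effector by delta_pos via the mocap body (sketch)."""
    # Snap the mocap body back onto the end effector before applying the offset,
    # otherwise small errors accumulate across control steps.
    reset_mocap2body_xpos(sim)
    sim.data.mocap_pos[0, :3] = sim.data.mocap_pos[0, :3] + delta_pos
    # Hold the gripper orientation fixed (the quaternion used in most examples).
    sim.data.mocap_quat[:] = np.array([0., 1., 0., 0.])
    for _ in range(n_substeps):
        sim.step()
    sim.forward()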