def reset(self):
    """Reset the simulation and return the initial observation.

    Equality constraints (the mocap welds) are disabled for the duration
    of the underlying reset so the mocap body cannot drag the arm, then
    re-enabled after the mocap body has been re-aligned.
    """
    # Disable all equality constraints during the reset.
    self.sim.model.eq_active[:] = 0
    self.sim_env.reset()
    # Snap the mocap body back onto its welded body before re-activating
    # the welds; otherwise the first step would jerk the arm.
    reset_mocap2body_xpos(self.sim)
    self.sim.model.eq_active[:] = 1
    self.sim.step()
    return self._get_obs()
def __init__(self,
             task=0,
             control_method="position_control",
             sparse_reward=False,
             for_her=False,
             *args,
             **kwargs):
    """Pick-and-place task environment.

    :param task: index into the global TASKS table; selects the goal.
    :param control_method: "position_control" (mocap-driven) or
        "torque_control".
    :param sparse_reward: if True, reward_type is set to "sparse".
    :param for_her: enable HER-specific handling in step()/reward.
    """
    self._task = task
    # One-hot encoding of the active task (exposed in episode info).
    self.onehot = np.zeros(len(TASKS))
    self.onehot[self._task] = 1
    self._step = 0
    self._start_pos = np.array([0.65, 0.0, 0.15])
    self._sparse_reward = sparse_reward
    self.reward_type = "sparse" if sparse_reward else "dense"
    self._max_episode_steps = 30  # used by HER
    self._for_her = for_her
    super().__init__(control_method=control_method, *args, **kwargs)
    self._distance_threshold = 0.03
    # Align the mocap body with its welded body before capturing the
    # initial simulator state below.
    reset_mocap2body_xpos(self.sim)
    self.env_setup(self._initial_qpos)
    # NOTE(review): these are live views into sim.data, not copies — they
    # change as the simulation advances. Confirm that is intended.
    self.init_qpos = self.sim.data.qpos
    self.init_qvel = self.sim.data.qvel
    self.init_qacc = self.sim.data.qacc
    self.init_ctrl = self.sim.data.ctrl
    self.init_box_height = 0.
    self._gripper_pos = 0.
    self._goal = np.array(TASKS[task][:3])
    self.set_position(self._start_pos)
    print("Instantiating TaskPickAndPlaceEnv (task = %i, control_mode = %s)"
          % (self._task, self._control_method))
def step(self, action):
    """Perform one step with action.

    :param action: the action to be performed, (x, y, z) only for
        position control; clipped to the action space.
    :return: Step(next_obs, reward, done)
    """
    action = np.clip(action, self.action_space.low, self.action_space.high)
    if self._control_method == 'torque_control':
        self.forward_dynamics(action)
    elif self._control_method == 'position_control':
        assert action.shape == (3, )
        # Work on a scaled copy so the caller's array is untouched; the
        # 0.1 factor limits the per-step mocap displacement.
        delta = action.copy()
        delta *= 0.1  # limit the action
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_pos[:] = self.sim.data.mocap_pos + delta
        self.sim.step()
    else:
        raise NotImplementedError
    observation = self.get_current_obs()
    achieved = observation['achieved_goal']
    desired = observation['desired_goal']
    reward = self._compute_reward(achieved, desired)
    # The episode ends once the achieved goal is within the threshold.
    done = self._goal_distance(achieved, desired) < self._distance_threshold
    return Step(observation['observation'], reward, done)
def set_position(self, pos):
    """Servo the mocap-welded gripper to ``pos``.

    Pins the mocap target (with a fixed downward-facing orientation),
    lets the simulation settle for 100 steps, then re-pins the target so
    the weld does not drift, and finally recomputes derived quantities
    with ``sim.forward()``.

    :param pos: Cartesian (x, y, z) target for the mocap body.
    """
    # Removed dead commented-out code and the unused `gripper_ctrl`
    # local from the original implementation; behavior is unchanged.
    # First pass: align the mocap body with its welded body, face the
    # gripper downwards ([0, 1, 0, 0] quaternion), set the target.
    reset_mocap2body_xpos(self.sim)
    self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
    self.sim.data.set_mocap_pos('mocap', pos)
    # Let the weld pull the arm toward the target.
    for _ in range(100):
        self.sim.step()
    # Second pass: re-pin the target after settling so subsequent steps
    # do not drag the arm back toward a stale target.
    reset_mocap2body_xpos(self.sim)
    self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
    self.sim.data.set_mocap_pos('mocap', pos)
    self.sim.forward()
def _mocap_set_action(sim, action):
    """Apply a per-mocap (position, quaternion) delta action.

    Originally from gym.envs.robotics.utils.

    :param sim: MuJoCo simulation object.
    :param action: array of shape (nmocap, 7); columns 0-2 are position
        deltas, columns 3-6 are quaternion deltas.
    """
    if sim.model.nmocap <= 0:
        return
    # Re-anchor the mocap bodies before applying the deltas.
    reset_mocap2body_xpos(sim)
    sim.data.mocap_pos[:] += action[:, :3]
    sim.data.mocap_quat[:] += action[:, 3:]
def set_gripper_position(self, position):
    """Drive the mocap-welded gripper to ``position`` and let it settle.

    :param position: Cartesian (x, y, z) target for the mocap body.
    """

    def _pin_target():
        # Re-anchor the mocap body, force the downward-facing
        # orientation, and (re)set the Cartesian target.
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
        self.sim.data.set_mocap_pos('mocap', position)

    _pin_target()
    # Let the weld pull the arm toward the target.
    for _ in range(100):
        self.sim.step()
    # Pin again after settling so the target does not drift.
    _pin_target()
def step(self, action):
    """Apply one robot-level action to the wrapped env's simulation.

    Does not update the MuJoCo env entirely, just the Robot state.

    :param action: first three entries are a Cartesian mocap
        displacement; the fourth is stored as the gripper command.
    """
    reset_mocap2body_xpos(self._env.sim)
    # Translate the mocap target by the commanded displacement.
    self._env.sim.data.mocap_pos[0, :3] = \
        self._env.sim.data.mocap_pos[0, :3] + action[:3]
    # Fixed orientation for the mocap body.
    self._env.sim.data.mocap_quat[:] = np.array([0, 0, 1, 0])
    self.gripper_state = action[3]
    # Let the weld pull the arm toward the new target for a few substeps.
    for _ in range(5):
        self._env.sim.step()
    self._env.sim.forward()
def step(self, action):
    """Advance the environment one step and compute the reward.

    :param action: (dx, dy, dz, gripper) for position control, or joint
        torques for torque control.
    :return: (obs, reward, done, info)
    """
    # if self._for_her:
    #     # actions are in [-1, 1]
    #     action = (action + 1.) / 2.
    #     # actions are in [0, 1]
    #     action = action * (self.action_space.high - self.action_space.low) + self.action_space.low
    #     action = np.clip(action, self.action_space.low, self.action_space.high)
    #     action = np.array(action).flatten()
    if self._control_method == "torque_control":
        self.forward_dynamics(action)
        self.sim.forward()
    else:
        # Position control: translate the mocap target by the commanded
        # delta with a fixed downward-facing orientation.
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_pos[
            0, :3] = self.sim.data.mocap_pos[0, :3] + action[:3]
        self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
        self.set_gripper_state(action[3])
        for _ in range(1):
            self.sim.step()
        self.sim.forward()
    self._step += 1
    obs = self.get_obs()
    self._achieved_goal = obs.get('achieved_goal')
    self._desired_goal = obs.get('desired_goal')
    # NOTE(review): "is_success" here is the value from the *previous*
    # step; it is recomputed only after the reward call below.
    info = {
        "l": self._step,
        "grasped": obs["has_object"],
        "gripper_state": obs["gripper_state"],
        "gripper_position": obs["gripper_pos"],
        "object_position": obs["object_pos"],
        "is_success": self._is_success,
        "joints": obs["joints"]
    }
    r = self.compute_reward(achieved_goal=obs.get('achieved_goal'),
                            desired_goal=obs.get('desired_goal'),
                            info=info)
    self._is_success = self._success_fn(self, self._achieved_goal,
                                        self._desired_goal, info)
    done = False
    if self._is_success:
        # Success terminates the episode with a fixed completion bonus
        # that overrides the shaped reward.
        r = self._completion_bonus
        done = True
    info["r"] = r
    info["d"] = done
    return obs, r, done, info
def step(self, action):
    """Advance the reacher environment one step.

    :param action: mocap position delta (position control) or joint
        torques (torque control).
    :return: Step(obs, reward, False, **info)
    """
    # no clipping / rescaling of actions
    # action = np.clip(action, self.action_space.low, self.action_space.high)
    # rot_ctrl = np.array([1., 0., 1., 0.])
    # action = np.concatenate([action, rot_ctrl])
    # action, _ = np.split(action, (self.sim.model.nmocap * 7,))
    # action = action.reshape(self.sim.model.nmocap, 7)
    # pos_delta = action[:, :3]
    if self._control_method == "torque_control":
        self.forward_dynamics(action)
        self.sim.forward()
    else:
        # Position control: translate the mocap target and let the weld
        # pull the arm over 50 substeps.
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_pos[:] = self.sim.data.mocap_pos + action
        for _ in range(50):
            self.sim.step()
    self._step += 1
    # obs = self.get_current_obs()
    # achieved_goal = obs['achieved_goal']
    # goal = obs['desired_goal']
    achieved_goal = self.position
    # reward = self._compute_reward(achieved_goal, goal)
    obs = self.get_obs()
    achieved_dist = self._goal_distance(achieved_goal, self._goal)
    # reward = rewards._sigmoids(self._goal_distance(achieved_goal, goal) / self._goal_distance(self.initial_pos, goal), 0., "cosine")
    # reward = 1. - achieved_dist / self._goal_distance(self._start_pos, self._goal) / 2.  # before
    # reward = 1. - achieved_dist / self._goal_distance(np.zeros(3), self._goal) / 2.
    # TODO sparse reward
    # Linear shaping: 1 at the start position, 0 at the goal.
    reward = 1. - achieved_dist / self._goal_distance(
        self._start_pos, self._goal)
    # print(self.initial_pos, achieved_goal)
    done = (achieved_dist < self._distance_threshold)
    if done:
        reward = 20.  # 20.
    info = {
        "episode": {
            "l": self._step,
            "r": reward,
            "d": done,
            "position": self.position,
            "task": np.copy(self.onehot)
        }
    }
    # just take gripper position as observation
    # NOTE(review): `done` is computed (and triggers the bonus reward)
    # but Step is returned with done=False — presumably a deliberate
    # infinite-horizon choice; confirm before changing.
    return Step(obs, reward, False, **info)
def set_gripper_position_in_sim(env, new_gripper_pos, old_gripper_pos):
    """Translate the gripper's mocap target in the XY plane.

    Applies the XY residual between the new and old gripper positions to
    the 'robot0:mocap' target, then lets the simulation settle briefly.

    :param env: wrapped environment exposing ``env.env.sim``.
    :param new_gripper_pos: target XY position of the gripper.
    :param old_gripper_pos: current XY position of the gripper.
    :return: always True (kept for caller compatibility).
    """
    # Only the XY residual is applied; Z is left untouched.
    residual_pos = new_gripper_pos - old_gripper_pos
    reset_mocap2body_xpos(env.env.sim)
    mocap_pos = env.env.sim.data.get_mocap_pos('robot0:mocap').copy()
    mocap_pos[0:2] = mocap_pos[0:2] + residual_pos
    env.env.sim.data.set_mocap_pos('robot0:mocap', mocap_pos)
    env.env.sim.forward()
    # Let the weld pull the arm toward the new target.
    for _ in range(2):
        env.env.sim.step()
    return True
def _move_arm(self,
              grasp_center_target: np.ndarray,
              hand_action: np.ndarray = None,
              threshold=0.01,
              k=0.1,
              max_steps=100,
              count_stable_steps=False):
    """Servo the grasp center toward ``grasp_center_target``.

    Proportional control: each iteration the mocap target is moved by
    ``k`` times the remaining error, optionally applying ``hand_action``
    to the hand at every substep.

    :param grasp_center_target: Cartesian target for the grasp center.
    :param hand_action: optional hand action applied each iteration
        (copied so the caller's array is untouched).
    :param threshold: stop when the error norm falls below this (only
        when not counting stable steps).
    :param k: proportional gain on the position error.
    :param max_steps: iteration cap.
    :param count_stable_steps: if True, instead of early-stopping, count
        steps where the object stays still relative to the grasp center
        and off the table, and return that count.
    :return: number of stable steps if ``count_stable_steps``, else None.
    """
    if hand_action is not None:
        hand_action = hand_action.copy()
    stable_steps = 0
    prev_rel_pos = np.zeros(3)
    reset_mocap2body_xpos(self.sim)
    for i in range(max_steps):
        grasp_center_pos = self.sim_env._get_grasp_center_pose(
            no_rot=True)[:3]
        d = grasp_center_target - grasp_center_pos
        if np.linalg.norm(d) < threshold and not count_stable_steps:
            break
        # set hand action
        if hand_action is not None:
            hand_env.HandEnv._set_action(self.sim_env, hand_action)
        # Proportional step toward the target.
        self.sim.data.mocap_pos[0] += d * k
        self.sim.step()
        if count_stable_steps:
            obj_pos = self.sim_env._get_object_pose()[:3]
            rel_pos = obj_pos - grasp_center_pos
            # NOTE(review): prev_rel_pos is initialized to zeros above,
            # so the "is not None" guard never fires; on the first
            # iteration stillness is measured against the zero vector.
            still = prev_rel_pos is not None and np.all(
                np.abs(rel_pos - prev_rel_pos) < 0.002)
            obj_above_table = len(
                self.sim_env.get_object_contact_points(
                    other_body='table')) == 0
            if still and obj_above_table:
                stable_steps += 1
            elif i > 10:
                # Give the grasp a 10-step grace period before bailing
                # out on instability.
                break
            prev_rel_pos = rel_pos
        if self.render_substeps:
            self.render(keep_markers=True)
    if count_stable_steps:
        return stable_steps
def step(self, action):
    """Advance the environment one step and compute the reward.

    :param action: (dx, dy, dz, gripper) for position control, or joint
        torques for torque control.
    :return: (obs, reward, done, info)
    """
    if self._control_method == "torque_control":
        self.forward_dynamics(action)
        self.sim.forward()
    else:
        # Position control: translate the mocap target by the commanded
        # delta with a fixed downward-facing orientation.
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_pos[
            0, :3] = self.sim.data.mocap_pos[0, :3] + action[:3]
        self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0])
        self.set_gripper_state(action[3])
        for _ in range(1):
            self.sim.step()
        self.sim.forward()
    self._step += 1
    obs = self.get_obs()
    self._achieved_goal = obs.get('achieved_goal')
    self._desired_goal = obs.get('desired_goal')
    # NOTE(review): "is_success" here is the value from the *previous*
    # step; it is recomputed only after the reward call below.
    info = {
        "l": self._step,
        "grasped": obs["has_object"],
        "gripper_state": obs["gripper_state"],
        "gripper_position": obs["gripper_pos"],
        "object_position": obs["object_pos"],
        "is_success": self._is_success
    }
    r = self.compute_reward(achieved_goal=obs.get('achieved_goal'),
                            desired_goal=obs.get('desired_goal'),
                            info=info)
    self._is_success = self._success_fn(self, self._achieved_goal,
                                        self._desired_goal, info)
    done = False
    if self._is_success:
        # Success terminates the episode with a fixed completion bonus.
        r = self._completion_bonus
        done = True
    info["r"] = r
    info["d"] = done
    return obs, r, done, info
def _env_setup(self):
    """Reset mocap welds and place the four workspace-limit marker sites."""
    utils.reset_mocap_welds(self.sim)
    utils.reset_mocap2body_xpos(self.sim)
    # Sites 'limit0'..'limit3' mark the corners of the square workspace
    # of half-width self.maxdist around self.origin, in the order
    # (-,-), (+,-), (-,+), (+,+).
    corner_signs = ((-1, -1), (1, -1), (-1, 1), (1, 1))
    for idx, (sx, sy) in enumerate(corner_signs):
        site_id = self.sim.model.site_name2id('limit%d' % idx)
        offset = np.array([sx * self.maxdist, sy * self.maxdist, 0])
        self.sim.model.site_pos[site_id] = self.origin + offset
    # Let the simulation settle after moving the sites.
    for _ in range(10):
        self._take_substeps()
def __init__(self, task=0, control_method="position_control", *args,
             **kwargs):
    """Reacher task environment.

    :param task: index into the global TASKS table; selects the goal.
    :param control_method: "position_control" (mocap-driven) or
        "torque_control".
    """
    self._task = task
    # One-hot encoding of the active task (exposed in episode info).
    self.onehot = np.zeros(len(TASKS))
    self.onehot[self._task] = 1
    self._step = 0
    self._start_pos = np.array([0.8, 0.0, 0.15])
    super().__init__(control_method=control_method, *args, **kwargs)
    self._distance_threshold = 0.03
    reset_mocap2body_xpos(self.sim)
    # NOTE(review): these are live views into sim.data, not copies — they
    # change as the simulation advances. Confirm that is intended.
    self.init_qpos = self.sim.data.qpos
    self.init_qvel = self.sim.data.qvel
    self.init_qacc = self.sim.data.qacc
    self.init_ctrl = self.sim.data.ctrl
    self._goal = np.array(TASKS[task])
    self.set_position(self._start_pos)
    print("Instantiating TaskReacherEnv (task = %i, control_mode = %s)" %
          (self._task, self._control_method))
def mocap_control(self, action):
    """Apply one mocap action with the welds active only for the step.

    :param action: (nmocap, 7) array of position/quaternion deltas,
        forwarded to _mocap_set_action.
    """
    reset_mocap2body_xpos(self.sim)
    # The equality constraints are enabled just long enough for the
    # mocap target to drag the arm through one step, then disabled.
    self.sim.model.eq_active[:] = 1
    _mocap_set_action(self.sim, action)
    self.sim.step()
    self.sim.model.eq_active[:] = 0
def step(self, action):
    """Advance the pick-and-place environment one step.

    :param action: (dx, dy, dz, gripper); for HER it is first rescaled
        from [-1, 1] into the action space and clipped.
    :return: Step(obs, reward, done, **info)
    """
    # no clipping / rescaling of actions
    if self._for_her:
        # actions are in [-1, 1]
        action = (action + 1.) / 2.
        # actions are in [0, 1]
        action = action * (self.action_space.high -
                           self.action_space.low) + self.action_space.low
        action = np.clip(action, self.action_space.low,
                         self.action_space.high)
    # rot_ctrl = np.array([1., 0., 1., 0.])
    # action = np.concatenate([action, rot_ctrl])
    # action, _ = np.split(action, (self.sim.model.nmocap * 7,))
    # action = action.reshape(self.sim.model.nmocap, 7)
    # pos_delta = action[:, :3]
    # action = np.concatenate((np.zeros(2), np.array(action).flatten()))
    action = np.array(action).flatten()
    if self._control_method == "torque_control":
        self.forward_dynamics(action)
        self.sim.forward()
    else:
        # Position control: translate the mocap target by the commanded
        # delta, then set the gripper and step once.
        reset_mocap2body_xpos(self.sim)
        self.sim.data.mocap_pos[
            0, :3] = self.sim.data.mocap_pos[0, :3] + action[:3]
        self.set_gripper_state(action[3])
        for _ in range(1):
            self.sim.step()
        self.sim.forward()
    self._step += 1
    # obs = self.get_current_obs()
    # achieved_goal = obs['achieved_goal']
    # goal = obs['desired_goal']
    grasped = self._grasp()  # TODO fix grasp recognition (always returns False)
    object_pos = self.sim.data.get_site_xpos('object0')
    desired_goal = object_pos
    if self._for_her:
        # HER: before a grasp the achieved goal is the gripper position;
        # after a grasp it is the object position, and the desired goal
        # switches to the sampled HER goal.
        if not grasped:
            achieved_goal = self.position
        else:
            # print("Grasped!")
            achieved_goal = np.squeeze(object_pos.copy())
        desired_goal = self.goal
    else:
        achieved_goal = self.position
    # reward = self._compute_reward(achieved_goal, goal)
    obs = self.get_obs()
    # penalize x/y movement of object
    penalize_2d_motion = np.linalg.norm(object_pos[:2] -
                                        np.array(TASKS[self._task][:2]))
    # reward positive change in z direction (up)
    lifting = object_pos[2] - self.init_box_height
    # achieved_dist = self._goal_distance(achieved_goal, self._goal)
    achieved_dist = self._goal_distance(achieved_goal, desired_goal)
    # reward = rewards._sigmoids(self._goal_distance(achieved_goal, goal) / self._goal_distance(self.initial_pos, goal), 0., "cosine")
    # reward = 1. - achieved_dist / self._goal_distance(self._start_pos, self._goal) / 2.  # before
    # reward = 1. - achieved_dist / self._goal_distance(np.zeros(3), self._goal) / 2.
    # TODO sparse reward
    # if grasped:
    #     print("Grasped!")
    # print(self.initial_pos, achieved_goal)
    # Success = object grasped and lifted at least 5 mm above its
    # initial height.
    done = grasped and lifting > 0.005  # (action[3] < 0.2 and not grasped) or grasped and lifting > 0.02 #(achieved_dist < self._distance_threshold)
    if self._for_her:
        reward = self.compute_reward(achieved_goal, desired_goal, {})
    else:
        if self._sparse_reward:
            reward = 0. - penalize_2d_motion + lifting + float(grasped) * 0.3
        else:
            # Dense shaping: grasp/lift bonus plus distance-to-object
            # term, minus the XY-motion penalty.
            reward = (.3 + lifting * 30.) * float(grasped) + 1. \
                - achieved_dist / self._goal_distance(
                    self._start_pos, object_pos) - penalize_2d_motion
    if done:  # done:
        reward = 20.  # 20.
    if self._for_her:
        info = {
            "l": self._step,
            "r": reward,
            "d": done,
            "grasped": grasped,
            "is_success": grasped and lifting > 0.005
        }
    else:
        info = {
            "episode": {
                "l": self._step,
                "r": reward,
                "d": done,
                "grasped": grasped,
                "position": self.position,
                "task": np.copy(self.onehot)
            }
        }
    # just take gripper position as observation
    return Step(obs, reward, done, **info)
def _set_arm_pose(self, pose: np.ndarray):
    """Command the arm's mocap target from a pose vector.

    :param pose: either (x, y, z) or (x, y, z) followed by a 4-element
        orientation quaternion.
    """
    assert pose.size in (7, 3)
    reset_mocap2body_xpos(self.sim)
    # Keep the positional target inside the reachable forearm bounds.
    lower, upper = self.forearm_bounds
    self.sim.data.mocap_pos[0, :] = np.clip(pose[:3], lower, upper)
    # A 7-element pose also carries an orientation quaternion.
    if pose.size == 7:
        self.sim.data.mocap_quat[0, :] = pose[3:]
def step(self, action): assert action.shape == self.action_space.shape # Note: you MUST copy the action if you modify it a = action.copy() # Clip to action space a *= self._action_scale a = np.clip(a, self.action_space.low, self.action_space.high) if self._control_method == "torque_control": self.forward_dynamics(a) self.sim.forward() elif self._control_method == "task_space_control": reset_mocap2body_xpos(self.sim) self.sim.data.mocap_pos[ 0, :3] = self.sim.data.mocap_pos[0, :3] + a[:3] self.sim.data.mocap_quat[:] = np.array([0, 1, 0, 0]) self.set_gripper_state(a[3]) for _ in range(5): self.sim.step() self.sim.forward() elif self._control_method == "position_control": curr_pos = self.joint_positions next_pos = np.clip(a + curr_pos[:], self.joint_position_space.low[:], self.joint_position_space.high[:]) self.sim.data.ctrl[:] = next_pos[:] self.sim.forward() for _ in range(5): self.sim.step() else: raise NotImplementedError self._step += 1 obs = self.get_obs() # collision checking is expensive so cache the value in_collision = self.in_collision info = { "l": self._step, "grasped": obs["has_object"], "gripper_state": obs["gripper_state"], "gripper_position": obs["gripper_pos"], "object_position": obs["object_pos"], "is_success": self._is_success, "in_collision": in_collision, } r = self.compute_reward(achieved_goal=self._achieved_goal, desired_goal=self._desired_goal, info=info) self._is_success = self._success_fn(self, self._achieved_goal, self._desired_goal, info) done = self._is_success and not self._never_done # collision detection if in_collision: r -= self._collision_penalty if self._terminate_on_collision: done = True if self._is_success: r = self._completion_bonus info["r"] = r info["d"] = done return obs, r, done, info
def reset(self):
    """Re-establish mocap welds and re-align mocap bodies after a reset."""
    utils.reset_mocap_welds(self.mj_sim)
    utils.reset_mocap2body_xpos(self.mj_sim)