def main():
    """Interactive demo for the HandPickAndPlace environment.

    Creates the env, hooks a click-selection logger into the viewer, and
    then replays `selected_action` (a module-level global, presumably fed
    by `action_thread` when the commented-out thread is started — TODO
    confirm) in an endless render loop.  Runs forever; intended for manual
    inspection only.
    """
    # env = gym.make('HandPickAndPlaceDense-v0')
    env = gym.make(
        'HandPickAndPlace-v0',
        ignore_rotation_ctrl=True,
        ignore_target_rotation=True,
        success_on_grasp_only=True,
        randomize_initial_arm_pos=True,
        randomize_initial_object_pos=True,
        # grasp_state=True,
        # grasp_state_reset_p=0.5,
        object_cage=True,
        object_id='teapot'
    )
    obs = env.reset()
    env.render()
    sim = env.unwrapped.sim
    # Log which geoms are clicked in the viewer (debugging aid).
    add_selection_logger(env.unwrapped.viewer, sim)
    print('nconmax:', sim.model.nconmax)
    print('obs.shape:', obs['observation'].shape)
    global selected_action
    # Background thread that would mutate `selected_action`; currently disabled.
    p = Thread(target=action_thread)
    # p.start()
    selected_action = np.zeros(27)
    for i in it.count():
        for j in range(6):
            env.reset()
            # for val in np.linspace(-1, 1, 60):
            while True:
                env.render()
                action = selected_action.copy()
                # Scale down the arm part of the action and consume it
                # (zero it so each keypress acts like an impulse).
                action[-7:] *= 0.2
                selected_action[-7:] *= 0.0
                render_pose(env.unwrapped.get_table_surface_pose(),
                            env.unwrapped.viewer, unique_id=535)
                rew, done = env.step(action)[1:3]
def _move_arms(self, *, left_target: np.ndarray, right_target: np.ndarray,
               left_yaw=0.0, right_yaw=0.0, pos_threshold=0.02,
               rot_threshold=0.1, k=2.0, max_steps=100,
               count_stable_steps=False, targets_relative_to=None,
               left_grp_config=-1.0, right_grp_config=-1.0):
    """Servo both grippers toward Cartesian targets using joint-space control.

    Each step computes a target pose per arm (optionally expressed relative
    to the pose returned by the callable `targets_relative_to`), converts it
    to joint targets via `mocap_ik`, and feeds the joint error through
    `self._controller`.  Stops early when both position and rotation errors
    fall under the thresholds.

    Returns:
        The number of "stable" steps (object still relative to the grasp
        center and off the table) if `count_stable_steps` is True, otherwise
        the last max absolute per-axis position error.
    """
    targets = {'l': left_target, 'r': right_target}
    yaws = {'l': left_yaw, 'r': right_yaw}
    stable_steps = 0
    prev_rel_pos = np.zeros(3)
    u = np.zeros(self.sim_env.action_space.shape)
    # Controller error memory, presumably mutated in place by
    # self._controller (derivative term) — TODO confirm.
    prev_err_l = np.zeros(7)
    prev_err_r = np.zeros(7)
    max_pos_err = -np.inf
    for i in range(max_steps):
        grasp_center_pos = np.zeros(3)
        max_rot_err = -np.inf
        max_pos_err = -np.inf
        for arm_i, arm in enumerate(('l', 'r')):
            curr_pose = self.get_gripper_pose(arm)
            curr_q = self.get_arm_config(arm)
            if arm == 'l':
                # NOTE(review): both branches use the same fixed pitch;
                # only the action slice and error buffer differ.
                pitch = np.pi - 0.9
                u_masked = u[:7]  # view into u: writes propagate
                prev_err = prev_err_l
            else:
                pitch = np.pi - 0.9
                u_masked = u[8:15]
                prev_err = prev_err_r
            if callable(targets_relative_to):
                reference = targets_relative_to()
                target_pos = tf.apply_tf(targets[arm], reference)[:3]
            else:
                target_pos = targets[arm]
            # Build the full target pose: position + yaw, then tilt by `pitch`.
            target_pose = np.r_[target_pos,
                                tf.rotations.euler2quat(np.r_[0., 0., yaws[arm]])]
            target_pose = tf.apply_tf(
                np.r_[0., 0., 0.,
                      tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                target_pose)
            grasp_center_pos += curr_pose[:3]
            max_pos_err = max(
                max_pos_err,
                np.abs(curr_pose[:3] - target_pose[:3]).max())
            max_rot_err = max(
                max_rot_err,
                tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))
            target_q = self.sim_env.mocap_ik(target_pose - curr_pose, arm)
            u_masked[:] = self._controller(curr_q - target_q, prev_err, k)
            if self.render_substeps:
                tf.render_pose(target_pos.copy(), self.viewer,
                               label=f"{arm}_p", unique_label=True)
                tf.render_pose(target_pose.copy(), self.viewer,
                               label=f"{arm}_t", unique_label=True)
                tf.render_pose(curr_pose.copy(), self.viewer,
                               label=f"{arm}", unique_label=True)
        grasp_center_pos /= 2.0  # average of the two gripper positions
        u[7] = left_grp_config
        u[15] = right_grp_config
        u = np.clip(u, self.sim_env.action_space.low,
                    self.sim_env.action_space.high)
        self.sim_env.step(u)
        if self.render_substeps:
            tf.render_pose(grasp_center_pos, self.viewer,
                           label="grasp_center", unique_id=5554)
            self.render(keep_markers=True)
        if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
            break
        if count_stable_steps:
            obj_pos = self.get_object_pos()
            rel_pos = obj_pos - grasp_center_pos
            # NOTE(review): prev_rel_pos is initialized to zeros, so the
            # `is not None` guard is always True here.
            still = prev_rel_pos is not None and np.all(
                np.abs(rel_pos - prev_rel_pos) < 0.002)
            obj_above_table = len(
                self.sim_env.get_object_contact_points(
                    other_body='table')) == 0
            if still and obj_above_table:
                stable_steps += 1
            elif i > 10:
                # grace period of 10 steps before instability aborts the loop
                break
            prev_rel_pos = rel_pos
    if count_stable_steps:
        return stable_steps
    return max_pos_err
def _move_arms_mocap(self, *, left_target: np.ndarray, right_target: np.ndarray,
                     left_yaw=0.0, right_yaw=0.0, pos_threshold=0.02,
                     rot_threshold=0.1, k=1.0, max_steps=1,
                     left_grp_config=-1.0, right_grp_config=-1.0):
    """Move both grippers toward Cartesian targets by driving mocap bodies.

    Unlike `_move_arms`, this bypasses joint-space control: it enables all
    weld equality constraints, writes pose deltas into the mocap bodies via
    `_mocap_set_action`, and steps the raw sim.  With the default
    `max_steps=1` the error terms are never computed (they stay -inf) and
    exactly one sim step is taken.
    """
    targets = {'l': left_target, 'r': right_target}
    yaws = {'l': left_yaw, 'r': right_yaw}
    # Enable the mocap weld constraints so the grippers track the mocaps.
    self.sim.model.eq_active[:] = 1
    for i in range(max_steps):
        grasp_center_pos = np.zeros(3)
        max_rot_err = -np.inf
        max_pos_err = -np.inf
        # Tilt the grippers more as the object rises above the table.
        d_above_table = self.get_object_pos(
        )[2] - self.sim_env._object_z_offset
        grp_xrot = 0.9 + d_above_table * 2.0
        mocap_a = np.zeros((self.sim.model.nmocap, 7))
        for arm_i, arm in enumerate(('l', 'r')):
            curr_pose = self.get_gripper_pose(arm)
            pitch = np.pi - grp_xrot
            target_pos = targets[arm]
            target_pose = np.r_[target_pos,
                                tf.rotations.euler2quat(np.r_[0., 0., yaws[arm]])]
            target_pose = tf.apply_tf(
                np.r_[0., 0., 0.,
                      tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                target_pose)
            grasp_center_pos += curr_pose[:3]
            if max_steps > 1:
                # Errors only matter for the early-exit check, which can
                # only trigger when there is more than one step.
                max_pos_err = max(
                    max_pos_err,
                    np.abs(curr_pose[:3] - target_pose[:3]).max())
                max_rot_err = max(
                    max_rot_err,
                    tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))
            mocap_a[arm_i] = target_pose - curr_pose
            if self.viewer is not None and self.render_poses:
                tf.render_pose(target_pos.copy(), self.viewer,
                               label=f"{arm}_p", unique_label=True)
                tf.render_pose(target_pose.copy(), self.viewer,
                               label=f"{arm}_t", unique_label=True)
                tf.render_pose(curr_pose.copy(), self.viewer,
                               label=f"{arm}", unique_label=True)
        grasp_center_pos /= 2.0
        # set config of grippers
        u = np.zeros(self.sim_env.action_space.shape)
        u[7] = left_grp_config
        u[15] = right_grp_config
        self.sim_env._set_action(u)
        # set pose of grippers
        _mocap_set_action(self.sim, mocap_a * k)
        # take simulation step
        self.sim.step()
        self.sim_env._step_callback()
        if self.viewer is not None:
            if self.render_poses:
                tf.render_pose(grasp_center_pos, self.viewer,
                               label="grasp_center", unique_id=5554)
            self.render(keep_markers=True)
        if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
            break
def main():
    """Run the scripted YumiConstrainedAgent in the YumiConstrained env.

    Interactive evaluation loop: episodes reset on `done`, successes (sparse
    reward == 0) are tallied into `steps_to_success`, and the min/max goal
    coordinates seen on success are accumulated in `reachability`.  Runs
    forever; the various commented prints are ad-hoc diagnostics.
    """
    env = gym.make('YumiConstrained-v2',
                   reward_type='sparse',
                   render_poses=False,
                   has_rotating_platform=False,
                   has_button=False,
                   has_object_box=False,
                   object_id="fetch_sphere")
    raw_env = env.unwrapped
    sim = raw_env.sim
    env.render()
    add_selection_logger(raw_env.viewer, sim)
    agent = YumiConstrainedAgent(env)
    done = True  # forces a reset on the first iteration
    obs = None
    steps_to_success = []
    n_steps = 0
    # Row 0: per-axis minimum goal seen on success; row 1: maximum.
    reachability = np.zeros((2, 3))
    reachability[0] = np.inf
    reachability[1] = -np.inf
    unreachable_eps = 0
    tot_eps = 0
    for i in it.count():
        if done:
            obs = env.reset()
            n_steps = 0
            tot_eps += 1
        # center_pos = obs['observation'][:3]
        # achieved_goal = obs['achieved_goal']
        # err = achieved_goal - center_pos
        # # u = np.zeros(env.action_space.shape)
        # # u[1:4] = err * 10.0
        # NOTE(review): the three assignments below are dead code — `u` is
        # immediately overwritten by the agent's prediction.
        u = env.action_space.sample()
        u[0] = -1 if i // 8 % 2 == 0 else 1
        u[1:] = 0.
        u = agent.predict(obs)
        render_pose(env.unwrapped.get_table_surface_pose(),
                    env.unwrapped.viewer, unique_id=535)
        obs, rew, done, _ = env.step(u)
        # done = False
        n_steps += 1
        env.render()
        if rew == 0.0:
            # Sparse-reward convention: 0 means success.
            steps_to_success.append(n_steps)
            arr = np.asarray(steps_to_success)
            # print('min', arr.min(), 'max', arr.max(), 'avg', arr.mean(), 'std', arr.std())
            done = True
            goal = obs['desired_goal'].copy()
            reachability[0] = np.minimum(reachability[0], goal)
            reachability[1] = np.maximum(reachability[1], goal)
            # print(reachability)
            # print(unreachable_eps / tot_eps)
            # print()
        elif done:
            unreachable_eps += 1
def predict(self, obs, **kwargs):
    """Scripted dual-arm grasp policy using IK (mocap, QP, or plain solver).

    Four-phase state machine: 0 = approach above the object, 1 = descend,
    2 = close grippers gradually, 3 = move toward the `target0` sites.
    Returns a clipped action for the full action space; gripper commands
    live at u[7] (left) and u[15] (right).
    """
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    if self._goal is None or np.any(self._goal != new_goal):
        self._reset(new_goal)  # new episode goal: restart the phase machine
    # TFDebugger.reset()
    # TFDebugger.step(self._raw_env.viewer)
    # Current gripper poses (pos + quat) keyed by arm.
    curr_grp_poses = {
        a: np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                 rotations.mat2quat(
                     self._raw_env.sim.data.
                     get_site_xmat(f'gripper_{a}_center')), ]
        for a in ('l', 'r')
    }
    pos_errors = []
    for arm in ('l', 'r'):
        if arm == 'l':
            solver = self._ikp_solver_l
            jac_solver = self._jac_solver_l
            joint_lims = self._raw_env.arm_l_joint_lims
            achieved_pos = obs['observation'][32:35]
            obj_l_rel_pos = obs['observation'][44:47]
            obj_pos = obj_l_rel_pos + achieved_pos
            # Position + Euler angles, then converted to pos + quat.
            target_pose = np.r_[obj_pos, np.pi - 0.9, 0.01, 0.01]
            target_pose = np.r_[target_pose[:3],
                                rotations.euler2quat(target_pose[3:])]
            if self._phase == 3:
                target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                    'target0:left').copy()
                q = rotations.mat2quat(
                    self._raw_env.sim.data.get_site_xmat('target0:left'))
                # NOTE(review): left composes (pose ∘ q), right composes
                # (q ∘ pose) — asymmetry appears deliberate; confirm.
                target_pose[3:] = rotations.quat_mul(target_pose[3:], q)
            prev_err = self._prev_err_l
            curr_q = obs['observation'][:7]
            u_masked = u[:7]  # view into u: writes propagate
        elif arm == 'r':
            solver = self._ikp_solver_r
            jac_solver = self._jac_solver_r
            joint_lims = self._raw_env.arm_r_joint_lims
            achieved_pos = obs['observation'][35:38]
            obj_r_rel_pos = obs['observation'][47:50]
            obj_pos = obj_r_rel_pos + achieved_pos
            target_pose = np.r_[obj_pos, -np.pi + 0.9, 0.01, np.pi]
            target_pose = np.r_[target_pose[:3],
                                rotations.euler2quat(target_pose[3:])]
            if self._phase == 3:
                target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                    'target0:right').copy()
                q = rotations.mat2quat(
                    self._raw_env.sim.data.get_site_xmat('target0:right'))
                target_pose[3:] = rotations.quat_mul(q, target_pose[3:])
            prev_err = self._prev_err_r
            curr_q = obs['observation'][16:23]
            u_masked = u[8:15]
        else:
            continue
        # Phase-dependent gripper command and target adjustment.
        if self._phase == 0:
            u[7] = -1.0
            u[15] = -1.0
            target_pose[2] += 0.1  # hover above the object
        elif self._phase == 1:
            u[7] = -1.0
            u[15] = -1.0
            target_pose[2] -= 0.002  # descend slightly below
        elif self._phase == 2:
            # Ramp the grippers closed over ~6 steps.
            u[7] = -1 + self._phase_steps / 3.
            u[15] = -1 + self._phase_steps / 3.
        elif self._phase == 3:
            u[7] = 1.0
            u[15] = 1.0
        if self._raw_env.viewer is not None:
            tf.render_pose(target_pose.copy(), self._raw_env.viewer)
        curr_pose = curr_grp_poses[arm].copy()
        err_pose = curr_pose - target_pose
        err_pos = np.linalg.norm(err_pose[:3])
        pos_errors.append(err_pos)
        if self.use_mocap_ctrl:
            if self._phase < 1:
                controller_k = 3.0
            else:
                controller_k = 2.5
            err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
            target_q = self._raw_env.mocap_ik(-err_pose, arm)
        else:
            controller_k = 0.1
            # NOTE(review): err_rot = 0 makes the rotation checks below
            # trivially pass in the non-mocap branch.
            err_rot = 0.0
            target_pose[:3] -= self._base_offset
            if self.use_qp_solver:
                if not self.check_joint_limits:
                    joint_lims = None
                curr_pose[:3] -= self._base_offset
                target_q = self._position_ik_qp(curr_pose, target_pose,
                                                curr_q, jac_solver,
                                                joint_lims)
            else:
                target_q = self._position_ik(target_pose, curr_q, solver)
        if target_q is not None:
            err_q = curr_q - target_q
            u_masked[:] = self._controller(err_q, prev_err, controller_k)
    self._phase_steps += 1
    # Phase transitions. NOTE(review): err_rot here is from the last arm
    # processed ('r') only.
    if self._phase == 0 and np.all(
            np.array(pos_errors) < 0.03) and err_rot < 0.05:
        self._phase = 1
        self._phase_steps = 0
    elif self._phase == 1 and np.all(
            np.array(pos_errors) < 0.007) and err_rot < 0.05:
        self._phase = 2
        self._phase_steps = 0
    elif self._phase == 2:
        if self._phase_steps > 6:
            self._phase = 3
            self._phase_steps = 0
            # Remember the right-to-left gripper transform at grasp time.
            self._locked_l_to_r_tf = tf.get_tf(curr_grp_poses['r'],
                                               curr_grp_poses['l'])
    u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
    return u
def predict(self, obs, **kwargs):
    """Scripted dual-arm squeeze-grasp policy using mocap IK.

    Places each gripper on opposite sides of the object (offset by the
    object radius along the object's y axis), approaches in phases 0-2,
    then squeezes inward in phase 3.  Returns a clipped full-space action.
    """
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    if self._goal is None or np.any(self._goal != new_goal):
        self.reset(new_goal)
    obj_radius = self._object_size[0]
    obj_achieved_alt = obs['achieved_goal'].item()  # object altitude
    obj_achieved_pose = obs['observation'][44:51]
    if np.all(obj_achieved_pose[3:] == 0):
        # Degenerate all-zero quaternion: substitute identity.
        obj_achieved_pose[3] = 1.0
    # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O")
    # Tilt the grippers more as the object rises.
    grp_xrot = 0.9 + obj_achieved_alt * 2.0
    curr_grp_poses = {
        a: np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                 rotations.mat2quat(
                     self._raw_env.sim.data.
                     get_site_xmat(f'gripper_{a}_center')), ]
        for a in ('l', 'r')
    }
    pos_errors = []
    for arm in ('l', 'r'):
        if arm == 'l':
            # Left gripper: +radius offset along the object's y axis.
            transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.]
            if self._phase == 3:
                transf[1] *= 0.9  # squeeze in slightly
                transf[2] = 0.05  # and lift
            pose = tf.apply_tf(transf, obj_achieved_pose)
            # tf.render_pose(pose, self._raw_env.viewer, label="L")
            grp_target_pos = pose[:3]
            grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01]
            target_pose = np.r_[grp_target_pos,
                                rotations.euler2quat(grp_target_rot)]
            prev_err = self._prev_err_l
            curr_q = obs['observation'][:7]
            u_masked = u[:7]  # view into u: writes propagate
        elif arm == 'r':
            # Right gripper: mirrored offset and orientation.
            transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.]
            if self._phase == 3:
                transf[1] *= 0.9
                transf[2] = 0.05
            pose = tf.apply_tf(transf, obj_achieved_pose)
            # tf.render_pose(pose, self._raw_env.viewer, label="R")
            grp_target_pos = pose[:3]
            grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi]
            target_pose = np.r_[grp_target_pos,
                                rotations.euler2quat(grp_target_rot)]
            prev_err = self._prev_err_r
            curr_q = obs['observation'][16:23]
            u_masked = u[8:15]
        else:
            continue
        # Grippers stay closed throughout; the squeeze comes from the arms.
        u[7] = -1.0
        u[15] = -1.0
        if self._phase == 0:
            target_pose[2] += 0.1
            target_pose[1] += 0.05 * np.sign(target_pose[1])
        elif self._phase == 1:
            target_pose[2] += 0.01
            target_pose[1] += 0.05 * np.sign(target_pose[1])
        elif self._phase == 2:
            target_pose[2] += 0.01
        elif self._phase == 3:
            target_pose[2] += 0.00
        if self._raw_env.viewer is not None:
            tf.render_pose(target_pose.copy(), self._raw_env.viewer)
        curr_pose = curr_grp_poses[arm].copy()
        err_pose = curr_pose - target_pose
        err_pos = np.linalg.norm(err_pose[:3])
        pos_errors.append(err_pos)
        controller_k = 2.0
        err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
        target_q = self._raw_env.mocap_ik(-err_pose, arm)
        err_q = curr_q - target_q
        u_masked[:] = self._controller(err_q, prev_err, controller_k)
    self._phase_steps += 1
    # Phase transitions; err_rot is from the last arm processed ('r').
    if self._phase == 0 and np.all(
            np.array(pos_errors) < 0.03) and err_rot < 0.1:
        self._phase = 1
        self._phase_steps = 0
    elif self._phase == 1 and np.all(
            np.array(pos_errors) < 0.03) and err_rot < 0.1:
        self._phase = 2
        self._phase_steps = 0
    elif self._phase == 2:
        if self._phase_steps > 30:
            self._phase = 3
            self._phase_steps = 0
    u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
    return u
def predict(self, obs, **kwargs):
    """Scripted policy handling button / rotating-platform variants.

    Phases: pre-2 = deal with the button or rotating platform to bring a
    far-away object into reach; 2 = reach the object; 3 = close the
    gripper until >= 3 contact points; 4 = carry toward the goal (falling
    back to phase 0 if the grasp is lost).  u[0] is presumably the gripper
    command and u[1:4] the Cartesian velocity — TODO confirm against env.
    """
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    if self._goal is None or np.any(self._goal != new_goal):
        self.reset(new_goal)
    grasp_center_pos = obs['observation'][:3]
    object_pos = obs['observation'][18:21]
    object_rel_pos = obs['observation'][24:27]
    c_points = self._raw_env.sim_env.get_object_contact_points()
    # button_tf = tf.get_tf(
    #     np.r_[self._raw_env.sim.data.get_geom_xpos('button_geom'), 1., 0., 0., 0.],
    #     self._raw_env.get_table_surface_pose()
    # )
    # print('button_tf', button_tf)
    if self._raw_env.sim_env.has_button and self._phase < 2:
        if np.linalg.norm(object_pos) > 0.55:
            # Object out of reach: go press the button.
            button_pos = self._raw_env.sim.data.get_geom_xpos(
                'button_geom')
            tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                           self._raw_env.viewer)
            err = button_pos - grasp_center_pos
            u[0] = -1.
            u[1:4] = err * 5.0
        else:
            # Hold for a few steps while the mechanism brings it closer.
            u[0] = 0.
            u[3] = 0.5
            self._phase_steps += 1
            if self._phase_steps > 5:
                self._phase = 2
                self._phase_steps = 0
    elif self._raw_env.sim_env.has_rotating_platform:
        if np.linalg.norm(object_pos[:2]) > 0.12:  # FIXME
            far_end_pose = np.r_[
                self._raw_env.sim.data.
                get_site_xpos('rotating_platform:far_end'),
                tf.rotations.mat2quat(
                    self._raw_env.sim.data.
                    get_site_xmat('rotating_platform:far_end')), ]
            close_end_pose = tf.apply_tf(
                np.r_[-0.5, -0.02, 0., 1., 0., 0., 0.], far_end_pose)
            tf.render_pose(close_end_pose, self._raw_env.viewer)
            push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
            # push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.], close_end_pose)
            tf.render_pose(push_target, self._raw_env.viewer)
            err = close_end_pose[:3] - grasp_center_pos
            if self._phase == 0:
                # Reach the close end of the platform first.
                if np.linalg.norm(err) > 0.01:
                    u[0] = 1.
                    u[1:4] = err * 5.0
                else:
                    self._phase += 1
                    self._phase_steps = 0
            if self._phase == 1:
                # Push the platform toward the fixed push target.
                u[0] = 1.
                u[1:4] = (push_target[:3] - grasp_center_pos) * 0.5
                self._phase_steps += 1
                if self._phase_steps > 80:
                    self._phase += 1
                    self._phase_steps = 0
        elif self._phase < 2:
            self._phase = 2
            self._phase_steps = 0
    elif self._phase < 2:
        # No obstacle mechanism: skip straight to reaching.
        self._phase = 2
        self._phase_steps = 0
    if self._phase == 2:
        if np.linalg.norm(object_rel_pos) > 0.01:
            u[0] = 0.0
            u[1:4] = object_rel_pos * 5.0
        else:
            self._phase += 1
            self._phase_steps = 0
    if self._phase == 3:
        if len(c_points) < 3:
            # Close the gripper progressively until contact is made.
            u[0] = -self._phase_steps / 10.0
            self._phase_steps += 1
        else:
            self._phase += 1
            self._phase_steps = 0
    if self._phase == 4:
        if len(c_points) > 2:
            u[0] = -1.0
            u[1:4] = (new_goal - object_pos) * 2.0
        else:
            # Grasp lost: start over.
            self._phase = 0
            self._phase_steps = 0
    return u
def predict(self, obs, **kwargs):
    """Heuristic push policy.

    Positions the gripper behind the object along the object->goal
    direction (phase 0: hover high, phase 1: descend, phase 2: push),
    optionally pressing a button first when the object is out of reach.
    Returns a 4-dof action [dx, dy, dz, gripper] with the gripper closed.
    """
    desired = obs['desired_goal']
    # A new goal restarts the phase machine and clears the cached direction.
    if self._goal is None or np.any(desired != self._goal):
        self._goal = desired.copy()
        self._phase = -1
        self._last_disp = None

    gripper_pos = obs['observation'][:3]
    obj_pos = obs['observation'][3:6]
    env_raw = self._env.unwrapped
    table_pose = env_raw.get_table_surface_pose()

    if self._phase < 0:
        far_from_table = np.linalg.norm(obj_pos[:2] - table_pose[:2]) > 0.38
        if env_raw.has_button and far_from_table:
            # Object out of reach: go press the button that retrieves it.
            btn_pos = env_raw.sim.data.get_geom_xpos('button_geom')
            tf.render_pose(np.r_[btn_pos, 1., 0., 0., 0.], env_raw.viewer)
            cmd = np.zeros(4)
            cmd[3] = -1.
            cmd[:3] = (btn_pos - gripper_pos) * 5.0
            return cmd
        self._phase = 0

    offset = self._goal[:2] - obj_pos[:2]
    # Freeze the push direction once close so jitter cannot flip it.
    if self._last_disp is None or np.linalg.norm(offset) > 0.05:
        self._last_disp = offset.copy()
    direction = self._last_disp / np.linalg.norm(self._last_disp)

    # Stand 6 cm behind the object, opposite the goal direction.
    approach = obj_pos - np.r_[direction, 0.] * 0.06
    if self._phase == 0:
        approach[2] = 0.5  # stay high while moving into position
    elif self._phase == 2:
        approach[:2] += np.clip(offset * 1.7, -0.08, 0.08)

    delta = approach - gripper_pos
    action = np.zeros(4)
    action[-1] = -1.0
    if np.linalg.norm(offset) < 0.01:
        return action  # object already at the goal: hold still
    action[:2] = delta[:2] * 8.
    action[2] = delta[2] * 8.
    # Advance the phase once the gripper reaches its sub-goal.
    if np.linalg.norm(delta) < 0.01:
        if self._phase == 0:
            self._phase = 1
        elif self._phase == 1:
            self._phase = 2
    return action
def predict(self, obs, **kwargs):
    # Modified from original implementation in OpenAI's baselines:
    # https://github.com/openai/baselines/blob/master/baselines/her/experiment/data_generation/fetch_data_generation.py
    """Scripted fetch-style pick-and-place policy with env-variant handling.

    Phases: <2 = handle the button / rotating platform, 2 = lift clear of
    the table, 3 = hover above the object, 4 = descend and close, 5 = carry
    to the goal, 6 = hold.  Returns a 4-dof action; index 3 is presumably
    the gripper command — TODO confirm against env.
    """
    goal = obs['desired_goal']
    if self._goal is None or np.any(goal != self._goal):
        self._goal = goal.copy()
        self._phase = -1
        self._phase_steps = 0
    grasp_center_pos = obs['observation'][:3]
    object_pos = obs['observation'][3:6]
    object_rel_pos = obs['observation'][6:9]
    object_oriented_goal = object_rel_pos.copy()
    object_oriented_goal[
        2] += 0.03  # first make the gripper go slightly above the object
    raw_env = self._env.unwrapped
    table_tf = raw_env.get_table_surface_pose()
    if raw_env.has_button and self._phase < 2:
        u = np.zeros(4)
        if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.38:
            # Object out of reach: press the button that retrieves it.
            button_pos = raw_env.sim.data.get_geom_xpos('button_geom')
            tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.],
                           raw_env.viewer)
            err = button_pos - grasp_center_pos
            u[3] = -1.
            u[:3] = err * 5.0
            return u
        else:
            # Back off upward for a few steps, then proceed to phase 2.
            u[3] = 0.
            u[2] = 0.5
            self._phase_steps += 1
            if self._phase_steps > 5:
                self._phase = 2
                self._phase_steps = 0
            else:
                return u
    elif raw_env.has_rotating_platform:
        if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.22:  # FIXME
            far_end_pose = np.r_[
                raw_env.sim.data.get_site_xpos('rotating_platform:far_end'
                                               ),
                tf.rotations.mat2quat(
                    raw_env.sim.data.
                    get_site_xmat('rotating_platform:far_end')), ]
            close_end_pose = tf.apply_tf(
                np.r_[-0.5, -0.09, 0., 1., 0., 0., 0.], far_end_pose)
            tf.render_pose(close_end_pose, raw_env.viewer,
                           label="close_end")
            # push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
            # push_target = tf.apply_tf(table_tf, push_target)
            push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.],
                                      close_end_pose)
            tf.render_pose(push_target, raw_env.viewer)
            if self._phase == -1:
                close_end_pose[2] += 0.05  # first approach from above
            err = close_end_pose[:3] - grasp_center_pos
            u = np.zeros(4)
            if self._phase < 1:
                if np.linalg.norm(err) > 0.015:
                    u[3] = -1.
                    u[:3] = err * 20.0
                else:
                    self._phase += 1
                    self._phase_steps = 0
            if self._phase == 1:
                # Push the platform for up to 80 steps.
                u[3] = -1.
                u[:3] = (push_target[:3] - grasp_center_pos) * 10
                self._phase_steps += 1
                if self._phase_steps > 80:
                    self._phase += 1
                    self._phase_steps = 0
            return u
        elif self._phase < 2:
            self._phase = 2
            self._phase_steps = 0
    elif self._phase < 2:
        # No obstacle mechanism: go straight to the pick sequence.
        self._phase = 2
        self._phase_steps = 0
    # Phase 2: rise clear of the table surface before approaching.
    if np.abs(table_tf[2] -
              grasp_center_pos[2]) < 0.07 and self._phase == 2:
        action = [0, 0, 0.4, 0.05]
        return action
    elif self._phase == 2:
        self._phase = 3
        self._phase_steps = 0
    # Phase 3: hover slightly above the object with the gripper open.
    if np.linalg.norm(object_oriented_goal) >= 0.005 and self._phase == 3:
        action = [0, 0, 0, 0]
        object_oriented_goal = object_rel_pos.copy()
        object_oriented_goal[2] += 0.03
        for i in range(len(object_oriented_goal)):
            action[i] = object_oriented_goal[i] * 6
        action[len(action) - 1] = 0.05
        return action
    elif self._phase == 3:
        self._phase = 4
        self._phase_steps = 0
    # Phase 4: descend onto the object while closing the gripper.
    if np.linalg.norm(object_rel_pos) >= 0.005 and self._phase == 4:
        action = [0, 0, 0, 0]
        for i in range(len(object_rel_pos)):
            action[i] = object_rel_pos[i] * 6
        action[len(action) - 1] = -0.2
        return action
    elif self._phase == 4:
        self._phase = 5
        self._phase_steps = 0
    # Phase 5: carry the object toward the goal.
    if np.linalg.norm(goal - object_pos) >= 0.01 and self._phase == 5:
        action = [0, 0, 0, 0]
        for i in range(len(goal - object_pos)):
            action[i] = (goal - object_pos)[i] * 6
        action[len(action) - 1] = -0.2
        return action
    elif self._phase == 5:
        self._phase = 6
        self._phase_steps = 0
    if self._phase == 6:
        action = [0, 0, 0, 0]
        action[len(action) - 1] = -0.2  # keep the gripper closed
        return action
def _step(self, action: np.ndarray):
    """Execute one high-level step: place fingers on the object, then move the arm.

    The action encodes 5 finger positions relative to the object (first 15
    entries, scaled to +/-3 cm per axis) and, for PICK_AND_PLACE, a target
    arm position in normalized forearm-bound coordinates (remaining entries).

    Returns the usual gym 4-tuple (obs, reward, done, info); reward is the
    stable-step count for LIFT_ABOVE_TABLE and a blended distance/stability
    score for PICK_AND_PLACE.
    """
    action = np.clip(action, self.action_space.low, self.action_space.high)
    # 5 fingers x 3 coords, scaled from [-1, 1] to roughly +/-3 cm.
    fingers_pos_wrt_obj = action[:(3 * 5)].reshape(
        -1, 3) * np.r_[0.03, 0.03, 0.03]
    if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
        arm_pos_wrt_world = np.zeros(3)
    elif self.task == HandSteppedTask.PICK_AND_PLACE:
        arm_pos_wrt_world = action[(3 * 5):]
    else:
        raise NotImplementedError
    arm_bounds = np.array(self.sim_env.forearm_bounds).T
    if self.render_substeps:
        tf.render_box(self.viewer, bounds=arm_bounds)
    # Map the normalized arm action [-1, 1] into the forearm workspace box.
    arm_pos_wrt_world *= np.abs(arm_bounds[:, 1] - arm_bounds[:, 0]) / 2.0
    arm_pos_wrt_world += arm_bounds.mean(axis=1)
    obj_pose = self.sim_env._get_object_pose()
    # Episode-termination heuristic: object fell below table height.
    obj_on_ground = obj_pose[2] < 0.37
    # Finger targets in world frame, expressed relative to the object pose.
    fingers_pos_targets = np.array([
        tf.apply_tf(np.r_[transf, 1., 0., 0., 0.], obj_pose)
        for transf in fingers_pos_wrt_obj
    ])
    # Disable all weld constraints except the first (presumably the palm
    # mocap weld — TODO confirm against the model).
    self.sim.model.eq_active[1:] = 0
    # self.sim.data.mocap_pos[1:] = fingers_pos_targets[:, :3]
    if self.render_substeps:
        tf.render_pose(arm_pos_wrt_world.copy(), self.sim_env.viewer,
                       label='arm_t')
        # for i, f in enumerate(fingers_pos_targets):
        #     tf.render_pose(f, self.sim_env.viewer, label=f'f_{i}')
    # Pre-grasp palm target: centroid of the finger targets, nudged back/up.
    pregrasp_palm_target = fingers_pos_targets[:, :3].mean(axis=0)
    pregrasp_palm_target = np.r_[pregrasp_palm_target, 1., 0., 0., 0.]
    pregrasp_palm_target = tf.apply_tf(
        np.r_[-0.01, 0., 0.015, 1., 0., 0., 0.], pregrasp_palm_target)[:3]
    # move hand
    if len(self.sim_env.get_object_contact_points(
            other_body='robot')) == 0:
        # Not touching yet: pre-shape the hand open and move the palm over.
        hand_action = np.r_[0., -.5, -np.ones(18)]
        hand_action[15:] = (-1., -0.5, 1., -1., 0)
        self._move_arm(pregrasp_palm_target, hand_action=hand_action)
    # move fingers
    self._move_fingers(fingers_pos_targets, max_steps=30)
    # move arm
    stable_steps = self._move_arm(arm_pos_wrt_world,
                                  count_stable_steps=True)
    done = obj_on_ground
    obs = self._get_obs()
    if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
        info = dict()
        reward = stable_steps
    elif self.task == HandSteppedTask.PICK_AND_PLACE:
        info = {
            'is_success': self.sim_env._is_success(obs['achieved_goal'],
                                                   self.goal)
        }
        dist_to_goal = np.linalg.norm(self.goal[:3] -
                                      self.sim_env._get_object_pose()[:3])
        dist_reward = 100 / (
            1 + dist_to_goal * _smooth_step(dist_to_goal) * 100
        )  # range: [0, 100]
        # Blend goal proximity (25%) with grasp stability (75%).
        alpha = 0.25
        reward = dist_reward * alpha + stable_steps * (1 - alpha)
    else:
        raise NotImplementedError
    return obs, reward, done, info
def predict(self, obs, **kwargs):
    """Scripted Shadow-hand grasp policy with four grasping strategies.

    Strategy 0: palm grasp (wait for palm contact / stillness, then close).
    Strategies 1-2: pinch grasps with different finger configurations.
    Strategy 3: phased thumb-led approach via pre-grasp/grasp poses, with
    exploration noise on wrist, arm and fingers.

    The action layout used below: [1] wrist, [2:-7] hand joints, [-7:-4]
    arm position delta; the remainder is left zero.  Returns a clipped
    action; a zero action is returned if the object has dropped.
    """
    if self._goal is None or np.any(self._goal != obs['desired_goal']):
        self._reset(obs)
    wrist_noise = 0.
    arm_pos_noise = 0.
    fingers_noise = 0.
    strategy = self._strategy
    action = np.zeros(self._env.action_space.shape)
    obj_pos = obs['achieved_goal'][:3]
    # Vector from grasp center to object.
    d = obj_pos - self._env.unwrapped._get_grasp_center_pose(
        no_rot=True)[:3]
    reached = np.linalg.norm(d) < 0.05
    on_palm = False
    dropped = obj_pos[2] < 0.38  # below table height
    if dropped:
        return action * 0.0
    if reached:
        contacts = self._env.unwrapped.get_object_contact_points()
        palm_contacts = len([
            x for x in contacts
            if 'palm' in x['body1'] or 'palm' in x['body2']
        ])
        on_palm = palm_contacts > 0
    if strategy == 0:
        # Palm grasp: scoop, then close all fingers once stable.
        wrist_ctrl = -1.0
        self._hand_ctrl[:] = -1.0
        self._hand_ctrl[[0, 3]] = 1.0
        self._hand_ctrl[[6, 9]] = -1.0
        self._hand_ctrl[13:] = (-1., -0.5, 1., -1., 0)
        d += np.r_[0., -0.030, 0.0]
        # Stillness test vs last step; looser once grasping has begun.
        still = np.linalg.norm(d - self._prev_d) < 0.002
        if self._grasp_steps > 10:
            still = np.linalg.norm(d - self._prev_d) < 0.005
        self._prev_d = d.copy()
        arm_pos_ctrl = d * 1.0
        if on_palm or still:
            self._hand_ctrl[:] = 1.0
            self._hand_ctrl[[0, 3]] = 1.0
            self._hand_ctrl[[6, 9]] = -1.0
            self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)
            arm_pos_ctrl *= 0.0
            self._grasp_steps += 1
            if self._grasp_steps > 10:
                # Grasp settled: start moving toward the goal.
                d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                arm_pos_ctrl = d * 0.5
        else:
            self._grasp_steps = 0
    elif strategy in [1, 2]:
        # Pinch grasps; strategies differ only in finger configuration.
        d += np.r_[0., -0.035, 0.025]
        reached = np.linalg.norm(d) < 0.02
        if self._grasp_steps > 10:
            reached = np.linalg.norm(d) < 0.04
        wrist_ctrl = 0.0
        self._hand_ctrl[:] = -1.0
        self._hand_ctrl[13:] = (-1., 1., 1., -1., -1.)
        arm_pos_ctrl = d * 1.0
        if reached:
            arm_pos_ctrl *= 0.0
            self._grasp_steps += 1
            if strategy == 1:
                self._hand_ctrl[13:] = (-0.5, 1., 1., -1., -1.)
                self._hand_ctrl[4] = 0.6
                self._hand_ctrl[5] = 0.5
            if strategy == 2:
                self._hand_ctrl[13:] = (-0.1, 1., 1., -1., -1.)
                self._hand_ctrl[6] = -0.7
                self._hand_ctrl[7] = 0.6
                self._hand_ctrl[8] = 0.5
            if self._grasp_steps > 10:
                d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                arm_pos_ctrl = d * 0.5
        else:
            self._grasp_steps = 0
    elif strategy == 3:
        # Thumb-led approach with explicit pre-grasp/grasp poses and
        # exploration noise (phases 0-3 gated by distance thresholds).
        wrist_ctrl = -0.5
        d = np.zeros(3)
        self._hand_ctrl[:] = -1.0
        self._hand_ctrl[13:] = (0.2, -0.2, 1., -1., 1.)
        obj_pose = self._env.unwrapped._get_object_pose()
        thdistal_pos = self._env.unwrapped.sim.data.get_body_xpos(
            'robot0:thdistal')
        grasp_pose = tf.apply_tf(
            np.r_[0.015, -0.10, 0.075, 1., 0., 0., 0.], obj_pose)
        pregrasp_pose = tf.apply_tf(np.r_[-0.08, 0., 0., 1., 0., 0., 0.],
                                    grasp_pose)
        viewer = self._env.unwrapped.viewer
        if viewer is not None:
            tf.render_pose(obj_pose, viewer, size=0.4)
            tf.render_pose(grasp_pose, viewer, size=0.2)
            tf.render_pose(pregrasp_pose, viewer, size=0.2)
        k = 2.0
        d_thresh = 0.008
        wrist_noise = 0.1
        arm_pos_noise = 0.01
        fingers_noise = 0.2
        if self._phase == 0:
            # Align horizontally with the pre-grasp pose first.
            d = pregrasp_pose[:3] - thdistal_pos
            d[2] = 0.0
            arm_pos_noise = 0.05
            d_thresh = 0.02
        elif self._phase == 1:
            d = pregrasp_pose[:3] - thdistal_pos
            arm_pos_noise = 0.01
            d_thresh = 0.01
        elif self._phase == 2:
            d = grasp_pose[:3] - thdistal_pos
            fingers_noise = 0.05
        elif self._phase == 3:
            # Close the hand, then carry the object toward the goal.
            fingers_noise = 0.05
            self._hand_ctrl[:] = 1.0
            self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)
            self._grasp_steps += 1
            if self._grasp_steps > 5:
                d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
        if self._phase < 3 and np.linalg.norm(d) < d_thresh:
            self._phase += 1
            self._grasp_steps = 0
        arm_pos_ctrl = d * k
    else:
        raise NotImplementedError
    # Assemble the final action with exploration noise, then clip.
    action[1] = wrist_ctrl + np.random.randn() * wrist_noise
    action[-7:-4] = arm_pos_ctrl + np.random.randn(
        *arm_pos_ctrl.shape) * arm_pos_noise
    action[2:-7] = self._hand_ctrl + np.random.randn(
        *self._hand_ctrl.shape) * fingers_noise
    action = np.clip(action, self._env.action_space.low,
                     self._env.action_space.high)
    return action