def _align_goal(self):
    tf_to_goal = tf.get_tf(np.r_[self._goal, 1., 0., 0., 0.], self.s_table_tf)
    t_goal_pose = tf.apply_tf(tf_to_goal, self.t_table_tf)
    self.teacher_env.unwrapped.goal[:3] = t_goal_pose[:3]
    if self._env.unwrapped.sim_env.has_button:
        pos = self._env.unwrapped.sim_env._object_xy_pos_to_sync.copy()
        self.teacher_env.unwrapped.sync_object_init_pos(pos, wrt_table=True, now=False)
    elif not self._env.unwrapped.sim_env.has_rotating_platform:
        tf_to_obj = tf.get_tf(self._env.unwrapped.get_object_pose(), self.s_table_tf)
        t_obj_pose = tf.apply_tf(tf_to_obj, self.t_table_tf)
        joint_name = self.teacher_obj_name + ":joint"
        object_pos = self.teacher_env.unwrapped.sim.data.get_joint_qpos(joint_name).copy()
        object_pos[:2] = t_obj_pose[:2]
        self.teacher_env.unwrapped.sim.data.set_joint_qpos(joint_name, object_pos)
        self.teacher_env.unwrapped.sim.forward()
    self.teacher_agent.reset()
def _sync_goals(*, t_env, s_env, **kwargs_):
    tf_to_goal = tf.get_tf(np.r_[s_env.goal, 1., 0., 0., 0.], s_table_tf)
    t_goal_pose = tf.apply_tf(tf_to_goal, t_table_tf)
    t_env.goal = np.r_[t_goal_pose[:3], np.zeros(4)]
    tf_to_obj = tf.get_tf(s_env.get_object_pose(), s_table_tf)
    t_obj_pose = tf.apply_tf(tf_to_obj, t_table_tf)
    object_pos = t_env.sim.data.get_joint_qpos('object:joint').copy()
    object_pos[:2] = t_obj_pose[:2]
    t_env.sim.data.set_joint_qpos('object:joint', object_pos)
    t_env.sim.forward()
    return dict(s_obs=s_env._get_obs(), t_obs=t_env._get_obs())
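# NOTE: `s_table_tf` and `t_table_tf` are free variables in `_sync_goals`, so it is
# presumably defined inside an enclosing scope that computes both table-surface poses
# once and exposes the function as a closure-based callback. A minimal wiring sketch
# under that assumption (the name `make_goal_synchronizer` is illustrative, not part
# of the original code):
def make_goal_synchronizer(s_env, t_env):
    s_table_tf = s_env.get_table_surface_pose()
    t_table_tf = t_env.get_table_surface_pose()

    def _sync(**kwargs_):
        # mirror the source env's goal into the target env, expressed w.r.t. each table
        tf_to_goal = tf.get_tf(np.r_[s_env.goal, 1., 0., 0., 0.], s_table_tf)
        t_env.goal = np.r_[tf.apply_tf(tf_to_goal, t_table_tf)[:3], np.zeros(4)]
        return dict(s_obs=s_env._get_obs(), t_obs=t_env._get_obs())

    return _sync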
def sync_goal(self, goal: np.ndarray, wrt_table=False):
    goal = goal.copy()
    if wrt_table:
        if goal.size == 3:
            goal = np.r_[goal, 1., 0., 0., 0.]
        goal = tf.apply_tf(goal, self.get_table_surface_pose())
        self.goal = goal[:3]
    else:
        self.goal = goal[:3]
def sync_object_init_pos(self, pos: np.ndarray, wrt_table=False, now=False):
    assert pos.size == 2
    if wrt_table:
        pose = tf.apply_tf(np.r_[pos, 0., 1., 0., 0., 0.], self.get_table_surface_pose())
        self._object_xy_pos_to_sync = pose[:2]
    else:
        self._object_xy_pos_to_sync = pos.copy()
    if now:
        object_qpos = self.sim.data.get_joint_qpos('object0:joint').copy()
        object_qpos[:2] = self._object_xy_pos_to_sync
        self.sim.data.set_joint_qpos('object0:joint', object_qpos)
        self.sim.forward()
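# Illustrative usage sketch (assumed, not from the original source): mirroring a
# student environment's goal and object start position into a teacher environment.
# `student_env` and `teacher_env` are hypothetical handles to two unwrapped sim envs
# exposing the sync_goal / sync_object_init_pos API above.
def mirror_student_into_teacher(student_env, teacher_env):
    # express the student goal relative to its table surface, then replay it on the teacher
    goal_wrt_table = tf.get_tf(np.r_[student_env.goal, 1., 0., 0., 0.],
                               student_env.get_table_surface_pose())
    teacher_env.sync_goal(goal_wrt_table[:3], wrt_table=True)

    # copy the object's world xy start position and apply it immediately
    obj_xy = student_env.get_object_pose()[:2]
    teacher_env.sync_object_init_pos(obj_xy, wrt_table=False, now=True)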
def step(self, action: np.ndarray):

    def failure():
        return self._get_obs(), 0.0, True, dict()

    action = np.clip(action, self.action_space.low, self.action_space.high)
    action = action.reshape(-1, 4)
    grippers_pos_wrt_obj = action[:, :3] * self.get_task_space()
    grippers_conf = action[:, 3]

    obj_pose = self.get_object_pose()
    grippers_pos_targets = np.array([
        tf.apply_tf(np.r_[transf, 1., 0., 0., 0.], obj_pose)[:3]
        for transf in grippers_pos_wrt_obj
    ])

    vec = grippers_pos_targets[0, :2] - grippers_pos_targets[1, :2]
    grasp_radius = np.linalg.norm(vec, ord=2) / 2.0
    right_yaw = np.arctan2(vec[1], vec[0]) + np.pi / 2
    left_yaw = np.arctan2(-vec[1], -vec[0]) + np.pi / 2

    left_target_pose = np.r_[grippers_pos_targets[0],
                             tf.rotations.euler2quat(np.r_[0, 0, left_yaw])]
    right_target_pose = np.r_[grippers_pos_targets[1],
                              tf.rotations.euler2quat(np.r_[0, 0, right_yaw])]

    # - move arms to pregrasp1 pose
    #   targets are farther and higher w.r.t. the object center
    max_pos_err = self._move_arms(
        left_target=tf.apply_tf(np.r_[0., 0.05, 0.1], left_target_pose)[:3],
        left_yaw=left_yaw,
        right_target=tf.apply_tf(np.r_[0., 0.05, 0.1], right_target_pose)[:3],
        right_yaw=right_yaw,
        left_grp_config=grippers_conf[0],
        right_grp_config=grippers_conf[1],
        max_steps=120,
    )
    if max_pos_err > 0.05 or self.is_object_unreachable():
        return failure()

    # - move arms to pregrasp2 pose
    #   targets are still farther away, but aligned to the object center
    max_pos_err = self._move_arms(
        left_target=tf.apply_tf(np.r_[0., 0.05, 0.01], left_target_pose)[:3],
        left_yaw=left_yaw,
        right_target=tf.apply_tf(np.r_[0., 0.05, 0.01], right_target_pose)[:3],
        right_yaw=right_yaw,
        left_grp_config=grippers_conf[0],
        right_grp_config=grippers_conf[1],
        max_steps=50,
    )
    if max_pos_err > 0.05 or self.is_object_unreachable():
        return failure()

    # - move arms to grasp pose
    #   targets are as specified by the agent
    self._move_arms(
        left_target=tf.apply_tf(np.r_[0., 0., 0.01], left_target_pose)[:3],
        left_yaw=left_yaw,
        right_target=tf.apply_tf(np.r_[0., 0., 0.01], right_target_pose)[:3],
        right_yaw=right_yaw,
        left_grp_config=grippers_conf[0],
        right_grp_config=grippers_conf[1],
        max_steps=40,
    )
    if self.is_object_unreachable():
        return failure()

    # - lift
    #   targets are as specified by the agent, but a bit higher so the object goes up
    stable_steps = self._move_arms(
        left_target=np.r_[0., grasp_radius * 0.9, 0.05],
        left_yaw=left_yaw,
        right_target=np.r_[0., -grasp_radius * 0.9, 0.05],
        right_yaw=right_yaw,
        count_stable_steps=True,
        targets_relative_to=self.get_object_pos,
        left_grp_config=grippers_conf[0],
        right_grp_config=grippers_conf[1],
        max_steps=200,
    )

    obs = self._get_obs()
    reward = float(stable_steps)
    done = self.is_object_unreachable()
    info = dict()
    return obs, reward, done, info
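# Worked example (pure NumPy) of the yaw convention used in step() above: each
# gripper's yaw is the heading of the vector joining the two xy targets, offset by
# +pi/2, so the left and right grippers face each other across that line.
def _example_grasp_yaws():
    left_xy = np.array([0.10, 0.05])
    right_xy = np.array([-0.10, -0.05])
    vec = left_xy - right_xy
    right_yaw = np.arctan2(vec[1], vec[0]) + np.pi / 2
    left_yaw = np.arctan2(-vec[1], -vec[0]) + np.pi / 2
    # the two yaws differ by exactly pi, i.e. the grippers point at each other
    assert np.isclose((left_yaw - right_yaw) % (2 * np.pi), np.pi)
    return left_yaw, right_yaw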
def _move_arms(self, *, left_target: np.ndarray, right_target: np.ndarray,
               left_yaw=0.0, right_yaw=0.0, pos_threshold=0.02, rot_threshold=0.1,
               k=2.0, max_steps=100, count_stable_steps=False,
               targets_relative_to=None, left_grp_config=-1.0, right_grp_config=-1.0):
    targets = {'l': left_target, 'r': right_target}
    yaws = {'l': left_yaw, 'r': right_yaw}
    stable_steps = 0
    prev_rel_pos = np.zeros(3)
    u = np.zeros(self.sim_env.action_space.shape)
    prev_err_l = np.zeros(7)
    prev_err_r = np.zeros(7)
    max_pos_err = -np.inf

    for i in range(max_steps):
        grasp_center_pos = np.zeros(3)
        max_rot_err = -np.inf
        max_pos_err = -np.inf

        for arm_i, arm in enumerate(('l', 'r')):
            curr_pose = self.get_gripper_pose(arm)
            curr_q = self.get_arm_config(arm)
            if arm == 'l':
                pitch = np.pi - 0.9
                u_masked = u[:7]
                prev_err = prev_err_l
            else:
                pitch = np.pi - 0.9
                u_masked = u[8:15]
                prev_err = prev_err_r

            if callable(targets_relative_to):
                reference = targets_relative_to()
                target_pos = tf.apply_tf(targets[arm], reference)[:3]
            else:
                target_pos = targets[arm]

            target_pose = np.r_[target_pos, tf.rotations.euler2quat(np.r_[0., 0., yaws[arm]])]
            target_pose = tf.apply_tf(
                np.r_[0., 0., 0., tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                target_pose)

            grasp_center_pos += curr_pose[:3]
            max_pos_err = max(max_pos_err, np.abs(curr_pose[:3] - target_pose[:3]).max())
            max_rot_err = max(max_rot_err, tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))

            target_q = self.sim_env.mocap_ik(target_pose - curr_pose, arm)
            u_masked[:] = self._controller(curr_q - target_q, prev_err, k)

            if self.render_substeps:
                tf.render_pose(target_pos.copy(), self.viewer, label=f"{arm}_p", unique_label=True)
                tf.render_pose(target_pose.copy(), self.viewer, label=f"{arm}_t", unique_label=True)
                tf.render_pose(curr_pose.copy(), self.viewer, label=f"{arm}", unique_label=True)

        grasp_center_pos /= 2.0
        u[7] = left_grp_config
        u[15] = right_grp_config
        u = np.clip(u, self.sim_env.action_space.low, self.sim_env.action_space.high)
        self.sim_env.step(u)

        if self.render_substeps:
            tf.render_pose(grasp_center_pos, self.viewer, label="grasp_center", unique_id=5554)
            self.render(keep_markers=True)

        if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
            break

        if count_stable_steps:
            obj_pos = self.get_object_pos()
            rel_pos = obj_pos - grasp_center_pos
            still = prev_rel_pos is not None and np.all(np.abs(rel_pos - prev_rel_pos) < 0.002)
            obj_above_table = len(self.sim_env.get_object_contact_points(other_body='table')) == 0
            if still and obj_above_table:
                stable_steps += 1
            elif i > 10:
                break
            prev_rel_pos = rel_pos

    if count_stable_steps:
        return stable_steps
    return max_pos_err
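# The joint-space `_controller` used above is not shown in this excerpt. Judging only
# from its call signature (error, previous error, gain) and the per-arm `prev_err`
# buffers kept between calls, a PD-style law would be consistent with it; the sketch
# below is an assumption, not the actual implementation, and the damping gain `kd`
# is an illustrative value.
def _pd_controller_sketch(err_q, prev_err, k, kd=10.0):
    # proportional term on the joint error plus a damping term on its change;
    # `prev_err` is updated in place so the caller can reuse it on the next call
    d_err = err_q - prev_err
    prev_err[:] = err_q
    return -k * err_q - kd * d_err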
def _move_arms_mocap(self, *, left_target: np.ndarray, right_target: np.ndarray,
                     left_yaw=0.0, right_yaw=0.0, pos_threshold=0.02, rot_threshold=0.1,
                     k=1.0, max_steps=1, left_grp_config=-1.0, right_grp_config=-1.0):
    targets = {'l': left_target, 'r': right_target}
    yaws = {'l': left_yaw, 'r': right_yaw}
    self.sim.model.eq_active[:] = 1

    for i in range(max_steps):
        grasp_center_pos = np.zeros(3)
        max_rot_err = -np.inf
        max_pos_err = -np.inf
        d_above_table = self.get_object_pos()[2] - self.sim_env._object_z_offset
        grp_xrot = 0.9 + d_above_table * 2.0
        mocap_a = np.zeros((self.sim.model.nmocap, 7))

        for arm_i, arm in enumerate(('l', 'r')):
            curr_pose = self.get_gripper_pose(arm)
            pitch = np.pi - grp_xrot
            target_pos = targets[arm]
            target_pose = np.r_[target_pos, tf.rotations.euler2quat(np.r_[0., 0., yaws[arm]])]
            target_pose = tf.apply_tf(
                np.r_[0., 0., 0., tf.rotations.euler2quat(np.r_[pitch, 0., 0.])],
                target_pose)

            grasp_center_pos += curr_pose[:3]
            if max_steps > 1:
                max_pos_err = max(max_pos_err, np.abs(curr_pose[:3] - target_pose[:3]).max())
                max_rot_err = max(max_rot_err, tf.quat_angle_diff(curr_pose[3:], target_pose[3:]))
            mocap_a[arm_i] = target_pose - curr_pose

            if self.viewer is not None and self.render_poses:
                tf.render_pose(target_pos.copy(), self.viewer, label=f"{arm}_p", unique_label=True)
                tf.render_pose(target_pose.copy(), self.viewer, label=f"{arm}_t", unique_label=True)
                tf.render_pose(curr_pose.copy(), self.viewer, label=f"{arm}", unique_label=True)

        grasp_center_pos /= 2.0

        # set config of grippers
        u = np.zeros(self.sim_env.action_space.shape)
        u[7] = left_grp_config
        u[15] = right_grp_config
        self.sim_env._set_action(u)

        # set pose of grippers
        _mocap_set_action(self.sim, mocap_a * k)

        # take simulation step
        self.sim.step()
        self.sim_env._step_callback()

        if self.viewer is not None:
            if self.render_poses:
                tf.render_pose(grasp_center_pos, self.viewer, label="grasp_center", unique_id=5554)
            self.render(keep_markers=True)

        if max_pos_err < pos_threshold and max_rot_err < rot_threshold:
            break
def step(self, action: np.ndarray):
    action = np.clip(action, self.action_space.low, self.action_space.high)
    (dist_between_grippers, grasp_center_pos_delta, grasp_center_quat_delta,
     l_fingers_ctrl, r_fingers_ctrl) = self._unpack_action(action)

    if self.mocap_ctrl:
        pos_k = 0.1
        dist_range = [0.05, 0.20]
    else:
        pos_k = 0.2
        dist_range = [0.05, 0.30]

    dist_between_grippers = np.interp(dist_between_grippers, [-1, 1], dist_range)

    curr_grasp_center_pose = self.get_grasp_center_pos()  # TODO: use pose rather than position only
    target_grasp_center_pose = np.r_[0., 0., 0., 1., 0., 0., 0.]
    target_grasp_center_pose[:3] = curr_grasp_center_pose[:3] + grasp_center_pos_delta * pos_k
    if self.rotation_ctrl_enabled:
        # target_grasp_center_pose[3:] = curr_grasp_center_pose[3:] + grasp_center_quat_delta * 0.1
        raise NotImplementedError

    grippers_pos_targets = np.array([
        tf.apply_tf(np.r_[0., dist_between_grippers / 2.0, 0., 1., 0., 0., 0.],
                    target_grasp_center_pose)[:3],
        tf.apply_tf(np.r_[0., -dist_between_grippers / 2.0, 0., 1., 0., 0., 0.],
                    target_grasp_center_pose)[:3]
    ])

    vec = grippers_pos_targets[0, :2] - grippers_pos_targets[1, :2]
    right_yaw = np.arctan2(vec[1], vec[0]) + np.pi / 2
    left_yaw = np.arctan2(-vec[1], -vec[0]) + np.pi / 2
    grasp_radius = np.linalg.norm(vec, ord=2) / 2.0
    assert np.allclose(grasp_radius, dist_between_grippers / 2.0)

    if self.mocap_ctrl:
        self._move_arms_mocap(
            left_target=grippers_pos_targets[0],
            left_yaw=left_yaw,
            right_target=grippers_pos_targets[1],
            right_yaw=right_yaw,
            left_grp_config=l_fingers_ctrl,
            right_grp_config=r_fingers_ctrl,
            max_steps=1,
        )
    else:
        self._move_arms(
            left_target=grippers_pos_targets[0],
            left_yaw=left_yaw,
            right_target=grippers_pos_targets[1],
            right_yaw=right_yaw,
            left_grp_config=l_fingers_ctrl,
            right_grp_config=r_fingers_ctrl,
            max_steps=5,
        )

    obs = self._get_obs()
    done = False  # self.is_object_unreachable()
    info = {'is_success': self._is_success(obs['achieved_goal'], self.goal)}
    reward = self.compute_reward(obs['achieved_goal'], self.goal, info)
    return obs, reward, done, info
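# Worked example of the action-to-distance mapping above: np.interp linearly maps the
# normalized [-1, 1] command onto the allowed gripper separation range. With mocap
# control (dist_range = [0.05, 0.20]):
#   np.interp(-1.0, [-1, 1], [0.05, 0.20])  -> 0.05   (grippers as close as allowed)
#   np.interp( 0.0, [-1, 1], [0.05, 0.20])  -> 0.125
#   np.interp( 1.0, [-1, 1], [0.05, 0.20])  -> 0.20   (grippers as far apart as allowed)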
def predict(self, obs, **kwargs):
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    if self._goal is None or np.any(self._goal != new_goal):
        self.reset(new_goal)

    obj_radius = self._object_size[0]
    obj_achieved_alt = obs['achieved_goal'].item()
    obj_achieved_pose = obs['observation'][44:51]
    if np.all(obj_achieved_pose[3:] == 0):
        obj_achieved_pose[3] = 1.0
    # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O")

    grp_xrot = 0.9 + obj_achieved_alt * 2.0
    curr_grp_poses = {
        a: np.r_[
            self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
            rotations.mat2quat(self._raw_env.sim.data.get_site_xmat(f'gripper_{a}_center')),
        ]
        for a in ('l', 'r')
    }

    pos_errors = []
    for arm in ('l', 'r'):
        if arm == 'l':
            transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.]
            if self._phase == 3:
                transf[1] *= 0.9
                transf[2] = 0.05
            pose = tf.apply_tf(transf, obj_achieved_pose)
            # tf.render_pose(pose, self._raw_env.viewer, label="L")
            grp_target_pos = pose[:3]
            grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01]
            target_pose = np.r_[grp_target_pos, rotations.euler2quat(grp_target_rot)]
            prev_err = self._prev_err_l
            curr_q = obs['observation'][:7]
            u_masked = u[:7]
        elif arm == 'r':
            transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.]
            if self._phase == 3:
                transf[1] *= 0.9
                transf[2] = 0.05
            pose = tf.apply_tf(transf, obj_achieved_pose)
            # tf.render_pose(pose, self._raw_env.viewer, label="R")
            grp_target_pos = pose[:3]
            grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi]
            target_pose = np.r_[grp_target_pos, rotations.euler2quat(grp_target_rot)]
            prev_err = self._prev_err_r
            curr_q = obs['observation'][16:23]
            u_masked = u[8:15]
        else:
            continue

        u[7] = -1.0
        u[15] = -1.0

        if self._phase == 0:
            target_pose[2] += 0.1
            target_pose[1] += 0.05 * np.sign(target_pose[1])
        elif self._phase == 1:
            target_pose[2] += 0.01
            target_pose[1] += 0.05 * np.sign(target_pose[1])
        elif self._phase == 2:
            target_pose[2] += 0.01
        elif self._phase == 3:
            target_pose[2] += 0.00

        if self._raw_env.viewer is not None:
            tf.render_pose(target_pose.copy(), self._raw_env.viewer)

        curr_pose = curr_grp_poses[arm].copy()
        err_pose = curr_pose - target_pose
        err_pos = np.linalg.norm(err_pose[:3])
        pos_errors.append(err_pos)
        controller_k = 2.0
        err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
        target_q = self._raw_env.mocap_ik(-err_pose, arm)
        err_q = curr_q - target_q
        u_masked[:] = self._controller(err_q, prev_err, controller_k)

    self._phase_steps += 1
    if self._phase == 0 and np.all(np.array(pos_errors) < 0.03) and err_rot < 0.1:
        self._phase = 1
        self._phase_steps = 0
    elif self._phase == 1 and np.all(np.array(pos_errors) < 0.03) and err_rot < 0.1:
        self._phase = 2
        self._phase_steps = 0
    elif self._phase == 2:
        if self._phase_steps > 30:
            self._phase = 3
            self._phase_steps = 0

    u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
    return u
def predict(self, obs, **kwargs):
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    if self._goal is None or np.any(self._goal != new_goal):
        self.reset(new_goal)

    grasp_center_pos = obs['observation'][:3]
    object_pos = obs['observation'][18:21]
    object_rel_pos = obs['observation'][24:27]
    c_points = self._raw_env.sim_env.get_object_contact_points()

    # button_tf = tf.get_tf(
    #     np.r_[self._raw_env.sim.data.get_geom_xpos('button_geom'), 1., 0., 0., 0.],
    #     self._raw_env.get_table_surface_pose()
    # )
    # print('button_tf', button_tf)

    if self._raw_env.sim_env.has_button and self._phase < 2:
        if np.linalg.norm(object_pos) > 0.55:
            button_pos = self._raw_env.sim.data.get_geom_xpos('button_geom')
            tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.], self._raw_env.viewer)
            err = button_pos - grasp_center_pos
            u[0] = -1.
            u[1:4] = err * 5.0
        else:
            u[0] = 0.
            u[3] = 0.5
            self._phase_steps += 1
            if self._phase_steps > 5:
                self._phase = 2
                self._phase_steps = 0
    elif self._raw_env.sim_env.has_rotating_platform:
        if np.linalg.norm(object_pos[:2]) > 0.12:  # FIXME
            far_end_pose = np.r_[
                self._raw_env.sim.data.get_site_xpos('rotating_platform:far_end'),
                tf.rotations.mat2quat(self._raw_env.sim.data.get_site_xmat('rotating_platform:far_end')),
            ]
            close_end_pose = tf.apply_tf(np.r_[-0.5, -0.02, 0., 1., 0., 0., 0.], far_end_pose)
            tf.render_pose(close_end_pose, self._raw_env.viewer)
            push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
            # push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.], close_end_pose)
            tf.render_pose(push_target, self._raw_env.viewer)
            err = close_end_pose[:3] - grasp_center_pos
            if self._phase == 0:
                if np.linalg.norm(err) > 0.01:
                    u[0] = 1.
                    u[1:4] = err * 5.0
                else:
                    self._phase += 1
                    self._phase_steps = 0
            if self._phase == 1:
                u[0] = 1.
                u[1:4] = (push_target[:3] - grasp_center_pos) * 0.5
                self._phase_steps += 1
                if self._phase_steps > 80:
                    self._phase += 1
                    self._phase_steps = 0
        elif self._phase < 2:
            self._phase = 2
            self._phase_steps = 0
    elif self._phase < 2:
        self._phase = 2
        self._phase_steps = 0

    if self._phase == 2:
        if np.linalg.norm(object_rel_pos) > 0.01:
            u[0] = 0.0
            u[1:4] = object_rel_pos * 5.0
        else:
            self._phase += 1
            self._phase_steps = 0

    if self._phase == 3:
        if len(c_points) < 3:
            u[0] = -self._phase_steps / 10.0
            self._phase_steps += 1
        else:
            self._phase += 1
            self._phase_steps = 0

    if self._phase == 4:
        if len(c_points) > 2:
            u[0] = -1.0
            u[1:4] = (new_goal - object_pos) * 2.0
        else:
            self._phase = 0
            self._phase_steps = 0

    return u
def predict(self, obs, **kwargs):
    # Modified from the original implementation in OpenAI's baselines:
    # https://github.com/openai/baselines/blob/master/baselines/her/experiment/data_generation/fetch_data_generation.py
    goal = obs['desired_goal']
    if self._goal is None or np.any(goal != self._goal):
        self._goal = goal.copy()
        self._phase = -1
        self._phase_steps = 0

    grasp_center_pos = obs['observation'][:3]
    object_pos = obs['observation'][3:6]
    object_rel_pos = obs['observation'][6:9]

    object_oriented_goal = object_rel_pos.copy()
    object_oriented_goal[2] += 0.03  # first make the gripper go slightly above the object

    raw_env = self._env.unwrapped
    table_tf = raw_env.get_table_surface_pose()

    if raw_env.has_button and self._phase < 2:
        u = np.zeros(4)
        if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.38:
            button_pos = raw_env.sim.data.get_geom_xpos('button_geom')
            tf.render_pose(np.r_[button_pos, 1., 0., 0., 0.], raw_env.viewer)
            err = button_pos - grasp_center_pos
            u[3] = -1.
            u[:3] = err * 5.0
            return u
        else:
            u[3] = 0.
            u[2] = 0.5
            self._phase_steps += 1
            if self._phase_steps > 5:
                self._phase = 2
                self._phase_steps = 0
            else:
                return u
    elif raw_env.has_rotating_platform:
        if np.linalg.norm(object_pos[:2] - table_tf[:2]) > 0.22:  # FIXME
            far_end_pose = np.r_[
                raw_env.sim.data.get_site_xpos('rotating_platform:far_end'),
                tf.rotations.mat2quat(raw_env.sim.data.get_site_xmat('rotating_platform:far_end')),
            ]
            close_end_pose = tf.apply_tf(np.r_[-0.5, -0.09, 0., 1., 0., 0., 0.], far_end_pose)
            tf.render_pose(close_end_pose, raw_env.viewer, label="close_end")
            # push_target = np.r_[0.2, 0.2, 0., 1., 0., 0., 0.]
            # push_target = tf.apply_tf(table_tf, push_target)
            push_target = tf.apply_tf(np.r_[0., 0.1, 0., 1., 0., 0., 0.], close_end_pose)
            tf.render_pose(push_target, raw_env.viewer)
            if self._phase == -1:
                close_end_pose[2] += 0.05
            err = close_end_pose[:3] - grasp_center_pos
            u = np.zeros(4)
            if self._phase < 1:
                if np.linalg.norm(err) > 0.015:
                    u[3] = -1.
                    u[:3] = err * 20.0
                else:
                    self._phase += 1
                    self._phase_steps = 0
            if self._phase == 1:
                u[3] = -1.
                u[:3] = (push_target[:3] - grasp_center_pos) * 10
                self._phase_steps += 1
                if self._phase_steps > 80:
                    self._phase += 1
                    self._phase_steps = 0
            return u
        elif self._phase < 2:
            self._phase = 2
            self._phase_steps = 0
    elif self._phase < 2:
        self._phase = 2
        self._phase_steps = 0

    if np.abs(table_tf[2] - grasp_center_pos[2]) < 0.07 and self._phase == 2:
        action = [0, 0, 0.4, 0.05]
        return action
    elif self._phase == 2:
        self._phase = 3
        self._phase_steps = 0

    if np.linalg.norm(object_oriented_goal) >= 0.005 and self._phase == 3:
        action = [0, 0, 0, 0]
        object_oriented_goal = object_rel_pos.copy()
        object_oriented_goal[2] += 0.03
        for i in range(len(object_oriented_goal)):
            action[i] = object_oriented_goal[i] * 6
        action[len(action) - 1] = 0.05
        return action
    elif self._phase == 3:
        self._phase = 4
        self._phase_steps = 0

    if np.linalg.norm(object_rel_pos) >= 0.005 and self._phase == 4:
        action = [0, 0, 0, 0]
        for i in range(len(object_rel_pos)):
            action[i] = object_rel_pos[i] * 6
        action[len(action) - 1] = -0.2
        return action
    elif self._phase == 4:
        self._phase = 5
        self._phase_steps = 0

    if np.linalg.norm(goal - object_pos) >= 0.01 and self._phase == 5:
        action = [0, 0, 0, 0]
        for i in range(len(goal - object_pos)):
            action[i] = (goal - object_pos)[i] * 6
        action[len(action) - 1] = -0.2
        return action
    elif self._phase == 5:
        self._phase = 6
        self._phase_steps = 0

    if self._phase == 6:
        action = [0, 0, 0, 0]
        action[len(action) - 1] = -0.2  # keep the gripper closed
        return action
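# Illustrative rollout sketch (assumed gym-style API, not part of the original source):
# scripted policies like the predict() above are typically used to collect demonstration
# episodes by feeding their actions back into the environment.
def collect_demo_episode(env, policy, max_steps=100):
    obs = env.reset()
    trajectory = []
    for _ in range(max_steps):
        action = policy.predict(obs)
        next_obs, reward, done, info = env.step(action)
        trajectory.append((obs, action, reward, next_obs, done))
        obs = next_obs
        if done or info.get('is_success'):
            break
    return trajectory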
def _step(self, action: np.ndarray):
    action = np.clip(action, self.action_space.low, self.action_space.high)
    fingers_pos_wrt_obj = action[:(3 * 5)].reshape(-1, 3) * np.r_[0.03, 0.03, 0.03]

    if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
        arm_pos_wrt_world = np.zeros(3)
    elif self.task == HandSteppedTask.PICK_AND_PLACE:
        arm_pos_wrt_world = action[(3 * 5):]
    else:
        raise NotImplementedError

    arm_bounds = np.array(self.sim_env.forearm_bounds).T
    if self.render_substeps:
        tf.render_box(self.viewer, bounds=arm_bounds)
    arm_pos_wrt_world *= np.abs(arm_bounds[:, 1] - arm_bounds[:, 0]) / 2.0
    arm_pos_wrt_world += arm_bounds.mean(axis=1)

    obj_pose = self.sim_env._get_object_pose()
    obj_on_ground = obj_pose[2] < 0.37
    fingers_pos_targets = np.array([
        tf.apply_tf(np.r_[transf, 1., 0., 0., 0.], obj_pose)
        for transf in fingers_pos_wrt_obj
    ])

    self.sim.model.eq_active[1:] = 0
    # self.sim.data.mocap_pos[1:] = fingers_pos_targets[:, :3]

    if self.render_substeps:
        tf.render_pose(arm_pos_wrt_world.copy(), self.sim_env.viewer, label='arm_t')
        # for i, f in enumerate(fingers_pos_targets):
        #     tf.render_pose(f, self.sim_env.viewer, label=f'f_{i}')

    pregrasp_palm_target = fingers_pos_targets[:, :3].mean(axis=0)
    pregrasp_palm_target = np.r_[pregrasp_palm_target, 1., 0., 0., 0.]
    pregrasp_palm_target = tf.apply_tf(np.r_[-0.01, 0., 0.015, 1., 0., 0., 0.], pregrasp_palm_target)[:3]

    # move hand
    if len(self.sim_env.get_object_contact_points(other_body='robot')) == 0:
        hand_action = np.r_[0., -.5, -np.ones(18)]
        hand_action[15:] = (-1., -0.5, 1., -1., 0)
        self._move_arm(pregrasp_palm_target, hand_action=hand_action)

    # move fingers
    self._move_fingers(fingers_pos_targets, max_steps=30)

    # move arm
    stable_steps = self._move_arm(arm_pos_wrt_world, count_stable_steps=True)

    done = obj_on_ground
    obs = self._get_obs()

    if self.task == HandSteppedTask.LIFT_ABOVE_TABLE:
        info = dict()
        reward = stable_steps
    elif self.task == HandSteppedTask.PICK_AND_PLACE:
        info = {'is_success': self.sim_env._is_success(obs['achieved_goal'], self.goal)}
        dist_to_goal = np.linalg.norm(self.goal[:3] - self.sim_env._get_object_pose()[:3])
        dist_reward = 100 / (1 + dist_to_goal * _smooth_step(dist_to_goal) * 100)  # range: [0, 100]
        alpha = 0.25
        reward = dist_reward * alpha + stable_steps * (1 - alpha)
    else:
        raise NotImplementedError

    return obs, reward, done, info
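# Worked example of the PICK_AND_PLACE reward shaping above, assuming `_smooth_step`
# saturates near 1 for distances that are not tiny and decays toward 0 near the goal:
#   dist_to_goal = 0.10 m  ->  dist_reward ~ 100 / (1 + 0.10 * 1.0 * 100) ~ 9.1
#   dist_to_goal -> 0      ->  dist_reward -> 100 as _smooth_step(dist_to_goal) -> 0
# The final reward blends this term with the stable-step count:
#   reward = 0.25 * dist_reward + 0.75 * stable_steps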
def predict(self, obs, **kwargs):
    if self._goal is None or np.any(self._goal != obs['desired_goal']):
        self._reset(obs)

    wrist_noise = 0.
    arm_pos_noise = 0.
    fingers_noise = 0.
    strategy = self._strategy

    action = np.zeros(self._env.action_space.shape)
    obj_pos = obs['achieved_goal'][:3]
    d = obj_pos - self._env.unwrapped._get_grasp_center_pose(no_rot=True)[:3]
    reached = np.linalg.norm(d) < 0.05
    on_palm = False
    dropped = obj_pos[2] < 0.38
    if dropped:
        return action * 0.0

    if reached:
        contacts = self._env.unwrapped.get_object_contact_points()
        palm_contacts = len([
            x for x in contacts
            if 'palm' in x['body1'] or 'palm' in x['body2']
        ])
        on_palm = palm_contacts > 0

    if strategy == 0:
        wrist_ctrl = -1.0
        self._hand_ctrl[:] = -1.0
        self._hand_ctrl[[0, 3]] = 1.0
        self._hand_ctrl[[6, 9]] = -1.0
        self._hand_ctrl[13:] = (-1., -0.5, 1., -1., 0)
        d += np.r_[0., -0.030, 0.0]
        still = np.linalg.norm(d - self._prev_d) < 0.002
        if self._grasp_steps > 10:
            still = np.linalg.norm(d - self._prev_d) < 0.005
        self._prev_d = d.copy()
        arm_pos_ctrl = d * 1.0
        if on_palm or still:
            self._hand_ctrl[:] = 1.0
            self._hand_ctrl[[0, 3]] = 1.0
            self._hand_ctrl[[6, 9]] = -1.0
            self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)
            arm_pos_ctrl *= 0.0
            self._grasp_steps += 1
            if self._grasp_steps > 10:
                d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                arm_pos_ctrl = d * 0.5
        else:
            self._grasp_steps = 0
    elif strategy in [1, 2]:
        d += np.r_[0., -0.035, 0.025]
        reached = np.linalg.norm(d) < 0.02
        if self._grasp_steps > 10:
            reached = np.linalg.norm(d) < 0.04
        wrist_ctrl = 0.0
        self._hand_ctrl[:] = -1.0
        self._hand_ctrl[13:] = (-1., 1., 1., -1., -1.)
        arm_pos_ctrl = d * 1.0
        if reached:
            arm_pos_ctrl *= 0.0
            self._grasp_steps += 1
            if strategy == 1:
                self._hand_ctrl[13:] = (-0.5, 1., 1., -1., -1.)
                self._hand_ctrl[4] = 0.6
                self._hand_ctrl[5] = 0.5
            if strategy == 2:
                self._hand_ctrl[13:] = (-0.1, 1., 1., -1., -1.)
                self._hand_ctrl[6] = -0.7
                self._hand_ctrl[7] = 0.6
                self._hand_ctrl[8] = 0.5
            if self._grasp_steps > 10:
                d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]
                arm_pos_ctrl = d * 0.5
        else:
            self._grasp_steps = 0
    elif strategy == 3:
        wrist_ctrl = -0.5
        d = np.zeros(3)
        self._hand_ctrl[:] = -1.0
        self._hand_ctrl[13:] = (0.2, -0.2, 1., -1., 1.)

        obj_pose = self._env.unwrapped._get_object_pose()
        thdistal_pos = self._env.unwrapped.sim.data.get_body_xpos('robot0:thdistal')
        grasp_pose = tf.apply_tf(np.r_[0.015, -0.10, 0.075, 1., 0., 0., 0.], obj_pose)
        pregrasp_pose = tf.apply_tf(np.r_[-0.08, 0., 0., 1., 0., 0., 0.], grasp_pose)

        viewer = self._env.unwrapped.viewer
        if viewer is not None:
            tf.render_pose(obj_pose, viewer, size=0.4)
            tf.render_pose(grasp_pose, viewer, size=0.2)
            tf.render_pose(pregrasp_pose, viewer, size=0.2)

        k = 2.0
        d_thresh = 0.008
        wrist_noise = 0.1
        arm_pos_noise = 0.01
        fingers_noise = 0.2

        if self._phase == 0:
            d = pregrasp_pose[:3] - thdistal_pos
            d[2] = 0.0
            arm_pos_noise = 0.05
            d_thresh = 0.02
        elif self._phase == 1:
            d = pregrasp_pose[:3] - thdistal_pos
            arm_pos_noise = 0.01
            d_thresh = 0.01
        elif self._phase == 2:
            d = grasp_pose[:3] - thdistal_pos
            fingers_noise = 0.05
        elif self._phase == 3:
            fingers_noise = 0.05
            self._hand_ctrl[:] = 1.0
            self._hand_ctrl[13:] = (0.1, 0.5, 1., -1., 0)
            self._grasp_steps += 1
            if self._grasp_steps > 5:
                d = obs['desired_goal'][:3] - obs['achieved_goal'][:3]

        if self._phase < 3 and np.linalg.norm(d) < d_thresh:
            self._phase += 1
            self._grasp_steps = 0

        arm_pos_ctrl = d * k
    else:
        raise NotImplementedError

    action[1] = wrist_ctrl + np.random.randn() * wrist_noise
    action[-7:-4] = arm_pos_ctrl + np.random.randn(*arm_pos_ctrl.shape) * arm_pos_noise
    action[2:-7] = self._hand_ctrl + np.random.randn(*self._hand_ctrl.shape) * fingers_noise
    action = np.clip(action, self._env.action_space.low, self._env.action_space.high)
    return action