def apply_action(self, action): # process action and send it to the robot action = scale_gym_data(self.action_space, np.array(action)) for _ in range(self._action_repeat): robot_obs, _ = self._robot.get_observation() if self._use_IK: if not self._robot._control_orientation: action *= 0.005 new_action = np.add(self._hand_pose[:3], action) else: action[:3] *= 0.005 action[3:6] *= 0.01 new_action = np.add(self._hand_pose, action) # constraint rotation inside limits eu_lim = self._robot.get_rotation_lim() new_action[3:6] = [ min(eu_lim[0][1], max(eu_lim[0][0], new_action[3])), min(eu_lim[1][1], max(eu_lim[1][0], new_action[4])), min(eu_lim[2][1], max(eu_lim[2][0], new_action[5])) ] # constraint position inside workspace ws_lim = self._robot.get_workspace() new_action[:3] = [ min(ws_lim[0][1], max(ws_lim[0][0], new_action[0])), min(ws_lim[1][1], max(ws_lim[1][0], new_action[1])), min(ws_lim[2][1], max(ws_lim[2][0], new_action[2])) ] # Update hand_pose to new pose self._hand_pose = new_action else: action *= 0.05 n_tot_joints = len( self._robot._joint_name_to_ids.items()) # arm + fingers n_joints_to_control = self._robot.get_action_dim() # only arm new_action = np.add( robot_obs[-n_tot_joints:-(n_tot_joints - n_joints_to_control)], action) # -------------------------- # # --- send pose to robot --- # # -------------------------- # self._robot.apply_action(new_action) p.stepSimulation(physicsClientId=self._physics_client_id) time.sleep(self._timeStep) if self._termination(): break self._env_step_counter += 1
def reset(self): self.reset_simulation() if self._use_IK: self._hand_pose = self._robot._home_hand_pose obs, _ = self.get_extended_observation() scaled_obs = scale_gym_data(self.observation_space, obs) return scaled_obs
def reset(self): self.reset_simulation() # --- sample target pose --- # world_obs, _ = self._world.get_observation() self._target_pose = self.sample_tg_pose(world_obs[:3]) self.debug_gui() obs, _ = self.get_extended_observation() scaled_obs = scale_gym_data(self.observation_space, obs) return scaled_obs
def step(self, action): # apply action on the robot self.apply_action(action) obs, _ = self.get_extended_observation() scaled_obs = scale_gym_data(self.observation_space, obs) done = self._termination() reward = self._compute_reward() return scaled_obs, np.array(reward), np.array(done), {}
def reset(self): self.reset_simulation() # --- sample target pose --- # world_obs, _ = self._world.get_observation() self._target_pose = self.sample_tg_pose(world_obs[:3]) self.debug_gui() obs = self.get_goal_observation() scaled_obs = scale_gym_data(self.observation_space['observation'], obs['observation']) obs['observation'] = scaled_obs return obs
def step(self, action): # apply action on the robot self.apply_action(action) obs = self.get_goal_observation() scaled_obs = scale_gym_data(self.observation_space['observation'], obs['observation']) obs['observation'] = scaled_obs info = { 'is_success': self._is_success(obs['achieved_goal'], obs['desired_goal']), } done = self._termination() or info['is_success'] reward = self.compute_reward(obs['achieved_goal'], obs['desired_goal'], info) return obs, reward, done, info
def reset(self): self.reset_simulation() # --- sample target pose --- # world_obs, _ = self._world.get_observation() self._tg_pose = self.sample_tg_pose(world_obs[:3]) self.debug_gui() robot_obs, _ = self._robot.get_observation() world_obs, _ = self._world.get_observation() self._init_dist_hand_obj = goal_distance(np.array(robot_obs[:3]), np.array(world_obs[:3])) self._max_dist_obj_tg = goal_distance(np.array(world_obs[:3]), np.array(self._tg_pose)) obs = self.get_goal_observation() scaled_obs = scale_gym_data(self.observation_space['observation'], obs['observation']) obs['observation'] = scaled_obs return obs
def reset(self): self.reset_simulation() obs, _ = self.get_extended_observation() scaled_obs = scale_gym_data(self.observation_space, obs) return scaled_obs
def apply_action(self, action): # process action and send it to the robot if self._renders: # Sleep, otherwise the computation takes less time than real time, # which will make the visualization like a fast-forward video. time_spent = time.time() - self._last_frame_time self._last_frame_time = time.time() time_to_sleep = self._action_repeat * self._time_step - time_spent if time_to_sleep > 0: time.sleep(time_to_sleep) # ---------------------- # # --- set new action --- # # ---------------------- # action = scale_gym_data(self.action_space, np.array(action)) for _ in range(self._action_repeat): robot_obs, _ = self._robot.get_observation() if self._use_IK: if not self._control_orientation: action *= 0.005 new_action = np.add(self._hand_pose[:3], action) else: action[:3] *= 0.01 action[3:6] *= 0.02 new_action = np.add(self._hand_pose, action) # constraint rotation inside limits eu_lim = self._robot.get_rotation_lim() new_action[3:6] = [ min(eu_lim[0][1], max(eu_lim[0][0], new_action[3])), min(eu_lim[1][1], max(eu_lim[1][0], new_action[4])), min(eu_lim[2][1], max(eu_lim[2][0], new_action[5])) ] # constraint position inside workspace ws_lim = self._robot.get_workspace() new_action[:3] = [ min(ws_lim[0][1], max(ws_lim[0][0], new_action[0])), min(ws_lim[1][1], max(ws_lim[1][0], new_action[1])), min(ws_lim[2][1], max(ws_lim[2][0], new_action[2])) ] self._hand_pose = new_action else: action *= 0.05 new_action = np.add( robot_obs[-len(self._robot._joints_to_control):], action) # -------------------------- # # --- send pose to robot --- # # -------------------------- # self._robot.apply_action(new_action) p.stepSimulation(physicsClientId=self._physics_client_id) time.sleep(self._time_step) if self._termination(): break self._env_step_counter += 1