def reset(self):
    # reset simulation
    del self.platform

    # initialize simulation
    initial_robot_position = TriFingerPlatform.spaces.robot_position.default
    initial_object_pose = self.initializer.get_initial_state()
    goal_object_pose = self.initializer.get_goal()

    self.platform = TriFingerPlatform(
        visualization=self.visualization,
        initial_robot_position=initial_robot_position,
        initial_object_pose=initial_object_pose,
    )

    self.goal = {
        "position": goal_object_pose.position,
        "orientation": goal_object_pose.orientation,
    }

    # visualize the goal
    if self.visualization:
        self.goal_marker = visual_objects.CubeMarker(
            width=0.065,
            position=goal_object_pose.position,
            orientation=goal_object_pose.orientation,
        )

    self.info = {"difficulty": self.initializer.difficulty}

    self.step_count = 0

    return self._create_observation(0)
def reset(self):
    # reset simulation
    del self.platform

    # initialize simulation
    if self.initializer is None:
        # if no initializer is given (which will be the case during
        # training), we can initialize in any way desired.  Here, we
        # initialize the cube always in the center of the arena, instead of
        # randomly, as this appears to help training.
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default
        )
        default_object_position = (
            TriFingerPlatform.spaces.object_position.default
        )
        default_object_orientation = (
            TriFingerPlatform.spaces.object_orientation.default
        )
        initial_object_pose = move_cube.Pose(
            position=default_object_position,
            orientation=default_object_orientation,
        )
        goal_object_pose = move_cube.sample_goal(difficulty=1)
    else:
        # if an initializer is given (i.e. during evaluation), we need to
        # initialize according to it, to make sure we remain coherent with
        # the standard CubeEnv.  Otherwise the trajectories produced during
        # evaluation will be invalid.
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default
        )
        initial_object_pose = self.initializer.get_initial_state()
        goal_object_pose = self.initializer.get_goal()

    self.platform = TriFingerPlatform(
        visualization=self.visualization,
        initial_robot_position=initial_robot_position,
        initial_object_pose=initial_object_pose,
    )

    self.goal = {
        "position": goal_object_pose.position,
        "orientation": goal_object_pose.orientation,
    }

    # visualize the goal
    if self.visualization:
        self.goal_marker = visual_objects.CubeMarker(
            width=0.065,
            position=goal_object_pose.position,
            orientation=goal_object_pose.orientation,
            physicsClientId=self.platform.simfinger._pybullet_client_id,
        )

    self.info = {"difficulty": 1}

    self.step_count = 0

    return self._create_observation(0)
def reset(self):
    # reset simulation
    del self.platform

    # initialize simulation
    initial_robot_position = (
        TriFingerPlatform.spaces.robot_position.default
    )
    initial_object_pose = self.initializer.get_initial_state()
    goal_object_pose = self.initializer.get_goal()

    self.platform = TriFingerPlatform(
        visualization=self.visualization,
        initial_robot_position=initial_robot_position,
        initial_object_pose=initial_object_pose,
    )

    self.goal = {
        "position": goal_object_pose.position,
        "orientation": goal_object_pose.orientation,
    }

    # update smoothing parameters
    self.update_smoothing()
    self.episode_count += 1
    self.smoothed_action = None

    # visualize the goal
    if self.visualization:
        self.goal_marker = visual_objects.CubeMarker(
            width=0.065,
            position=goal_object_pose.position,
            orientation=goal_object_pose.orientation,
            physicsClientId=self.platform.simfinger._pybullet_client_id,
        )

    self.info = {"difficulty": self.initializer.difficulty}

    self.step_count = 0

    return self._create_observation(0)
def reset(self):
    # By changing the `_reset_*` method below you can switch between using
    # the platform frontend, which is needed for the submission system, and
    # the direct simulation, which may be more convenient if you want to
    # pre-train locally in simulation.
    self._reset_platform_frontend()

    if self.sim_platform:
        del self.sim_platform

    initial_robot_position = TriFingerPlatform.spaces.robot_position.default
    default_object_position = (
        TriFingerPlatform.spaces.object_position.default
    )
    default_object_orientation = (
        TriFingerPlatform.spaces.object_orientation.default
    )
    dummy_initial_object_pose = move_cube.Pose(
        position=default_object_position,
        orientation=default_object_orientation,
    )
    self.sim_platform = TriFingerPlatform(
        visualization=False,
        initial_robot_position=initial_robot_position,
        initial_object_pose=dummy_initial_object_pose,
    )

    # self.goal_marker = visual_objects.CubeMarker(
    #     width=0.065,
    #     position=self.goal["position"],
    #     orientation=self.goal["orientation"],
    #     physicsClientId=self.sim_platform.simfinger._pybullet_client_id,
    # )

    # self._reset_direct_simulation()

    self.step_count = 0

    # need to already do one step to get initial observation
    # TODO disable frameskip here?
    observation, _, _, _ = self.step(self._initial_action)

    return observation
class ExamplePushingTrainingEnv(gym.Env):
    """Gym environment for moving cubes with simulated TriFingerPro."""

    def __init__(
        self,
        initializer=None,
        action_type=cube_env.ActionType.POSITION,
        frameskip=1,
        visualization=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose
                and goal pose.  If no initializer is provided, the
                environment is initialized in a way that is helpful for
                learning.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            frameskip (int): Number of actual control steps to be performed
                in one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================
        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================
        spaces = TriFingerPlatform.spaces

        if self.action_type == cube_env.ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == cube_env.ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == cube_env.ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque": spaces.robot_torque.gym,
                "position": spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        self.observation_names = [
            "robot_position",
            "robot_velocity",
            "robot_tip_positions",
            "object_position",
            "object_orientation",
            "goal_object_position",
        ]

        self.observation_space = gym.spaces.Dict({
            "robot_position": spaces.robot_position.gym,
            "robot_velocity": spaces.robot_velocity.gym,
            "robot_tip_positions": gym.spaces.Box(
                low=np.array([spaces.object_position.low] * 3),
                high=np.array([spaces.object_position.high] * 3),
            ),
            "object_position": spaces.object_position.gym,
            "object_orientation": spaces.object_orientation.gym,
            "goal_object_position": spaces.object_position.gym,
        })

    def step(self, action):
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space."
            )

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            previous_observation = self._create_observation(t)
            observation = self._create_observation(t + 1)

            reward += self._compute_reward(
                previous_observation=previous_observation,
                observation=observation,
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            # if no initializer is given (which will be the case during
            # training), we can initialize in any way desired.  Here, we
            # initialize the cube always in the center of the arena, instead
            # of randomly, as this appears to help training.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default
            )
            default_object_position = (
                TriFingerPlatform.spaces.object_position.default
            )
            default_object_orientation = (
                TriFingerPlatform.spaces.object_orientation.default
            )
            initial_object_pose = move_cube.Pose(
                position=default_object_position,
                orientation=default_object_orientation,
            )
            goal_object_pose = move_cube.sample_goal(difficulty=1)
        else:
            # if an initializer is given (i.e. during evaluation), we need to
            # initialize according to it, to make sure we remain coherent
            # with the standard CubeEnv.  Otherwise the trajectories produced
            # during evaluation will be invalid.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default
            )
            initial_object_pose = self.initializer.get_initial_state()
            goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
            )

        self.info = dict()

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)
        robot_tip_positions = self.platform.forward_kinematics(
            robot_observation.position
        )
        robot_tip_positions = np.array(robot_tip_positions)

        observation = {
            "robot_position": robot_observation.position,
            "robot_velocity": robot_observation.velocity,
            "robot_tip_positions": robot_tip_positions,
            "object_position": object_observation.position,
            "object_orientation": object_observation.orientation,
            "goal_object_position": self.goal["position"],
        }
        return observation

    @staticmethod
    def _compute_reward(previous_observation, observation):
        # calculate first reward term: change in distance between the finger
        # tips and the object
        current_distance_from_block = np.linalg.norm(
            observation["robot_tip_positions"]
            - observation["object_position"]
        )
        previous_distance_from_block = np.linalg.norm(
            previous_observation["robot_tip_positions"]
            - previous_observation["object_position"]
        )
        reward_term_1 = (
            previous_distance_from_block - current_distance_from_block
        )

        # calculate second reward term: change in distance between the object
        # and the goal
        current_dist_to_goal = np.linalg.norm(
            observation["goal_object_position"]
            - observation["object_position"]
        )
        previous_dist_to_goal = np.linalg.norm(
            previous_observation["goal_object_position"]
            - previous_observation["object_position"]
        )
        reward_term_2 = previous_dist_to_goal - current_dist_to_goal

        reward = 750 * reward_term_1 + 250 * reward_term_2
        return reward

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == cube_env.ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == cube_env.ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == cube_env.ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"],
                position=gym_action["position"],
            )
        else:
            raise ValueError("Invalid action_type")

        return robot_action
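# A minimal usage sketch for ExamplePushingTrainingEnv (not part of the
# original code): it assumes the module-level imports used by the class
# (gym, numpy as np, cube_env, move_cube, TriFingerPlatform, visual_objects)
# are available, and runs a single episode with random position actions.
if __name__ == "__main__":
    env = ExamplePushingTrainingEnv(
        initializer=None,  # cube starts in the arena center, see reset()
        action_type=cube_env.ActionType.POSITION,
        frameskip=5,
        visualization=False,
    )
    env.seed(0)
    env.action_space.seed(0)

    observation = env.reset()
    accumulated_reward = 0.0
    is_done = False
    while not is_done:
        action = env.action_space.sample()
        observation, reward, is_done, info = env.step(action)
        accumulated_reward += reward
    print("episode finished, accumulated reward:", accumulated_reward)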
class CubeEnv(gym.GoalEnv):
    """Gym environment for moving cubes with simulated TriFingerPro."""

    def __init__(
        self,
        initializer,
        action_type=ActionType.POSITION,
        observation_type=None,
        frameskip=1,
        visualization=False,
        testing=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose
                and goal pose.  See :class:`RandomInitializer` and
                :class:`FixedInitializer`.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            observation_type: Not used by this environment.
            frameskip (int): Number of actual control steps to be performed
                in one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
            testing (bool): If true, the action smoothing coefficient is
                fixed at its final value instead of being ramped up over
                episodes.
        """
        # Basic initialization
        # ====================
        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        # TODO: The name "frameskip" makes sense for an Atari environment
        # but not really for our scenario.  The name is also misleading as
        # "frameskip = 1" suggests that one frame is skipped while it
        # actually means "do one step per step" (i.e. no skip).
        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================
        spaces = TriFingerPlatform.spaces

        object_state_space = gym.spaces.Dict(
            {
                "position": spaces.object_position.gym,
                "orientation": spaces.object_orientation.gym,
            }
        )

        if self.action_type == ActionType.TORQUE:
            self.unscaled_action_space = spaces.robot_torque.gym
            self.action_space = gym.spaces.Box(
                low=-np.ones((9,)), high=np.ones((9,))
            )
        elif self.action_type == ActionType.POSITION:
            self.unscaled_action_space = spaces.robot_position.gym
            self.action_space = gym.spaces.Box(
                low=-np.ones((9,)), high=np.ones((9,))
            )
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            self.unscaled_action_space = gym.spaces.Dict(
                {
                    "torque": spaces.robot_torque.gym,
                    "position": spaces.robot_position.gym,
                }
            )
            self.action_space = gym.spaces.Dict(
                {
                    "torque": gym.spaces.Box(
                        low=-np.ones((9,)), high=np.ones((9,))
                    ),
                    "position": gym.spaces.Box(
                        low=-np.ones((9,)), high=np.ones((9,))
                    ),
                }
            )
        else:
            raise ValueError("Invalid action_type")

        self.observation_names = [
            "robot_position",
            "robot_velocity",
            "robot_tip_positions",
            "object_position",
            "object_orientation",
            "goal_object_position",
        ]

        self.observation_space = gym.spaces.Dict(
            {
                "observation": gym.spaces.Dict(
                    {
                        "robot_position": spaces.robot_position.gym,
                        "robot_velocity": spaces.robot_velocity.gym,
                        "robot_tip_positions": gym.spaces.Box(
                            low=np.array([spaces.object_position.low] * 3),
                            high=np.array([spaces.object_position.high] * 3),
                        ),
                        "object_position": spaces.object_position.gym,
                        "object_orientation": spaces.object_orientation.gym,
                        "goal_object_position": spaces.object_position.gym,
                    }
                ),
                "desired_goal": object_state_space,
                "achieved_goal": object_state_space,
            }
        )

        # parameters of the action-smoothing schedule: the smoothing
        # coefficient is ramped up linearly from 0 to final_alpha between
        # start_after and stop_after (given as fractions of num_episodes)
        smoothing_params = {
            "num_episodes": 1000,
            "start_after": 3.0 / 7.0,
            "final_alpha": 0.975,
            "stop_after": 5.0 / 7.0,
        }

        if testing:
            self.smoothing_start_episode = 0
            self.smoothing_alpha = smoothing_params["final_alpha"]
            self.smoothing_increase_step = 0
            self.smoothing_stop_episode = math.inf
        else:
            self.smoothing_stop_episode = int(
                smoothing_params["num_episodes"]
                * smoothing_params["stop_after"]
            )
            self.smoothing_start_episode = int(
                smoothing_params["num_episodes"]
                * smoothing_params["start_after"]
            )
            num_smoothing_increase_steps = (
                self.smoothing_stop_episode - self.smoothing_start_episode
            )
            self.smoothing_alpha = 0
            self.smoothing_increase_step = (
                smoothing_params["final_alpha"] / num_smoothing_increase_steps
            )

        self.smoothed_action = None
        self.episode_count = 0

    @staticmethod
    def compute_reward(previous_observation, observation):
        # calculate first reward term: change in distance between the finger
        # tips and the object
        current_distance_from_block = np.linalg.norm(
            observation["robot_tip_positions"]
            - observation["object_position"]
        )
        previous_distance_from_block = np.linalg.norm(
            previous_observation["robot_tip_positions"]
            - previous_observation["object_position"]
        )
        reward_term_1 = (
            previous_distance_from_block - current_distance_from_block
        )

        # calculate second reward term: change in distance between the object
        # and the goal
        current_dist_to_goal = np.linalg.norm(
            observation["goal_object_position"]
            - observation["object_position"]
        )
        previous_dist_to_goal = np.linalg.norm(
            previous_observation["goal_object_position"]
            - previous_observation["object_position"]
        )
        reward_term_2 = previous_dist_to_goal - current_dist_to_goal

        reward = 500 * reward_term_1 + 500 * reward_term_2
        return reward

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        When the end of the episode is reached, you are responsible for
        calling ``reset()`` to reset this environment's state.

        Args:
            action: An action provided by the agent (depends on the selected
                :class:`ActionType`).

        Returns:
            tuple:

            - observation (dict): agent's observation of the current
              environment.
            - reward (float): amount of reward returned after previous
              action.
            - done (bool): whether the episode has ended, in which case
              further step() calls will return undefined results.
            - info (dict): info dictionary containing the difficulty level
              of the goal.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space."
            )

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        unscaled_action = self.unscale_action(
            action, self.unscaled_action_space
        )
        if self.smoothed_action is None:
            # start with current position
            # self.smoothed_action = self.finger.observation.position
            self.smoothed_action = unscaled_action

        # exponential smoothing of the unscaled action
        self.smoothed_action = (
            self.smoothing_alpha * self.smoothed_action
            + (1 - self.smoothing_alpha) * unscaled_action
        )

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(
                self.smoothed_action
            )
            t = self.platform.append_desired_action(robot_action)

            # Use observations of step t + 1 to follow what would be expected
            # in a typical gym environment.  Note that on the real robot,
            # this will not be possible.
            previous_observation = self._create_observation(t)
            observation = self._create_observation(t + 1)

            reward += self.compute_reward(
                previous_observation=previous_observation["observation"],
                observation=observation["observation"],
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default
        )
        initial_object_pose = self.initializer.get_initial_state()
        goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }

        # update smoothing parameters
        self.update_smoothing()
        self.episode_count += 1
        self.smoothed_action = None

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )

        self.info = {"difficulty": self.initializer.difficulty}

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Set the seed for this env's random number generator.

        .. note::

           Spaces need to be seeded separately.  E.g. if you want to sample
           actions directly from the action space using
           ``env.action_space.sample()`` you can set a seed there using
           ``env.action_space.seed()``.

        Returns:
            List of seeds used by this environment.  This environment only
            uses a single seed, so the list contains only one element.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)
        robot_tip_positions = self.platform.forward_kinematics(
            robot_observation.position
        )
        robot_tip_positions = np.array(robot_tip_positions)

        observation = {
            "observation": {
                "robot_position": robot_observation.position,
                "robot_velocity": robot_observation.velocity,
                "robot_tip_positions": robot_tip_positions,
                "object_position": object_observation.position,
                "object_orientation": object_observation.orientation,
                "goal_object_position": self.goal["position"],
            },
            "desired_goal": self.goal,
            "achieved_goal": {
                "position": object_observation.position,
                "orientation": object_observation.orientation,
            },
        }
        return observation

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"],
                position=gym_action["position"],
            )
        else:
            raise ValueError("Invalid action_type")

        return robot_action

    def update_smoothing(self):
        """Update the smoothing coefficient with which the applied action is
        smoothed."""
        if (
            self.smoothing_start_episode
            <= self.episode_count
            < self.smoothing_stop_episode
        ):
            self.smoothing_alpha += self.smoothing_increase_step
            print(
                "episode: {}, smoothing: {}".format(
                    self.episode_count, self.smoothing_alpha
                )
            )

    @staticmethod
    def unscale_action(y, space):
        """Unscale some input from [-1, 1] to the range of the given space."""
        return space.low + (y + 1.0) / 2.0 * (space.high - space.low)
class CubeEnv(gym.GoalEnv):
    """Gym environment for moving cubes with simulated TriFingerPro."""

    def __init__(
        self,
        initializer,
        action_type=ActionType.POSITION,
        frameskip=1,
        visualization=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose
                and goal pose.  See :class:`RandomInitializer` and
                :class:`FixedInitializer`.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            frameskip (int): Number of actual control steps to be performed
                in one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================
        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        # TODO: The name "frameskip" makes sense for an Atari environment
        # but not really for our scenario.  The name is also misleading as
        # "frameskip = 1" suggests that one frame is skipped while it
        # actually means "do one step per step" (i.e. no skip).
        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================
        spaces = TriFingerPlatform.spaces

        object_state_space = gym.spaces.Dict(
            {
                "position": spaces.object_position.gym,
                "orientation": spaces.object_orientation.gym,
            }
        )

        if self.action_type == ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict(
                {
                    "torque": spaces.robot_torque.gym,
                    "position": spaces.robot_position.gym,
                }
            )
        else:
            raise ValueError("Invalid action_type")

        self.observation_space = gym.spaces.Dict(
            {
                "observation": gym.spaces.Dict(
                    {
                        "position": spaces.robot_position.gym,
                        "velocity": spaces.robot_velocity.gym,
                        "torque": spaces.robot_torque.gym,
                    }
                ),
                "desired_goal": object_state_space,
                "achieved_goal": object_state_space,
            }
        )

    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the reward for the given achieved and desired goal.

        Args:
            achieved_goal (dict): Current pose of the object.
            desired_goal (dict): Goal pose of the object.
            info (dict): An info dictionary containing a field "difficulty"
                which specifies the difficulty level.

        Returns:
            float: The reward that corresponds to the provided achieved goal
            w.r.t. the desired goal.  Note that the following should always
            hold true::

                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """
        return -move_cube.evaluate_state(
            move_cube.Pose.from_dict(desired_goal),
            move_cube.Pose.from_dict(achieved_goal),
            info["difficulty"],
        )

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        When the end of the episode is reached, you are responsible for
        calling ``reset()`` to reset this environment's state.

        Args:
            action: An action provided by the agent (depends on the selected
                :class:`ActionType`).

        Returns:
            tuple:

            - observation (dict): agent's observation of the current
              environment.
            - reward (float): amount of reward returned after previous
              action.
            - done (bool): whether the episode has ended, in which case
              further step() calls will return undefined results.
            - info (dict): info dictionary containing the difficulty level
              of the goal.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space."
            )

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            # Use observations of step t + 1 to follow what would be expected
            # in a typical gym environment.  Note that on the real robot,
            # this will not be possible.
            observation = self._create_observation(t + 1)

            reward += self.compute_reward(
                observation["achieved_goal"],
                observation["desired_goal"],
                self.info,
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        # reset simulation
        del self.platform

        # initialize simulation
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default
        )
        initial_object_pose = self.initializer.get_initial_state()
        goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )

        self.info = {"difficulty": self.initializer.difficulty}

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Set the seed for this env's random number generator.

        .. note::

           Spaces need to be seeded separately.  E.g. if you want to sample
           actions directly from the action space using
           ``env.action_space.sample()`` you can set a seed there using
           ``env.action_space.seed()``.

        Returns:
            List of seeds used by this environment.  This environment only
            uses a single seed, so the list contains only one element.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)

        observation = {
            "observation": {
                "position": robot_observation.position,
                "velocity": robot_observation.velocity,
                "torque": robot_observation.torque,
            },
            "desired_goal": self.goal,
            "achieved_goal": {
                "position": object_observation.position,
                "orientation": object_observation.orientation,
            },
        }
        return observation

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"],
                position=gym_action["position"],
            )
        else:
            raise ValueError("Invalid action_type")

        return robot_action
def test_get_camera_observation_timeindex(self):
    platform = TriFingerPlatform(enable_cameras=True)

    # negative time index needs to be rejected
    with self.assertRaises(ValueError):
        platform.get_camera_observation(-1)

    t = platform.append_desired_action(platform.Action())
    try:
        platform.get_camera_observation(t)
        platform.get_camera_observation(t + 1)
    except Exception:
        self.fail()

    with self.assertRaises(ValueError):
        platform.get_camera_observation(t + 2)
def test_get_object_pose_timeindex(self):
    platform = TriFingerPlatform()

    # negative time index needs to be rejected
    with self.assertRaises(ValueError):
        platform.get_object_pose(-1)

    t = platform.append_desired_action(platform.Action())
    try:
        platform.get_object_pose(t)
        platform.get_object_pose(t + 1)
    except Exception:
        self.fail()

    with self.assertRaises(ValueError):
        platform.get_object_pose(t + 2)
def test_timestamps(self):
    platform = TriFingerPlatform(visualization=False, enable_cameras=True)
    action = platform.Action()

    # compute camera update step interval based on configured rates
    camera_update_step_interval = (
        1 / platform._camera_rate_fps
    ) / platform._time_step
    # robot time step in milliseconds
    time_step_ms = platform._time_step * 1000

    # First time step
    t = platform.append_desired_action(action)
    first_stamp_ms = platform.get_timestamp_ms(t)
    first_stamp_s = first_stamp_ms / 1000

    object_pose = platform.get_object_pose(t)
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(first_stamp_ms, object_pose.timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[0].timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[1].timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[2].timestamp)

    # Test time stamps of observations t + 1
    object_pose_next = platform.get_object_pose(t + 1)
    camera_obs_next = platform.get_camera_observation(t + 1)
    next_stamp_ms = first_stamp_ms + time_step_ms
    self.assertEqual(next_stamp_ms, object_pose_next.timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[0].timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[1].timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[2].timestamp)

    # XXX ===============================================================
    # The following part of the test is disabled as currently everything
    # is updated in each step (i.e. no lower update rate for camera and
    # object).
    return

    # Second time step
    t = platform.append_desired_action(action)
    second_stamp_ms = platform.get_timestamp_ms(t)
    self.assertEqual(second_stamp_ms, first_stamp_ms + time_step_ms)

    # there should not be a new camera observation yet
    object_pose = platform.get_object_pose(t)
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(first_stamp_s, object_pose.timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[0].timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[1].timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[2].timestamp)

    # do several steps until a new camera/object update is expected
    for _ in range(int(camera_update_step_interval)):
        t = platform.append_desired_action(action)

    nth_stamp_ms = platform.get_timestamp_ms(t)
    nth_stamp_s = nth_stamp_ms / 1000
    self.assertGreater(nth_stamp_ms, second_stamp_ms)

    object_pose = platform.get_object_pose(t)
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(nth_stamp_s, object_pose.timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[0].timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[1].timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[2].timestamp)
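# Sketch (not taken from the test file) of the time-index contract that the
# tests above verify: after append_desired_action() returns an index t, only
# t and t + 1 are accepted when querying observations, and anything outside
# that window raises ValueError.
if __name__ == "__main__":
    platform = TriFingerPlatform()
    t = platform.append_desired_action(platform.Action())

    pose_now = platform.get_object_pose(t)       # accepted
    pose_next = platform.get_object_pose(t + 1)  # also accepted
    try:
        platform.get_object_pose(t + 2)
    except ValueError:
        print("time index t + 2 is rejected, as the tests above expect")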