def reset(self) -> Observation: # Initialize pybullet if not yet done p = self.pybullet assert p, "PyBullet object not valid" if self._hard_reset and self.task.has_robot(): if not self._first_run: logger.debug("Hard reset: deleting the robot") self.task.robot.delete_simulated_robot() logger.debug("Hard reset: creating new robot") self.task.robot = self._get_robot() else: self._first_run = False # Reset the environment ok_reset = self.task.reset_task() assert ok_reset, "Failed to reset the task" # Get the observation observation = self.task.get_observation() if not self.observation_space.contains(observation): logger.warn( "The observation does not belong to the observation space") return Observation(observation)
def step(self, action: Action) -> State: if not self.action_space.contains(action): logger.warn("The action does not belong to the action space") # Set the action self.task.set_action(action) # Step the simulator ok_gazebo = self.gazebo.run() assert ok_gazebo, "Failed to step gazebo" # Get the observation observation = self.task.get_observation() assert isinstance(observation, np.ndarray) if not self.observation_space.contains(observation): logger.warn( "The observation does not belong to the observation space") # Get the reward reward = self.task.get_reward() assert isinstance(reward, float), "Failed to get the reward" # Check termination done = self.task.is_done() # Get info info = self.task.get_info() return State( (Observation(observation), Reward(reward), Done(done), Info(info)))
def get_reward(self) -> Reward: # Calculate the reward if not self.is_done(): reward = 1.0 else: if self._steps_beyond_done is None: # Pole just fell self._steps_beyond_done = 0 reward = 1.0 else: self._steps_beyond_done += 1 reward = 0.0 # Warn the user to call reset if self._steps_beyond_done == 1: logger.warn( "You are calling 'step()' even though this environment " "has already returned done = True. You should always " "call 'reset()' once you receive 'done = True' -- any " "further steps are undefined behavior.") if self._reward_cart_at_center: # Get the observation observation = self.get_observation() x = observation[0] x_dot = observation[1] # Update the reward reward = reward \ - np.abs(x) \ - np.abs(x_dot) \ - 10 * (x >= self._x_threshold) return reward
def reset_task(self) -> bool: # Sample the angular velocity from the observation space _, _, theta_dot = self.observation_space.sample().tolist() # Sample the angular position from an uniform rng theta = self.np_random.uniform(0, 2 * np.pi) try: desired_control_mode = robot_joints.JointControlMode.TORQUE if self.robot.joint_control_mode("pivot") != desired_control_mode: ok_mode = self.robot.set_joint_control_mode( "pivot", desired_control_mode) assert ok_mode, "Failed to set pendulum control mode" except Exception: logger.warn( "Failed to set control mode. Is it supported by the runtime?") pass # Reset the robot state ok_reset = self.robot.reset_joint("pivot", theta, theta_dot) assert ok_reset, "Failed to reset the pendulum" # Clean the last applied force self._last_a = None return True
def reset(self) -> Observation: # Initialize gazebo if not yet done gazebo = self.gazebo assert gazebo, "Gazebo object not valid" # Remove the model and insert it again. This is the reset strategy for # floating-base robots. Resetting the joint state, instead, is sufficient to # reset fixed-based robots. Though, while avoiding the model deletion might # provide better performance, we should be sure that all the internal buffers # (e.g. those related to the low-level PIDs) are correctly re-initialized. if self._hard_reset and self.task.has_robot(): if not self._first_run: logger.debug("Hard reset: deleting the robot") self.task.robot.delete_simulated_robot() logger.debug("Hard reset: creating new robot") self.task.robot = self._get_robot() else: self._first_run = False # Reset the environment ok_reset = self.task.reset_task() assert ok_reset, "Failed to reset the task" # Get the observation observation = self.task.get_observation() if not self.observation_space.contains(observation): logger.warn( "The observation does not belong to the observation space") return Observation(observation)
def step(self, action: Action) -> State: if not self.action_space.contains(action): logger.warn("The action does not belong to the action space") # Set the action ok_action = self.task.set_action(action) assert ok_action, "Failed to set the action" # Step the simulator self.pybullet.stepSimulation() # Get the observation observation = self.task.get_observation() if not self.observation_space.contains(observation): logger.warn( "The observation does not belong to the observation space") # Get the reward reward = self.task.get_reward() assert reward is not None, "Failed to get the reward" # Check termination done = self.task.is_done() # Enforce the real-time factor self._enforce_rtf() return State((observation, reward, done, {}))
def reset(self) -> Observation: # Initialize gazebo if not yet done gazebo = self.gazebo assert gazebo, "Gazebo object not valid" # Remove the robot and insert a new one if not self._first_run: logger.debug("Hard reset: deleting the robot") self.task.robot.delete_simulated_robot() # Execute a dummy step to process model removal self.gazebo.run() logger.debug("Hard reset: creating new robot") self.task.robot = self._get_robot() else: self._first_run = False # Reset the environment ok_reset = self.task.reset_task() assert ok_reset, "Failed to reset the task" # Get the observation observation = self.task.get_observation() if not self.observation_space.contains(observation): logger.warn( "The observation does not belong to the observation space") return Observation(observation)
def gazebo(self) -> scenario.GazeboSimulator: if self._gazebo is not None: assert self._gazebo.initialized() return self._gazebo # Compute the number of physics iteration to execute at every environment step num_of_steps_per_run = self._physics_rate / self.agent_rate if num_of_steps_per_run != int(num_of_steps_per_run): logger.warn( "Rounding the number of iterations to {} from the nominal {}". format(int(num_of_steps_per_run), num_of_steps_per_run)) # Create the simulator gazebo = scenario.GazeboSimulator(1.0 / self._physics_rate, self._real_time_factor, int(num_of_steps_per_run)) # Store the simulator self._gazebo = gazebo # Insert the world (it will initialize the simulator) _ = self.world assert self._gazebo.initialized() return self._gazebo
def step(self, action: Action) -> State: if not self.action_space.contains(action): logger.warn("The action does not belong to the action space") # Set the action ok_action = self.task.set_action(action) assert ok_action, "Failed to set the action" # Step the simulator ok_gazebo = self.gazebo.run() assert ok_gazebo, "Failed to step gazebo" # Get the observation observation = self.task.get_observation() if not self.observation_space.contains(observation): logger.warn( "The observation does not belong to the observation space") # Get the reward # TODO: use the wrapper method? reward = self.task.get_reward() assert reward is not None, "Failed to get the reward" # Check termination done = self.task.is_done() return State((observation, reward, done, {}))
def reset_task(self) -> bool: # Initialize the environment with a new random state using the random number # generator provided by the Task. new_state = self.np_random.uniform(low=-0.05, high=0.05, size=(4, )) new_state[2] = self.np_random.uniform(low=-np.deg2rad(10), high=np.deg2rad(10)) # Set the joints in torque control mode for joint in {"linear", "pivot"}: desired_control_mode = robot_joints.JointControlMode.TORQUE # TODO: temporary workaround for robot implementation without this method, # e.g. FactoryRobot, which does not need to call it at the moment. try: if self.robot.joint_control_mode( joint) != desired_control_mode: ok_mode = self.robot.set_joint_control_mode( joint, desired_control_mode) assert ok_mode, f"Failed to set control mode for joint '{joint}'" except Exception: logger.warn( "This runtime does not support setting the control mode") pass # Reset position and velocity ok1 = self.robot.reset_joint("linear", new_state[0], new_state[1]) ok2 = self.robot.reset_joint("pivot", new_state[2], new_state[3]) # Reset the flag that assures reset is properly called self._steps_beyond_done = None return ok1 and ok2
def delete_simulated_robot(self) -> None: if not self._pybullet or self._robot_id is None: logger.warn("Failed to delete robot from the simulation. " "Simulator not running.") return # Remove the robot from the simulation self._pybullet.removeBody(self._robot_id)
def delete_simulated_robot(self) -> None: if not self._gazebo or not self._gympp_robot: logger.warn("Failed to delete robot from the simulation. " "Simulator not running.") return # Remove the robot from the simulation ok_model = self._gazebo.removeModel(self._robot_name) assert ok_model, f"Failed to remove the model '{self._robot_name}' from gazebo"
def is_done(self) -> bool: if not self.observation_space.contains(self.get_observation()): logger.warn( "Observation is outside its space. Marking the episode as done." ) return True # This environment is episodic and always reach the max_episode_steps return False
def blacklist_model(self, model_path, reason='Unknown'): if self._enable_blacklisting: bl_file = open(self.get_blacklisted_path(model_path), 'w') bl_file.write(reason) bl_file.close() logger.warn( '%s model "%s". Reason: %s.' % ('Blacklisting' if self._enable_blacklisting else 'Skipping', model_path, reason))
def gazebo(self) -> bindings.GazeboWrapper: if self._gazebo_wrapper: assert self._gazebo_wrapper.getPhysicsData().rtf == self._rtf, \ "The RTF of gazebo does not match the configuration" assert 1 / self._gazebo_wrapper.getPhysicsData().maxStepSize == \ self._physics_rate, \ "The physics rate of gazebo does not match the configuration" return self._gazebo_wrapper # ================= # INITIALIZE GAZEBO # ================= assert self._rtf, "Real Time Factor was not set" assert self.agent_rate, "Agent rate was not set" assert self._physics_rate, "Physics rate was not set" logger.debug("Starting gazebo") logger.debug(f"Agent rate: {self.agent_rate} Hz") logger.debug(f"Physics rate: {self._physics_rate} Hz") logger.debug(f"Simulation RTF: {self._rtf}") # Compute the number of physics iteration to execute at every environment step num_of_iterations_per_gazebo_step = self._physics_rate / self.agent_rate if num_of_iterations_per_gazebo_step != int( num_of_iterations_per_gazebo_step): logger.warn( "Rounding the number of iterations to {} from the nominal {}". format(int(num_of_iterations_per_gazebo_step), num_of_iterations_per_gazebo_step)) else: logger.debug("Setting {} iteration per simulator step".format( int(num_of_iterations_per_gazebo_step))) # Create the GazeboWrapper object self._gazebo_wrapper = bindings.GazeboWrapper( int(num_of_iterations_per_gazebo_step), self._rtf, self._physics_rate) # Set the verbosity logger.set_level(gym.logger.MIN_LEVEL) # Initialize the world world_ok = self._gazebo_wrapper.setupGazeboWorld(self._world) assert world_ok, "Failed to initialize the gazebo world" # Initialize the ignition gazebo wrapper gazebo_initialized = self._gazebo_wrapper.initialize() assert gazebo_initialized, "Failed to initialize ignition gazebo" return self._gazebo_wrapper
def add_path(data_path: str) -> None: if not exists(data_path): logger.warn(f"The path '{data_path}' does not exist. Not added to the data path.") return global GYM_IGNITION_DATA_PATH for path in GYM_IGNITION_DATA_PATH: if path == data_path: logger.debug(f"The path '{data_path}' is already present in the data path") return logger.debug(f"Adding new search path: '{data_path}'") GYM_IGNITION_DATA_PATH.append(data_path)
def add_path_from_env_var(env_variable: str) -> None: if env_variable not in os.environ: logger.warn(f"Failed to find '{env_variable}' environment variable") return # Get the environment variable env_var_content = os.environ[env_variable] # Remove leading ':' characters if env_var_content[0] == ':': env_var_content = env_var_content[1:] # Split multiple value env_var_paths = env_var_content.split(":") for path in env_var_paths: add_path(path)
def reset(self) -> Observation: # Reset the task self.task.reset_task() # Execute a paused step ok_run = self.gazebo.run(paused=True) if not ok_run: raise RuntimeError("Failed to run Gazebo") # Get the observation observation = self.task.get_observation() assert isinstance(observation, np.ndarray) if not self.observation_space.contains(observation): logger.warn("The observation does not belong to the observation space") return Observation(observation)
def set_joint_pid(self, joint_name: str, pid: robot.PID) -> bool: mode = self._jointname2jointcontrolinfo[joint_name].mode assert mode != JointControlMode.POSITION_INTERPOLATED if mode == JointControlMode.TORQUE: logger.warn(f"Joint '{joint_name}' is torque controlled. " f"Setting the PID has no effect") return False if mode == JointControlMode.POSITION and pid.i != 0.0: raise Exception("Integral term not supported for POSITION mode") elif mode == JointControlMode.VELOCITY and pid.d != 0.0: raise Exception("Derivative term not supported for VELOCITY mode") # Store the new PID self._jointname2jointcontrolinfo[joint_name]._replace(PID=pid) # Update the PIDs setting again the control mode ok_mode = self.set_joint_control_mode(joint_name, mode) assert ok_mode, "Failed to set the control mode" return True
def render(self, mode: str = 'human', **kwargs) -> None: if mode != "human": raise Exception(f"Render mode '{mode}' not yet supported") # If render has been already called once, and the simulator is ok, return if self._render_enabled and self._pybullet: return # Enable rendering self._render_enabled = True # If the simulator is already allocated (in DIRECT mode, headless), we have to # disconnect it, starting it in GUI mode, and recreate the robot object. if self._pybullet is not None: logger.warn( "The simulator was allocated in DIRECT mode before calling render. " "The simulation has been reset. All changes to the simulation state " "will be lost.") # Disconnect the simulator and deletes the robot object self.close() # Create a new simulator and robot object self.task._robot = self._get_robot()
def __init__(self, task_cls: type, robot_cls: type, model: str, rtf: float, agent_rate: float, physics_rate: float, world: str = "plane_implicit.urdf", hard_reset: bool = True, **kwargs): # Save the keyworded arguments. # We use them to build the task and the robot objects, and allow custom class # to accept user-defined parameters. self._kwargs = kwargs # Store the type of the class that provides Robot interface self._robot_cls = robot_cls # Delete and create a new robot every environment reset self._first_run = True self._hard_reset = hard_reset # URDF or SDF model files self._world = world self._model = model # Simulation attributes self._rtf = rtf self._now = None self._bias = 0.0 self._physics_rate = physics_rate self._plane_id = None self._pybullet = None self._render_enabled = False self._render_called = False # Calculate the rate between agent and physics rate physics_steps_per_agent_update = physics_rate / agent_rate self._num_of_physics_steps = int(physics_steps_per_agent_update) assert physics_rate >= agent_rate, "Agent cannot run faster than physics" # If there is an incompatibility between the agent and physics rates, round the # iterations and notify the user if physics_steps_per_agent_update != round( physics_steps_per_agent_update): self._num_of_physics_steps = round(physics_steps_per_agent_update) logger.warn("The real physics rate will be {} Hz".format( agent_rate * self._num_of_physics_steps)) logger.debug(f"RTF = {rtf}") logger.debug(f"Agent rate = {agent_rate} Hz") logger.debug( f"Physics rate = {agent_rate * self._num_of_physics_steps} Hz") logger.debug("Initializing the Task") task = task_cls(**kwargs) assert isinstance(task, base.task.Task), \ "'task_cls' object must inherit from Task" # Wrap the task with this runtime super().__init__(task=task, agent_rate=agent_rate) # Initialize the simulator and the robot self.task.robot = self._get_robot() # Initialize the spaces self.task.action_space, self.task.observation_space = self.task.create_spaces( ) # Seed the environment self.seed()