def template_pendulum_wrt_ground_truth(env_name: str, max_error_in_deg: float):
    """Compare a pendulum environment against the analytical ground truth.

    Steps both the real environment and the equation-based ``PendulumEnv``
    with the same random actions, and asserts that the pendulum angles never
    diverge by more than ``max_error_in_deg`` degrees.

    Args:
        env_name: Registered gym environment id to test.
        max_error_in_deg: Maximum tolerated angular error, in degrees.

    Raises:
        AssertionError: If the angular error exceeds the threshold.
    """
    # Create the pendulum
    env = gym.make(env_name)

    # Create the environment with the equations and initialize its time step
    # so that both environments advance at the same rate.
    env_equation = PendulumEnv()
    env_equation.dt = 1.0 / env.unwrapped.spec._kwargs['agent_rate']

    # Render the environment
    # env.render('human')
    # time.sleep(5)

    # Seed the environment for reproducible action sampling
    env.seed(42)

    for epoch in range(10):
        # Reset the environment and align the analytical model to its state
        logger.info("Resetting the environment")
        observation = env.reset()
        env_equation.set_state_from_obs(observation)

        # Initialize intermediate variables
        iteration = 0
        done = False

        while not done:
            iteration += 1

            # Sample a random action from the environment
            action = env.action_space.sample()

            # Step the environments
            observation, _, done, _ = env.step(action)
            observation_equation, _, _, _ = env_equation.step(action)

            theta = np.rad2deg(theta_from_obs(observation))
            theta_equation = np.rad2deg(theta_from_obs(observation_equation))

            # Compute the angular distance, wrapping the difference into
            # [-180, 180] so the ±180° discontinuity is handled correctly.
            # (The previous sign-based formula used `180 % |theta|`, which is
            # modulo instead of subtraction, and broke for small angles of
            # opposite sign.)
            error = abs((theta - theta_equation + 180.0) % 360.0 - 180.0)

            print(iteration, error)

            if error > max_error_in_deg:
                print("===================")
                print(f"Environment name: {env_name}")
                print(f"Iteration: #{iteration}")
                print(f"Error: {error}")
                print(f"Theta Equation (deg): {theta_equation}")
                print(f"Theta Environment (deg): {theta}")
                print("===================")
                assert False, "Error in pendulum angle is bigger than the threshold"
def template_run_environment(env_name):
    """Smoke-test an environment: create it, reset it, and step it randomly.

    Args:
        env_name: Registered gym environment id to exercise.

    Raises:
        AssertionError: If creation, sampling, reset, or stepping fails to
            produce valid data.
    """
    logger.info(f"Testing environment '{env_name}'")

    env = gym.make(env_name)
    assert env, f"Failed to create '{env_name}' environment"

    # A sampled observation must be non-empty
    observation = env.observation_space.sample()
    assert observation.size > 0, "The sampled observation is empty"

    # Resetting must also yield a non-empty observation
    observation = env.reset()
    assert observation.size > 0, "The observation is empty"

    # Drive the environment with a handful of random actions
    for _step in range(10):
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
        assert state.size > 0, "The environment didn't return a valid state"

    env.close()
def pybullet(self) -> bullet_client.BulletClient:
    """Lazily create and configure the PyBullet client.

    On first access this creates the client (GUI or headless depending on
    the render flag), loads the ground plane, and configures the physics
    engine. Subsequent accesses return the cached instance.

    Returns:
        The configured :class:`bullet_client.BulletClient` instance.
    """
    # Return the cached simulator if it was already created
    if self._pybullet is not None:
        return self._pybullet

    logger.debug("Creating PyBullet simulator")

    # GUI when rendering is enabled; otherwise connect to an existing
    # instance or, if that fails, create a headless (DIRECT) simulation.
    self._pybullet = (
        bullet_client.BulletClient(pybullet.GUI)
        if self._render_enabled
        else bullet_client.BulletClient()
    )
    assert self._pybullet, "Failed to create the bullet client"

    # Locate and load the ground plane
    resource_finder.add_path(pybullet_data.getDataPath())
    world_abs_path = resource_finder.find_resource(self._world)
    self._plane_id = self._load_model(world_abs_path)

    # Configure the physics engine
    self._pybullet.setGravity(0, 0, -9.81)
    self._pybullet.setPhysicsEngineParameter(numSolverIterations=10)

    # Use a single big time step divided in multiple substeps. As an
    # alternative, we could use a single substep and step the simulation
    # multiple times.
    physics_dt = 1.0 / self._physics_rate / self._num_of_physics_steps
    self._pybullet.setTimeStep(physics_dt)
    self._pybullet.setPhysicsEngineParameter(
        numSubSteps=self._num_of_physics_steps)

    # Disable real-time update. We step the simulation when needed.
    self._pybullet.setRealTimeSimulation(0)

    logger.info("PyBullet Physic Engine Parameters:")
    logger.info(str(self._pybullet.getPhysicsEngineParameters()))

    step_time = 1.0 / self._physics_rate / self._rtf
    logger.info(f"Nominal step time: {step_time} seconds")

    logger.debug("PyBullet simulator created")
    return self._pybullet
for epoch in range(10):
    # Start a fresh episode
    observation = env.reset()

    # Episode bookkeeping
    done = False
    total_reward = 0

    while not done:
        # Pick and apply a random action
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)

        # Render the environment
        # It is not required to call this in the loop
        # env.render('human')

        total_reward += reward

        # Log every observation component on a single tab-separated line
        logger.debug("".join("\t%.6f" % value for value in observation))

    logger.info("Total reward for episode #{}: {}".format(epoch, total_reward))

env.close()
for epoch in range(30):
    # Start a fresh episode
    observation = env.reset()

    # Episode bookkeeping
    done = False
    totalReward = 0

    while not done:
        # Pick and apply a random action
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)

        # Render the environment
        # It is not required to call this in the loop
        # env.render('human')

        totalReward += reward

        # Log every observation component on a single tab-separated line
        logger.debug("".join("\t%.6f" % value for value in observation))

    logger.info(f"Total reward for episode #{epoch}: {totalReward}")

env.close()

# Give any rendering window time to shut down cleanly
time.sleep(5)