def check_reset_assert_error(env, new_reset_return):
    """
    Helper to check that the error is caught.

    :param env: (gym.Env)
    :param new_reset_return: (Any)
    """

    def wrong_reset():
        return new_reset_return

    # Patch the reset method with a wrong one
    env.reset = wrong_reset
    with pytest.raises(AssertionError):
        check_env(env)
def test_high_dimension_action_space():
    """
    Test for continuous action space with more than one action.
    """
    env = FakeImageEnv()
    # Patch the action space
    env.action_space = spaces.Box(low=-1, high=1, shape=(20,), dtype=np.float32)

    # Patch to avoid error
    def patched_step(_action):
        return env.observation_space.sample(), 0.0, False, {}

    env.step = patched_step
    check_env(env)
def check_step_assert_error(env, new_step_return=()):
    """
    Helper to check that the error is caught.

    :param env: (gym.Env)
    :param new_step_return: (tuple)
    """

    def wrong_step(_action):
        return new_step_return

    # Patch the step method with a wrong one
    env.step = wrong_step
    with pytest.raises(AssertionError):
        check_env(env)
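# A minimal usage sketch for the two helpers above (hypothetical, not from
# the original source): FakeEnv stands in for any environment that would
# otherwise pass check_env, and the wrong return values are illustrative.
def test_wrong_return_types():
    # reset() must return an observation, so returning None should raise
    check_reset_assert_error(FakeEnv(), None)
    # step() must return a 4-tuple (obs, reward, done, info),
    # so a 2-tuple should trigger an AssertionError
    check_step_assert_error(FakeEnv(), (0.0, False))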
def PendulumExample():
    builder = DiagramBuilder()
    plant = builder.AddSystem(PendulumPlant())
    scene_graph = builder.AddSystem(SceneGraph())
    PendulumGeometry.AddToBuilder(builder, plant.get_state_output_port(),
                                  scene_graph)
    MeshcatVisualizerCpp.AddToBuilder(
        builder, scene_graph, meshcat,
        MeshcatVisualizerParams(publish_period=np.inf))

    builder.ExportInput(plant.get_input_port(), "torque")
    # Note: The Pendulum-v0 in gym outputs [cos(theta), sin(theta), thetadot]
    builder.ExportOutput(plant.get_state_output_port(), "state")

    # Add a camera (I have sugar for this in the manip repo)
    X_WC = RigidTransform(RotationMatrix.MakeXRotation(-np.pi / 2),
                          [0, -1.5, 0])
    rgbd = AddRgbdSensor(builder, scene_graph, X_WC)
    builder.ExportOutput(rgbd.color_image_output_port(), "camera")

    diagram = builder.Build()
    simulator = Simulator(diagram)

    def reward(system, context):
        plant_context = plant.GetMyContextFromRoot(context)
        state = plant_context.get_continuous_state_vector()
        u = plant.get_input_port().Eval(plant_context)[0]
        theta = state[0] % (2 * np.pi)  # Wrap to 2*pi
        theta_dot = state[1]
        return (theta - np.pi)**2 + 0.1 * theta_dot**2 + 0.001 * u

    max_torque = 3
    env = DrakeGymEnv(
        simulator,
        time_step=0.05,
        action_space=gym.spaces.Box(low=np.array([-max_torque]),
                                    high=np.array([max_torque])),
        observation_space=gym.spaces.Box(low=np.array([-np.inf, -np.inf]),
                                         high=np.array([np.inf, np.inf])),
        reward=reward,
        render_rgb_port_id="camera")
    check_env(env)

    if show:
        env.reset()
        image = env.render(mode='rgb_array')
        fig, ax = plt.subplots()
        ax.imshow(image)
        plt.show()
def test_gym_env(kwargs):
    server = FakeServer()
    server.start()
    env = gym.make("SpaceEngineers-WalkingRobot-IK-v2", detach=True,
                   verbose=2, **kwargs)
    check_env(env, warn=True)
    env.reset()
    for _ in range(10):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            env.reset()
    env.close()
def test_dqn():
    env = gym.make("fishing-v0")
    check_env(env)
    model = DQN("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=200)

    # Simulate a run with the trained model, visualize result
    df = env.simulate(model)
    env.plot(df, "dqn-test.png")

    # Evaluate model
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5)

    df = env.policyfn(model)
    env.plot_policy(df, "policy-test.png")
def test_env(env_id):
    """
    Check that environments integrated in Gym pass the test.

    :param env_id: (str)
    """
    env = gym.make(env_id)
    with pytest.warns(None) as record:
        check_env(env)

    # Pendulum-v0 will produce a warning because the action space is
    # in [-2, 2] and not [-1, 1]
    if env_id == "Pendulum-v0":
        assert len(record) == 1
    else:
        # The other environments must pass without warning
        assert len(record) == 0
def test_all_environments():
    envs_id = [
        env_spec.id for env_spec in gym.envs.registry.all()
        if env_spec.id.startswith('Eplus')
    ]
    envs_id = [env_id for env_id in envs_id if 'IWMullion' not in env_id]
    for env_id in envs_id:
        # Create env with TEST name
        env = gym.make(env_id)
        # stable_baselines3 environment checker: check that the environment
        # follows the Gym API.
        check_env(env)
        # Rename directory with name TEST for future removal
        os.rename(
            env.simulator._env_working_dir_parent,
            'Eplus-env-TEST' +
            env.simulator._env_working_dir_parent.split('/')[-1])
def main():
    # Initialize the environment
    env = WebotsStickEnv()
    check_env(env)

    # Train
    model = PPO('MlpPolicy', env, n_steps=2048, verbose=1)
    model.learn(total_timesteps=int(1e5))

    # Replay
    print('Training is finished, press `Y` for replay...')
    env.wait_keyboard()

    obs = env.reset()
    for t in range(100000):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        print(obs)
        if done:
            obs = env.reset()
def __init__(self, rap, log_dir="/tmp/gym", training_config=None,
             algorithm="A2C", checkpoint_results=None):
    super(ResourceManager, self).__init__(rap, log_dir=log_dir,
                                          algorithm=algorithm,
                                          checkpoint_results=checkpoint_results)
    self.model_name = rap["name"] + "_baseline"

    self.environment = ResourceAllocationEnvironment(self.ra_problem)
    check_env(self.environment, warn=True)

    self.vector_environment = make_vec_env(lambda: self.environment,
                                           n_envs=1,
                                           monitor_dir=self.log_dir)
    self.training_steps = training_config["stage1_training_steps"]
def test_tipping():
    np.random.seed(0)
    env = gym.make("fishing-v2", sigma=0, init_state=0.75)
    check_env(env)
    env.reset()

    # Increases above tipping point
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) >= 0.75

    # Decreases below the tipping point
    env.init_state = 0.3
    env.reset()
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) <= 0.3

    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "tip_msy-test.png")

    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "tip_escapement-test.png")
def test_metro_network_env(self):
    # config = get_config(AGENT_CONFIG)
    #
    # simulator = Simulator(NETWORK, SERVICE, USER_CONFIG)
    # env = gym.make('metro_network-env-v1', agent_config=config,
    #                network_file=NETWORK, service_file=SERVICE,
    #                user_trace_file=USER_TRACE)
    # np.random.seed(123)
    # env.seed(123)
    # obs = env.reset()
    # logger.info(f"obs {obs}")
    # action = env.action_space.sample()
    # env.step(action)
    config = get_config(AGENT_CONFIG)
    env = MetroNetworkEnv(
        agent_config=config,
        network_file=NETWORK_TRIANGLE,
        service_file=SERVICE_TRIANGLE,
        user_trace_file=USER_TRACE,
        service_requirement_file=SERVICE_REQUIREMENT_TRIANGLE,
        ingress_distribution_file=ingress_distribution_file_path,
        container_client_file=docker_client_services_path,
        container_server_file=docker_server_services_path,
        container_lb_file=docker_lb_container_path)
    check_env(env)
def train(env, type, timesteps):
    env.reset()
    # check_env raises an error if the env does not follow the Gym API
    check_env(env)
    env = FlattenObservation(env)
    print(env.reward_range)
    print(env.action_space)

    if type == "DQN":
        model = DQN('MlpPolicy', exploration_fraction=0.999, env=env, verbose=1)
    elif type == "A2C":
        model = A2C('MlpPolicy', env=env, verbose=1)
    elif type == "PPO":
        model = PPO('MlpPolicy', env=env, verbose=1)

    model.learn(total_timesteps=timesteps)
    model.save("model_cups")
from stable_baselines3.common.env_checker import check_env

from swarmsim_env import SingleExplorer

# check_env raises an error if the environment does not follow the Gym API
check_env(SingleExplorer())
def test_check_env_dict_action():
    test_env = ActionDictTestEnv()
    with pytest.warns(Warning):
        check_env(env=test_env, warn=True)
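# For context, a hypothetical sketch of what ActionDictTestEnv could look
# like (the class body is an assumption, not taken from the original
# source): a Dict action space is exactly what check_env(warn=True) warns
# about, since stable-baselines3 algorithms do not support Dict actions.
import gym
import numpy as np
from gym import spaces


class ActionDictTestEnv(gym.Env):
    action_space = spaces.Dict({"position": spaces.Discrete(1),
                                "velocity": spaces.Discrete(1)})
    observation_space = spaces.Box(low=-1.0, high=2.0, shape=(3,),
                                   dtype=np.float32)

    def step(self, action):
        observation = np.array([1.0, 1.5, 0.5], dtype=np.float32)
        reward = 1.0
        done = True
        return observation, reward, done, {}

    def reset(self):
        return np.array([1.0, 1.5, 0.5], dtype=np.float32)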
from stable_baselines3.common.env_checker import check_env
import gym

import envs

ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)

# It will check your custom environment and output additional warnings if needed
check_env(my_env)
        self.task.reset()
        self.reward_calculator.reset()
        self._steps_curr_episode = 0
        obs, _ = self.observation_collector.get_observations()
        return obs  # reward, done, info can't be included

    def close(self):
        pass


if __name__ == '__main__':
    rospy.init_node('flatland_gym_env', anonymous=True)
    print("start")

    flatland_env = FlatlandEnv()
    check_env(flatland_env, warn=True)

    # init env
    obs = flatland_env.reset()

    # run model
    n_steps = 200
    for step in range(n_steps):
        # action, _states = model.predict(obs)
        action = flatland_env.action_space.sample()
        obs, rewards, done, info = flatland_env.step(action)
        time.sleep(0.1)
""" Test env with random actions """ import time import gym import widowx_env from stable_baselines3.common.env_checker import check_env env = gym.make('widowx_reacher-v14') print("any warnings?", check_env(env)) # Comment this for goal environments # print("Action space: ", env.action_space) # print(env.action_space.high) # print(env.action_space.low) # print("Observation space: ", env.observation_space) # print(env.observation_space.high) # print(env.observation_space.low) # env.render() for episode in range(2): obs = env.reset() rewards = [] for t in range(100): action = env.action_space.sample() obs, reward, done, info = env.step(action) print("action: ", action) print("obs: ", obs)
            raise ValueError(f'Unrecognized action {action}')

        self._state = np.clip(self._state, 0, self._grid_size - 1)
        done = bool(self._state == self._grid_size - 1)
        reward = 1 if done else 0
        return np.array([self._state]).astype(np.float32), reward, done, {}

    def reset(self):
        self._state = 0
        return np.array([self._state]).astype(np.float32)

    def render(self, mode='human'):
        pass


if __name__ == '__main__':
    check_env(GridWorld(10))

    env = make_vec_env(lambda: GridWorld(10), n_envs=1)
    model = PPO('MlpPolicy', env, verbose=1).learn(5000)

    state = env.reset()
    for _ in range(20):
        action, _ = model.predict(state, deterministic=True)
        # action = 0
        next_state, reward, done, info = env.step(action)
        print(f'{state} -> {action} -> {next_state}: {reward}')
        state = next_state
        if done:
            break
def test_by_stablebaseline():
    from stable_baselines3.common.env_checker import check_env

    env = gym.make(GYM_VERSION_NAME, **env_kwargs)
    check_env(env)
def test_custom_envs(env_class):
    env = env_class()
    with pytest.warns(None) as record:
        check_env(env)
    # No warnings for custom envs
    assert len(record) == 0
    def _on_training_end(self) -> None:
        pass


def make_env():
    def _init():
        env = Monitor(CoinrunEnv())
        return env
    return _init


if __name__ == '__main__':
    eval_env = CoinrunEnv()
    check_env(eval_env)

    # vectorize env (1 per cpu)
    n_cpus = 4
    env = SubprocVecEnv([make_env() for _ in range(n_cpus)])
    # env = Monitor(env, filename='logs.txt')

    # init model
    model = PPO(
        policy='CnnPolicy',
        env=env,
        gamma=0.999,
        gae_lambda=0.95,
        learning_rate=5e-4,
        # true train batch size is n_steps * n_cpus (n_cpus being number of envs running in parallel)
        n_steps=2048,
def test_custom_envs(env_class):
    env = env_class()
    check_env(env)
import logging
import random

import gym
from stable_baselines3 import A2C

from gym_quarto import (QuartoEnvV0, OnePlayerWrapper, RandomPlayer,
                        HumanPlayer, random_action)

logging.basicConfig(level=logging.INFO)


def make_env(player=None):
    env = gym.make('quarto-v1')
    if player is None:
        player = RandomPlayer(env)
    env = OnePlayerWrapper(env, player)
    return env


check_env(make_env())


def random_vs_random():
    env = make_env()
    NB_EPISODE = 1
    for episode in range(NB_EPISODE):
        obs = env.reset()
        done = False
        while not done:
            action = random.choice(list(env.legal_actions))
            obs, reward, done, info = env.step(action)
            # print(f"{info['turn']: <4} | ")
            env.render()
        print("done")
import gym
import numpy as np
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from environment import EggnoggEnv
from game import Game
import time

game = Game()
env = EggnoggEnv(game, 1, 0)

# check_env raises an error if the env does not follow the Gym API
check_env(env)

# set the screenshot to find the window automatically
model = PPO("CnnPolicy", env, verbose=1)
model.learn(total_timesteps=1)
time.sleep(3)
model.save("ppo_eggnogg")

obs = env.reset()
# for i in range(2000):
#     action, _states = model.predict(obs)
#     obs, rewards, done, info = env.step(action)
#     env.render()
        gui=ARGS.gui,
        record=ARGS.record_video,
        obstacles=ARGS.obstacles,
        user_debug_gui=ARGS.user_debug_gui,
        goal_xyz=GOAL_XYZ,
        collision_point=COLLISION_POINT,
        protected_radius=protected_radius,
        goal_radius=ARGS.goal_radius,
        collision_time=ARGS.collision_time,
    )

    #### Obtain the PyBullet Client ID from the environment ####
    PYB_CLIENT = env.getPyBulletClient()
    DRONE_IDS = env.getDroneIds()
    check_env(env, warn=True, skip_render_check=True)

    #### Compute number of control steps in the simulation #####
    PERIOD = ARGS.duration_sec
    NUM_WP = ARGS.control_freq_hz * PERIOD

    #### Initialize the logger #################################
    logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS),
                    num_drones=ARGS.num_drones)

    #### Run the simulation ####################################
    CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz))
    START = time.time()

    # Start the model learning
        self.task.reset()
        self.reward_calculator.reset()
        self._steps_curr_episode = 0
        obs, _ = self.observation_collector.get_observations()
        return obs  # reward, done, info can't be included

    def close(self):
        pass


if __name__ == '__main__':
    rospy.init_node('wp3_gym_env', anonymous=True)
    print("start")

    wp3_env = wp3Env()
    check_env(wp3_env, warn=True)

    # init env
    obs = wp3_env.reset()

    # run model
    n_steps = 200
    for step in range(n_steps):
        # action, _states = model.predict(obs)
        action = wp3_env.action_space.sample()
        obs, rewards, done, info = wp3_env.step(action)
        time.sleep(0.1)
from stable_baselines3.common.env_checker import check_env
from constructionSite import ConstructionSite

env = ConstructionSite()

# If the environment doesn't follow the interface, an error will be thrown
obs = env.reset()
print(env.observation_space)
print(env.action_space)
print(env.action_space.sample())

for action in range(0, 6):
    observation, reward, done, info = env.step(action)
    print(f"observation.shape : {observation.shape}\t\treward : {reward}\t\t"
          f"done : {done}\t\taction : {action}")

check_env(env, warn=True)
        froms, tos, results = rayTest(self.robot.robot,
                                      ray_length=self.LASER_LENGTH,
                                      ray_num=self.LASER_NUM)
        for index, result in enumerate(results):
            color = self.MISS_COLOR if result[0] == -1 else self.HIT_COLOR
            self.rayDebugLineIds.append(
                p.addUserDebugLine(froms[index], tos[index], color,
                                   self.RAY_DEBUG_LINE_WIDTH))
        return np.array(state)

    def render(self, mode='human'):
        pass

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    def close(self):
        if self._physics_client_id >= 0:
            p.disconnect()
            self._physics_client_id = -1


if __name__ == "__main__":
    env = MaplessNaviEnv()
    # pprint([attr for attr in dir(env) if attr[:2] != "__"])
    from stable_baselines3.common.env_checker import check_env
    check_env(env)
import gym
import gym_fishing
from stable_baselines3.common.env_checker import check_env

env0 = gym.make('fishing-v0')
env1 = gym.make('fishing-v1')
env2 = gym.make('fishing-v2')
# env3 = gym.make('fishing-v3')
env4 = gym.make('fishing-v4')

check_env(env0)
check_env(env1)
check_env(env2)
# check_env(env3)
check_env(env4)

env0.close()