def test_multiproc_single_proc_equivalency():
    env = gym.make("Pendulum-v0")
    num_envs = 3
    # uses single threaded vector environment
    venv1 = concat_vec_envs_v0(env, num_envs, num_cpus=0)
    # uses multiprocessing vector environment
    venv2 = concat_vec_envs_v0(env, num_envs, num_cpus=4)
    check_vec_env_equivalency(venv1, venv2)
def test_multiagent_multiproc_single_proc_equivalency():
    env = simple_spread_v2.parallel_env()
    env = pettingzoo_env_to_vec_env_v0(env)
    num_envs = 3
    # uses single threaded vector environment
    venv1 = concat_vec_envs_v0(env, num_envs, num_cpus=0)
    # uses multiprocessing vector environment
    venv2 = concat_vec_envs_v0(env, num_envs, num_cpus=4)
    check_vec_env_equivalency(venv1, venv2)
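# The tests above call a check_vec_env_equivalency helper that is not shown in
# this snippet. A minimal sketch of what such a helper might look like, assuming
# both vector environments expose the usual seed/reset/step interface (the
# helper body and the num_steps parameter are illustrative, not supersuit API):
import numpy as np

def check_vec_env_equivalency(venv1, venv2, num_steps=50):
    # Seed both vector environments identically so their rollouts should match.
    venv1.seed(42)
    venv2.seed(42)
    obs1, obs2 = venv1.reset(), venv2.reset()
    assert np.allclose(obs1, obs2)
    venv1.action_space.seed(42)
    for _ in range(num_steps):
        # Feed the same actions to both and compare every output.
        actions = np.array([venv1.action_space.sample() for _ in range(venv1.num_envs)])
        obs1, rews1, dones1, _ = venv1.step(actions)
        obs2, rews2, dones2, _ = venv2.step(actions)
        assert np.allclose(obs1, obs2)
        assert np.allclose(rews1, rews2)
        assert np.array_equal(dones1, dones2)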
# TRY NOT TO MODIFY: seeding
device = torch.device('cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic

# petting zoo
env = pistonball_v4.parallel_env()
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
envs = ss.concat_vec_envs_v0(env, args.num_envs, num_cpus=0, base_class='stable_baselines3')
envs = VecMonitor(envs)
if args.capture_video:
    envs = VecVideoRecorder(envs, f'videos/{experiment_name}',
                            record_video_trigger=lambda x: x % 150000 == 0,
                            video_length=400)
envs = VecPyTorch(envs, device)
args.num_envs = envs.num_envs
args.batch_size = int(args.num_envs * args.num_steps)
args.minibatch_size = int(args.batch_size // args.n_minibatch)
assert isinstance(envs.action_space, Box), "only continuous action space is supported"
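# Note: VecPyTorch is not a stable-baselines3 wrapper. In CleanRL-style scripts
# it is a small adapter that moves VecEnv outputs onto the torch device. A
# minimal sketch under that assumption (not the exact CleanRL implementation):
import torch
from stable_baselines3.common.vec_env import VecEnvWrapper

class VecPyTorch(VecEnvWrapper):
    """Converts VecEnv numpy outputs to torch tensors on a given device."""

    def __init__(self, venv, device):
        super().__init__(venv)
        self.device = device

    def reset(self):
        obs = self.venv.reset()
        return torch.from_numpy(obs).float().to(self.device)

    def step_async(self, actions):
        # Accept torch actions and hand numpy arrays to the underlying VecEnv.
        if isinstance(actions, torch.Tensor):
            actions = actions.cpu().numpy()
        self.venv.step_async(actions)

    def step_wait(self):
        obs, rewards, dones, infos = self.venv.step_wait()
        obs = torch.from_numpy(obs).float().to(self.device)
        rewards = torch.from_numpy(rewards).float().to(self.device).view(-1, 1)
        return obs, rewards, dones, infos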
def test_gym_supersuit_equivalency():
    env = gym.make("Pendulum-v0")
    num_envs = 3
    venv1 = concat_vec_envs_v0(env, num_envs)
    venv2 = gym_vec_env_v0(env, num_envs)
    check_vec_env_equivalency(venv1, venv2)
def test_env_is_wrapped_true():
    env = gym.make("Pendulum-v0")
    env = flatten(env)
    num_envs = 3
    venv1 = concat_vec_envs_v0(env, num_envs)
    assert venv1.env_is_wrapped(flatten) == [True] * 3
def test_env_is_wrapped_pettingzoo():
    env = simple_spread_v2.parallel_env()
    venv1 = pettingzoo_env_to_vec_env_v0(env)
    num_envs = 3
    venv1 = concat_vec_envs_v0(venv1, num_envs)
    # 3 environments x 3 agents in simple_spread = 9 sub-environments
    assert venv1.env_is_wrapped(flatten) == [False] * 9
def test_env_is_wrapped_false():
    env = gym.make("Pendulum-v0")
    num_envs = 3
    venv1 = concat_vec_envs_v0(env, num_envs)
    assert venv1.env_is_wrapped(flatten) == [False] * 3
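# The flatten wrapper used by the env_is_wrapped tests is not defined in this
# snippet; env_is_wrapped only needs an identifiable wrapper class to search
# for. A plausible stand-in, assuming a plain observation-flattening gym
# wrapper (the class body here is an assumption, not the project's own code):
import gym

class flatten(gym.ObservationWrapper):
    """Flattens observations; the tests only check for this class's presence."""

    def __init__(self, env):
        super().__init__(env)
        self.observation_space = gym.spaces.flatten_space(env.observation_space)

    def observation(self, obs):
        return gym.spaces.flatten(self.env.observation_space, obs)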
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import PPO2
from pettingzoo.butterfly import pistonball_v4
import supersuit as ss

env = pistonball_v4.parallel_env(n_pistons=20, local_ratio=0, time_penalty=-0.1,
                                 continuous=True, random_drop=True, random_rotate=True,
                                 ball_mass=0.75, ball_friction=0.3, ball_elasticity=1.5,
                                 max_cycles=125)
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines')

model = PPO2(CnnPolicy, env, verbose=3, gamma=0.99, n_steps=125, ent_coef=0.01,
             learning_rate=0.00025, vf_coef=0.5, max_grad_norm=0.5, lam=0.95,
             nminibatches=4, noptepochs=4, cliprange=0.2, cliprange_vf=1)
model.learn(total_timesteps=2000000)
model.save("policy")

# Rendering
env = pistonball_v4.env()
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
model = PPO2.load("policy")
env.reset()
for agent in env.agent_iter():
    obs, reward, done, info = env.last()
    act = model.predict(obs)[0] if not done else None
    env.step(act)
    env.render()
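# The render loop above discards the rewards it receives. A small variant that
# also tallies a per-agent return while rendering (the total_rewards dict is
# illustrative bookkeeping, not part of the PettingZoo API):
from collections import defaultdict

total_rewards = defaultdict(float)  # per-agent cumulative reward
env.reset()
for agent in env.agent_iter():
    obs, reward, done, info = env.last()
    total_rewards[agent] += reward
    act = model.predict(obs)[0] if not done else None
    env.step(act)
    env.render()
print(dict(total_rewards))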
import supersuit as ss
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import VecMonitor

n_envs = 4
n_agents = 16        # one traffic signal per intersection in the 4x4 grid
n_evaluations = 20   # assumed value; not specified in the original snippet
n_timesteps = 8000000

# The main class SumoEnvironment inherits MultiAgentEnv from RLlib.
# make_env is assumed to construct a SumoEnvironment for the given network.
base_env = make_env(net_file='nets/4x4-Lucas/4x4.net.xml',
                    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                    out_csv_name='outputs/4x4grid/test',
                    use_gui=False, num_seconds=80000)
env = base_env.copy().parallel_env()
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, n_envs, num_cpus=1, base_class='stable_baselines3')
env = VecMonitor(env)

eval_env = base_env.copy().parallel_env()
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v0(eval_env)
eval_env = ss.concat_vec_envs_v0(eval_env, 1, num_cpus=1, base_class='stable_baselines3')
eval_env = VecMonitor(eval_env)

eval_freq = int(n_timesteps / n_evaluations)
eval_freq = max(eval_freq // (n_envs * n_agents), 1)

model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256,
            ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202,
            max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3,
            batch_size=256)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=eval_freq,
                             deterministic=True, render=False)
model.learn(total_timesteps=n_timesteps, callback=eval_callback)
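# EvalCallback saves its best checkpoint under best_model_save_path as
# best_model.zip. A short follow-up sketch that reloads that checkpoint and
# scores it against the same eval_env with stable-baselines3's evaluate_policy:
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

best_model = PPO.load("./logs/best_model")
mean_reward, std_reward = evaluate_policy(best_model, eval_env, n_eval_episodes=5)
print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")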