Example #1
def test_multiproc_single_proc_equivalency():
    env = gym.make("Pendulum-v0")
    num_envs = 3
    venv1 = concat_vec_envs_v0(
        env, num_envs, num_cpus=0)  # uses single threaded vector environment
    venv2 = concat_vec_envs_v0(
        env, num_envs, num_cpus=4)  # uses multiprocessing vector environment
    check_vec_env_equivalency(venv1, venv2)
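
Both equivalency tests call a check_vec_env_equivalency helper defined elsewhere in the test module (concat_vec_envs_v0 comes from supersuit). A minimal sketch of what such a helper might do, assuming the vector environments expose a Gym-style seed/reset/step interface, is:

import numpy as np

def check_vec_env_equivalency(venv1, venv2, num_steps=50):
    # Hypothetical helper: drive both vector environments with identical
    # seeds and actions and assert that their outputs match.
    venv1.seed(42)
    venv2.seed(42)
    obs1, obs2 = venv1.reset(), venv2.reset()
    assert np.allclose(obs1, obs2)
    for _ in range(num_steps):
        actions = np.stack([venv1.action_space.sample() for _ in range(venv1.num_envs)])
        obs1, rew1, done1, _ = venv1.step(actions)
        obs2, rew2, done2, _ = venv2.step(actions)
        assert np.allclose(obs1, obs2)
        assert np.allclose(rew1, rew2)
        assert np.array_equal(done1, done2)
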
Example #2
def test_multiagent_multiproc_single_proc_equivalency():
    env = simple_spread_v2.parallel_env()
    env = pettingzoo_env_to_vec_env_v0(env)
    num_envs = 3
    venv1 = concat_vec_envs_v0(
        env, num_envs, num_cpus=0)  # uses single threaded vector environment
    venv2 = concat_vec_envs_v0(
        env, num_envs, num_cpus=4)  # uses multiprocessing vector environment
    check_vec_env_equivalency(venv1, venv2)
Example #3
import random

import numpy as np
import torch
import supersuit as ss
from gym.spaces import Box
from pettingzoo.butterfly import pistonball_v4
from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder

# args, experiment_name, and VecPyTorch are defined earlier in the original script.
# TRY NOT TO MODIFY: seeding
device = torch.device(
    'cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic

# PettingZoo Pistonball environment with SuperSuit preprocessing wrappers
env = pistonball_v4.parallel_env()
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
envs = ss.concat_vec_envs_v0(env,
                             args.num_envs,
                             num_cpus=0,
                             base_class='stable_baselines3')
envs = VecMonitor(envs)
if args.capture_video:
    envs = VecVideoRecorder(envs,
                            f'videos/{experiment_name}',
                            record_video_trigger=lambda x: x % 150000 == 0,
                            video_length=400)
envs = VecPyTorch(envs, device)
args.num_envs = envs.num_envs
args.batch_size = int(args.num_envs * args.num_steps)
args.minibatch_size = int(args.batch_size // args.n_minibatch)
assert isinstance(envs.action_space,
                  Box), "only continuous action space is supported"

Example #4
def test_gym_supersuit_equivalency():
    env = gym.make("Pendulum-v0")
    num_envs = 3
    venv1 = concat_vec_envs_v0(env, num_envs)
    venv2 = gym_vec_env_v0(env, num_envs)
    check_vec_env_equivalency(venv1, venv2)
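
gym_vec_env_v0 is compared here against concat_vec_envs_v0; under the assumption that it builds on Gym's own vector API, a natively constructed counterpart would look roughly like:

import gym

# Rough equivalent using Gym's built-in vector API (assumption; the exact
# construction inside gym_vec_env_v0 may differ).
venv_native = gym.vector.make("Pendulum-v0", num_envs=3)
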
Example #5
def test_env_is_wrapped_true():
    env = gym.make("Pendulum-v0")
    env = flatten(env)
    num_envs = 3
    venv1 = concat_vec_envs_v0(env, num_envs)
    assert venv1.env_is_wrapped(flatten) == [True] * 3
Example #6
def test_env_is_wrapped_pettingzoo():
    env = simple_spread_v2.parallel_env()
    venv1 = pettingzoo_env_to_vec_env_v0(env)
    num_envs = 3
    venv1 = concat_vec_envs_v0(venv1, num_envs)
    assert venv1.env_is_wrapped(flatten) == [False] * 9  # 3 env copies x 3 agents in simple_spread
Example #7
def test_env_is_wrapped_false():
    env = gym.make("Pendulum-v0")
    num_envs = 3
    venv1 = concat_vec_envs_v0(env, num_envs)
    assert venv1.env_is_wrapped(flatten) == [False] * 3
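
The flatten wrapper used in the env_is_wrapped tests is imported elsewhere in the test module; assuming it is SuperSuit's observation-flattening wrapper, the import might read:

# Assumed import for the `flatten` wrapper used above; the original test
# module may alias it differently.
from supersuit import flatten_v0 as flatten
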
Example #8
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import PPO2
from pettingzoo.butterfly import pistonball_v4
import supersuit as ss

env = pistonball_v4.parallel_env(n_pistons=20, local_ratio=0, time_penalty=-0.1, continuous=True, random_drop=True, random_rotate=True, ball_mass=0.75, ball_friction=0.3, ball_elasticity=1.5, max_cycles=125)
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines')

model = PPO2(CnnPolicy, env, verbose=3, gamma=0.99, n_steps=125, ent_coef=0.01, learning_rate=0.00025, vf_coef=0.5, max_grad_norm=0.5, lam=0.95, nminibatches=4, noptepochs=4, cliprange=0.2, cliprange_vf=1)
model.learn(total_timesteps=2000000)
model.save("policy")

# Rendering

env = pistonball_v4.env()
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)

model = PPO2.load("policy")

env.reset()
for agent in env.agent_iter():
    obs, reward, done, info = env.last()
    # predict() returns (action, state); act only while the agent is not done
    act = model.predict(obs)[0] if not done else None
    env.step(act)
    env.render()
Example #9
import supersuit as ss
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import VecMonitor

# make_env, n_agents, and n_evaluations are defined elsewhere in the original script.
n_envs = 4
n_timesteps = 8000000

# n agents, n timesteps, docs, make, PZ import in test file
# The main class SumoEnvironment inherits MultiAgentEnv from RLlib.

base_env = make_env(net_file='nets/4x4-Lucas/4x4.net.xml',
                    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                    out_csv_name='outputs/4x4grid/test',
                    use_gui=False,
                    num_seconds=80000)

env = base_env.copy().parallel_env()
env = ss.frame_stack_v1(env, 3)
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, n_envs, num_cpus=1, base_class='stable_baselines3')
env = VecMonitor(env)

eval_env = base_env.copy().parallel_env()
eval_env = ss.frame_stack_v1(eval_env, 3)
eval_env = ss.pettingzoo_env_to_vec_env_v0(eval_env)
eval_env = ss.concat_vec_envs_v0(eval_env, 1, num_cpus=1, base_class='stable_baselines3')
eval_env = VecMonitor(eval_env)

eval_freq = int(n_timesteps / n_evaluations)
eval_freq = max(eval_freq // (n_envs*n_agents), 1)

model = PPO("MlpPolicy", env, verbose=3, gamma=0.95, n_steps=256, ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202, max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3, batch_size=256)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=eval_freq, deterministic=True, render=False)
model.learn(total_timesteps=n_timesteps, callback=eval_callback)
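
EvalCallback periodically evaluates the policy on eval_env and saves the best checkpoint under ./logs/. A typical follow-up (sketch using standard Stable-Baselines3 utilities and the paths from the snippet) reloads that checkpoint and reports its mean reward:

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

# Reload the best checkpoint written by EvalCallback and evaluate it.
best_model = PPO.load("./logs/best_model")
mean_reward, std_reward = evaluate_policy(best_model, eval_env, n_eval_episodes=5)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")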