Esempio n. 1
0
def env_creator(args):
    """Create the preprocessed Pistonball parallel environment.

    The raw environment is passed through a fixed chain of SuperSuit
    wrappers, in this order: colour reduction (mode ``'B'``), cast to
    ``float32``, resize to 84x84, stack of 3 frames, and observation
    normalisation with bounds 0 and 1.

    Args:
        args: Not used by this function. Presumably present to satisfy
            the creator-callback signature of the caller -- TODO confirm.

    Returns:
        The fully wrapped parallel environment.
    """
    pistonball_settings = {
        'n_pistons': 20,
        'local_ratio': 0,
        'time_penalty': -0.1,
        'continuous': True,
        'random_drop': True,
        'random_rotate': True,
        'ball_mass': 0.75,
        'ball_friction': 0.3,
        'ball_elasticity': 1.5,
        'max_cycles': 125,
    }
    raw_env = pistonball_v4.parallel_env(**pistonball_settings)

    # Each stage consumes the result of the previous one; order matters.
    single_channel = ss.color_reduction_v0(raw_env, mode='B')
    as_float = ss.dtype_v0(single_channel, 'float32')
    resized = ss.resize_v0(as_float, x_size=84, y_size=84)
    stacked = ss.frame_stack_v1(resized, 3)
    return ss.normalize_obs_v0(stacked, env_min=0, env_max=1)
Esempio n. 2
0
               config=vars(args),
               name=experiment_name,
               monitor_gym=True,
               save_code=True)
    writer = SummaryWriter(f"/tmp/{experiment_name}")

# TRY NOT TO MODIFY: seeding
# Run on the GPU only when CUDA is available AND it was requested via args.
if torch.cuda.is_available() and args.cuda:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Seed every random number generator used here with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic

# petting zoo
# Preprocessing stages applied, in order, to the raw parallel environment.
# The last stage converts the PettingZoo env into a vector env.
_preprocessing = (
    lambda e: ss.color_reduction_v0(e, mode='B'),
    lambda e: ss.resize_v0(e, x_size=84, y_size=84),
    lambda e: ss.frame_stack_v1(e, 3),
    ss.pettingzoo_env_to_vec_env_v0,
)
env = pistonball_v4.parallel_env()
for _apply in _preprocessing:
    env = _apply(env)

# Concatenate args.num_envs copies of the vector env and wrap with VecMonitor.
envs = VecMonitor(
    ss.concat_vec_envs_v0(
        env,
        args.num_envs,
        num_cpus=0,
        base_class='stable_baselines3',
    )
)

# Optional video capture: the trigger fires whenever its argument
# (presumably the step counter -- TODO confirm) is a multiple of 150000.
if args.capture_video:
    envs = VecVideoRecorder(
        envs,
        f'videos/{experiment_name}',
        record_video_trigger=lambda step: step % 150000 == 0,
        video_length=400,
    )

# Final wrapper receives the selected torch device.
envs = VecPyTorch(envs, device)
Esempio n. 3
0
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import PPO2
from pettingzoo.butterfly import pistonball_v4
import supersuit as ss

# Pistonball settings shared by training and rendering, so the saved policy
# is replayed on exactly the configuration it was trained on instead of on
# whatever the library defaults happen to be.
env_kwargs = dict(
    n_pistons=20,
    local_ratio=0,
    time_penalty=-0.1,
    continuous=True,
    random_drop=True,
    random_rotate=True,
    ball_mass=0.75,
    ball_friction=0.3,
    ball_elasticity=1.5,
    max_cycles=125,
)

# Training

env = pistonball_v4.parallel_env(**env_kwargs)
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)
# Convert to a vector env, then concatenate 8 copies across 4 CPUs for
# Stable Baselines.
env = ss.pettingzoo_env_to_vec_env_v0(env)
env = ss.concat_vec_envs_v0(env, 8, num_cpus=4, base_class='stable_baselines')

model = PPO2(
    CnnPolicy,
    env,
    verbose=3,
    gamma=0.99,
    n_steps=125,
    ent_coef=0.01,
    learning_rate=0.00025,
    vf_coef=0.5,
    max_grad_norm=0.5,
    lam=0.95,
    nminibatches=4,
    noptepochs=4,
    cliprange=0.2,
    cliprange_vf=1,
)
model.learn(total_timesteps=2000000)
model.save("policy")

# Rendering

# Same settings and the same observation preprocessing as during training.
env = pistonball_v4.env(**env_kwargs)
env = ss.color_reduction_v0(env, mode='B')
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)

model = PPO2.load("policy")

env.reset()
try:
    for agent in env.agent_iter():
        obs, reward, done, info = env.last()
        # A finished agent must be stepped with None instead of an action.
        act = model.predict(obs)[0] if not done else None
        env.step(act)
        env.render()
finally:
    # Always release the environment (and its render window), even if the
    # loop is interrupted or raises.
    env.close()
Esempio n. 4
0
def make_env():
    """Return a default Pistonball parallel env converted to a vector env.

    The environment is created with ``pistonball_v4.parallel_env()`` using
    no arguments and is passed straight to ``pettingzoo_env_to_vec_env_v0``.
    """
    parallel_env = pistonball_v4.parallel_env()
    return pettingzoo_env_to_vec_env_v0(parallel_env)