def env_creator():
    """Build the Pistonball AEC environment with the standard preprocessing stack.

    Pipeline: blue-channel color reduction -> float32 dtype -> 84x84 resize
    -> observation normalization to [0, 1] -> 3-frame stacking.

    Returns:
        The fully wrapped PettingZoo AEC environment.
    """
    base = pistonball_v6.env(
        n_pistons=20,
        time_penalty=-0.1,
        continuous=True,
        random_drop=True,
        random_rotate=True,
        ball_mass=0.75,
        ball_friction=0.3,
        ball_elasticity=1.5,
        max_cycles=125,
    )
    wrapped = ss.color_reduction_v0(base, mode='B')
    wrapped = ss.dtype_v0(wrapped, 'float32')
    wrapped = ss.resize_v0(wrapped, x_size=84, y_size=84)
    wrapped = ss.normalize_obs_v0(wrapped, env_min=0, env_max=1)
    return ss.frame_stack_v1(wrapped, 3)
def main():
    """Roll out `policy` on Pistonball, report average reward, save a GIF.

    Runs NUM_RESETS episodes, summing the per-step reward returned by
    `env.last()` across all agents, then writes sampled rendered frames to
    'pistonball_ben.gif'.

    BUG FIX: the original reused the reset-loop variable `i` as the
    per-step frame counter (`for i in range(NUM_RESETS)` followed by
    `i += 1` inside the step loop), so the counter was clobbered at the
    top of every episode and collided with the episode index. A dedicated
    `step` counter is used instead.
    """
    env = pistonball_v6.env(
        n_pistons=20, time_penalty=-0.1, continuous=True, random_drop=True,
        random_rotate=True, ball_mass=0.75, ball_friction=0.3,
        ball_elasticity=1.5, max_cycles=125)
    total_reward = 0
    obs_list = []
    NUM_RESETS = 1
    step = 0  # counts individual agent steps across all episodes
    for _ in range(NUM_RESETS):
        env.reset()
        for agent in env.agent_iter():
            obs, rew, done, info = env.last()
            # Finished agents must be stepped with a None action.
            act = policy(obs) if not done else None
            env.step(act)
            total_reward += rew
            step += 1
            # Capture one frame per full pass over the agents; the +1
            # preserves the original sampling cadence.
            if step % (len(env.possible_agents) + 1) == 0:
                obs_list.append(
                    np.transpose(env.render(mode='rgb_array'),
                                 axes=(1, 0, 2)))
    env.close()
    print("average total reward: ", total_reward / NUM_RESETS)
    write_gif(obs_list, 'pistonball_ben.gif', fps=15)
def env_creator(config):
    """Create a preprocessed Pistonball env (the `config` argument is unused).

    Applies, in order: float32 dtype conversion, red-channel color
    reduction, and observation normalization.

    Returns:
        The wrapped PettingZoo AEC environment.
    """
    wrappers = (
        lambda e: dtype_v0(e, dtype=np.float32),
        lambda e: color_reduction_v0(e, mode="R"),
        lambda e: normalize_obs_v0(e),
    )
    env = pistonball_v6.env()
    for wrap in wrappers:
        env = wrap(env)
    return env
def get_env(args: argparse.Namespace = None):
    """Wrap the discrete Pistonball environment as a Tianshou PettingZooEnv.

    BUG FIX: the original signature used `get_args()` as the default value,
    which executes the CLI parser once at module import time and freezes
    that result for every later call (flake8-bugbear B008). Defaulting to
    None and parsing inside the body evaluates the arguments per call and
    keeps import side-effect free.

    Args:
        args: parsed CLI namespace providing `n_pistons`; obtained from
            `get_args()` when omitted.

    Returns:
        A PettingZooEnv wrapping `pistonball_v6.env(continuous=False, ...)`.
    """
    if args is None:
        args = get_args()
    return PettingZooEnv(
        pistonball_v6.env(continuous=False, n_pistons=args.n_pistons))
verbose=3, gamma=0.95, n_steps=256, ent_coef=0.0905168, learning_rate=0.00062211, vf_coef=0.042202, max_grad_norm=0.9, gae_lambda=0.99, n_epochs=5, clip_range=0.3, batch_size=256, ) model.learn(total_timesteps=2000000) model.save("policy") # Rendering env = pistonball_v6.env() env = ss.color_reduction_v0(env, mode="B") env = ss.resize_v1(env, x_size=84, y_size=84) env = ss.frame_stack_v1(env, 3) model = PPO.load("policy") env.reset() for agent in env.agent_iter(): obs, reward, done, info = env.last() act = model.predict(obs, deterministic=True)[0] if not done else None env.step(act) env.render()