def evaluate(env, model: PPO2, districts_ids, num_steps):
    """Roll out *model* in *env* for *num_steps* and return the attack rate.

    The attack rate is the fraction of initially susceptible individuals
    (summed over *districts_ids*) infected during the rollout:
    ``1 - susceptibles_after / susceptibles_before``.

    NOTE(review): relies on module-level helpers ``districts_susceptibles``
    and ``total_school_closures`` plus a global ``args`` namespace with a
    ``budget_in_weeks`` attribute — confirm these are in scope at call time.

    Raises:
        ValueError: if no one in the selected districts was susceptible
            before the rollout (attack rate would be undefined).
        RuntimeError: if the policy closed schools for more weeks than the
            per-district budget allows.
    """
    obs = env.reset()
    sus_before = districts_susceptibles(env, districts_ids)
    for _ in range(num_steps):
        action, _states = model.predict(obs)
        obs, _, _, _ = env.step(action)
    sus_after = districts_susceptibles(env, districts_ids)

    # Guard the division explicitly rather than letting a bare
    # ZeroDivisionError escape with no context.
    if sus_before == 0:
        raise ValueError("no susceptible individuals in the selected districts")
    attack_rate = 1.0 - (sus_after / sus_before)

    # Explicit check instead of `assert`, which is stripped under `python -O`
    # and would silently skip budget enforcement.
    closures = total_school_closures(env)
    budget = len(districts_ids) * args.budget_in_weeks
    if closures > budget:
        raise RuntimeError(
            f"school-closure budget exceeded: {closures} > {budget}"
        )

    return attack_rate
Example #2
0
    """
    global n_steps
    # Print stats every 1000 calls
    if (n_steps + 1) % 5 == 0:
        # Set Masks
        piece_mask = [1] * 16
        position_mask = [1] * 64
        updated_masks = {'action_mask' : [piece_mask, position_mask]}
        env.infos.update(updated_masks)
    n_steps += 1
    return True


# Train a PPO agent with an MLP policy, logging progress to TensorBoard.
model = PPO(MlpPolicy, env, verbose=1, tensorboard_log="run/")
model.learn(250000)

# model.save("expert_model")

# Enjoy trained agent: roll the policy out for 25 episodes, feeding each
# step's action masks (reported by the env) back into the next prediction.
for _ in range(25):
    obs = env.reset()
    done = [False]
    action_masks = []
    for step in range(1000):
        action, _states = model.predict(obs, action_mask=action_masks)
        obs, _, done, infos = env.step(action)

        # Rebuild the mask list from the infos of the latest step.
        action_masks = [info.get('action_mask') for info in infos]
        env.render()