Esempio n. 1
0
def train(load_path):
    """Train a PPO agent against three random opponents in a 4-player game.

    Args:
        load_path: Path of a previously saved PPO model to continue from.
            Any falsy value (``None``, ``""``) starts from a fresh
            ``PPO(MlpPolicy, env)`` instead.

    The evaluation callback is given ``LOGDIR`` as both its best-model and
    log destination, and the final model is saved to ``LOGDIR/final_model``.
    The environment is closed even if training fails or is interrupted.
    """
    env = LoveLetterMultiAgentEnv(num_players=4)
    try:
        env.seed(SEED)

        # Reference hyperparameters kept from the original author:
        # take mujoco hyperparams (but doubled timesteps_per_actorbatch to
        # cover more steps.)
        # model = PPO(MlpPolicy, env, timesteps_per_actorbatch=4096,
        #             clip_param=0.2, entcoeff=0.0, optim_epochs=10,
        #             optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99,
        #             lam=0.95, schedule='linear', verbose=2)
        if load_path:
            model = PPO.load(load_path, env)
        else:
            model = PPO(MlpPolicy, env)

        # The learning model is listed first; the other three entries are
        # random agents, each seeded differently.
        random_agents = [RandomAgent(env, SEED + i) for i in range(3)]
        agents = [model, *random_agents]
        env.set_agents(agents)

        eval_callback = EvalCallback(env,
                                     best_model_save_path=LOGDIR,
                                     log_path=LOGDIR,
                                     eval_freq=EVAL_FREQ,
                                     n_eval_episodes=EVAL_EPISODES)

        model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

        model.save(os.path.join(
            LOGDIR, "final_model"))  # probably never get to this point.
    finally:
        # Release the environment on every exit path, not only on success.
        env.close()
Esempio n. 2
0
def train(output_folder, load_path):
    """Train a PPO agent against three random opponents, logging per run.

    Args:
        output_folder: Base directory for results. Each call uses a
            sub-directory named after the current local time in ISO-8601
            format with second resolution.
        load_path: Path of a previously saved PPO model to continue from.
            Any falsy value (``None``, ``""``) starts from a fresh model
            created with ``verbose=1`` and ``ent_coef=0.05``.

    The logger, the evaluation callback (best model and logs) and the final
    model (``final_model``) all write into the per-run sub-directory. The
    environment is closed even if training fails or is interrupted.
    """
    base_output = Path(output_folder)
    full_output = base_output / datetime.datetime.now().isoformat(
        timespec="seconds")
    # latest = base_output / "latest"
    # latest.symlink_to(full_output)

    logger.configure(folder=str(full_output))

    env = LoveLetterMultiAgentEnv(num_players=4,
                                  reward_fn=Rewards.fast_elimination_reward)
    try:
        env.seed(SEED)

        # Reference hyperparameters kept from the original author:
        # take mujoco hyperparams (but doubled timesteps_per_actorbatch to
        # cover more steps.)
        # model = PPO(MlpPolicy, env, timesteps_per_actorbatch=4096,
        #             clip_param=0.2, entcoeff=0.0, optim_epochs=10,
        #             optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99,
        #             lam=0.95, schedule='linear', verbose=2)
        if load_path:
            model = PPO.load(load_path, env)
        else:
            # def test_fn(env):
            #     return env.valid_action_mask()
            #
            model = PPO(MlpPolicy, env, verbose=1,
                        ent_coef=0.05)  #, action_mask_fn=test_fn)

        other_agents = [RandomAgent(env, SEED + i) for i in range(3)]
        # Alternative opponents tried previously (saved checkpoints):
        # other_agents = [
        #     PPO.load("zoo/ppo_logging/2020-12-27T15:51:49/final_model", env),
        #     PPO.load("zoo/ppo_reward_bugfix2/latest/best_model", env),
        #     PPO.load("zoo/ppo_reward_bugfix2/latest/best_model", env),
        # ]

        # The learning model is listed first, followed by its opponents.
        agents = [model, *other_agents]
        env.set_agents(agents)

        eval_callback = EvalCallback(
            env,
            best_model_save_path=str(full_output),
            log_path=str(full_output),
            eval_freq=EVAL_FREQ,
            n_eval_episodes=EVAL_EPISODES,
        )

        model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

        model.save(str(full_output / "final_model"))
    finally:
        # Release the environment on every exit path, not only on success.
        env.close()
Esempio n. 3
0
def make_agents(env):
    """Load the two saved PPO checkpoints that are pitted against each other.

    Args:
        env: Environment handed to ``PPO.load`` for each checkpoint.

    Returns:
        A two-element list: the model from the ``ppo_kl`` run first, then
        the model from the ``ppo_logging`` run.
    """
    checkpoint_paths = (
        "zoo/ppo_kl/2020-12-27T16:28:42/final_model",  # "new" model
        "zoo/ppo_logging/2020-12-27T15:51:49/final_model",  # "old" model
    )
    # Loaded in listed order, so the newer checkpoint is loaded first.
    return [PPO.load(path, env) for path in checkpoint_paths]


# GAME_LIMIT is the iteration count of the game loop below.
# STEP_LIMIT is presumably a per-game step cap; its use is not visible in
# this excerpt -- TODO confirm.
GAME_LIMIT = 20000
STEP_LIMIT = 100


# Two-player environment whose agents are supplied by make_agents.
env = LoveLetterMultiAgentEnv(num_players=2, make_agents_cb=make_agents)


# Tallies keyed by player position, every count starting at zero.
wins_by_pos = dict.fromkeys((player.position for player in env.players), 0)
starts_by_pos = dict.fromkeys((player.position for player in env.players), 0)
invalid_games = 0


# Iterate GAME_LIMIT times; on every 100th iteration print the index and a
# "Wins by position" header.
# NOTE(review): the rest of the loop body is not visible in this excerpt.
for i in range(GAME_LIMIT):
    if i % 100 == 0:
        print(i)
        print("Wins by position")
Esempio n. 4
0
from gym_love_letter.interface.api import make_api

# Flask application; files in the "dist" folder are served under "/static".
app = Flask(__name__, static_url_path="/static", static_folder="dist")


def make_agents(env):
    """Build the agent line-up for a two-seat game.

    Args:
        env: Environment passed to the random agent's constructor.

    Returns:
        A list with a ``HumanAgent`` first and a ``RandomAgent`` second.
    """
    # A saved PPO checkpoint (PPO.load(load_path, env)) was used here
    # previously as the opponent in place of the random agent.
    return [HumanAgent(), RandomAgent(env)]


# Two-player environment whose agents are supplied by make_agents.
env = LoveLetterMultiAgentEnv(num_players=2, make_agents_cb=make_agents)


@app.route("/")
def index():
    """Serve ``index.html`` from the app's static folder at the site root."""
    entry_page = "index.html"
    return app.send_static_file(entry_page)


# Build the API blueprint around the module-level env and mount it at /api.
api = make_api(env)
app.register_blueprint(api, url_prefix="/api")

# Launch the game: open a browser at the local address on port 5000.
# NOTE(review): this runs at import time; the server start-up call is not
# visible in this excerpt -- confirm the server is started afterwards.
webbrowser.open("http://localhost:5000")