Code example #1
def resume_self_play():
    env = TicTacToeEnv()
    # Collect every checkpoint file in save_dir.
    saves = [f for f in listdir(save_dir) if isfile(join(save_dir, f))]
    # Filenames are ISO timestamps, so the lexicographic max is the newest checkpoint.
    recent_file = max(saves)
    policy = EpsilonGreedy(QConvTicTacToe(env), 0)  # epsilon=0: always acts greedily
    opposing_policy = EpsilonGreedy(QConvTicTacToe(env), 1)  # epsilon=1: always acts randomly
    self_play = SelfPlay(policy, opposing_policy)
    # Restore the trained weights into the greedy policy's network.
    policy.q.policy_net.load_state_dict(torch.load(join(save_dir, recent_file)))
    self_play.evaluate_policy(100)  # play 100 evaluation games
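All four examples rely on the same module-level setup: filesystem helpers, torch, and a `save_dir` checkpoint directory, none of which appear in the snippets themselves. A minimal sketch of that shared preamble, assuming a local "saves" directory (the exact path is an assumption):

import datetime
import os
from os import listdir
from os.path import isfile, join

import torch

# Directory where training checkpoints are written and read back.
# The name "saves" is an assumption; the snippets only require that save_dir exists.
save_dir = "saves"
os.makedirs(save_dir, exist_ok=True)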
Code example #2
def resume_self_play():
    env = Connect4Env()
    saves = [f for f in listdir(save_dir) if isfile(join(save_dir, f))]
    recent_file = max(saves)  # newest checkpoint: ISO-timestamp names sort chronologically
    policy = EpsilonGreedy(QLinear(env), 0)
    opposing_policy = EpsilonGreedy(QLinear(env), 0)  # epsilon=0: opponent also acts greedily
    self_play = SelfPlay(policy, opposing_policy)
    policy.q.policy_net.load_state_dict(torch.load(join(save_dir, recent_file)))
    self_play.evaluate_policy(100)
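Both resume functions select their checkpoint with `max(saves)`. That works because `run_training` (below) names each file with `datetime.datetime.now().isoformat()`, and ISO-8601 timestamps sort lexicographically in chronological order. A quick illustration:

import datetime

older = datetime.datetime(2023, 1, 5).isoformat()   # '2023-01-05T00:00:00'
newer = datetime.datetime(2023, 11, 2).isoformat()  # '2023-11-02T00:00:00'
assert max([older, newer]) == newer  # lexicographic max == most recent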
Code example #3
def run_training():
    env = TicTacToeEnv()
    policy = EpsilonGreedy(QConvTicTacToe(env, buffer_size=5000, batch_size=64), 0.1)
    opposing_policy = EpsilonGreedy(
        QConvTicTacToe(env), 1
    )  # epsilon=1: the opponent plays randomly for now, to keep exploration high
    self_play = SelfPlay(policy, opposing_policy, env=env)
    self_play.train_model(20000, resume=False)
    print("Training Done")

    # Name the checkpoint with an ISO timestamp so the newest file sorts last.
    saved_name = os.path.join(save_dir, datetime.datetime.now().isoformat())
    torch.save(self_play.policy.q.policy_net.state_dict(), saved_name)
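A typical session trains first and then reloads the freshest checkpoint for evaluation. Given the definitions above, a plausible entry point would look like the following; this `__main__` block is hypothetical and not part of the source:

if __name__ == "__main__":
    run_training()      # writes a timestamped checkpoint into save_dir
    resume_self_play()  # reloads that checkpoint and plays 100 evaluation games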
Code example #4
def run_training():
    env = Connect4Env()
    policy = MCTreeSearch(
        ConvNetConnect4(),
        Connect4Env,
        temperature_cutoff=3,
        iterations=200,
        min_memory=64,
    )
    opposing_policy = EpsilonGreedy(
        QConvConnect4(env), 1
    )  # epsilon=1: the opponent plays randomly for now, to keep exploration high
    self_play = SelfPlay(policy, opposing_policy, env=env, swap_sides=True)
    self_play.train_model(20000, resume=False)
    print("Training Done")

    saved_name = os.path.join(save_dir, datetime.datetime.now().isoformat())
    # Note: this assumes the policy exposes its network as `.q.policy_net`;
    # a tree-search policy may keep its network under a different attribute.
    torch.save(self_play.policy.q.policy_net.state_dict(), saved_name)
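One portability caveat: `datetime.datetime.now().isoformat()` produces names containing colons (e.g. '2024-05-01T12:34:56.789012'), which are not legal in Windows filenames. If that matters, a colon-free format keeps the chronological-sort property that `max(saves)` relies on; this replacement is illustrative, not from the source:

# Fixed-width, colon-free timestamp; still sorts lexicographically by time.
stamp = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
saved_name = os.path.join(save_dir, stamp)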