from agents import CommandLineAgent, PPOAgent, RandomAgent
from environments.tictactoe import TicTacToeGameState
from runners import run_for_n_games_and_print_stats, run_step

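# Example: train a PPO agent against a random opponent on TicTacToe,
# then play interactive games from the command line.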
if __name__ == "__main__":
    gs = TicTacToeGameState()
    agent0 = PPOAgent(state_space_size=gs.get_vectorized_state().shape[0],
                      action_space_size=gs.get_action_space_size())
    agent1 = RandomAgent()

    # Train: 100 rounds of 5,000 games against the random opponent
    for _ in range(100):
        run_for_n_games_and_print_stats([agent0, agent1], gs, 5000)

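    # Print stats over a final batch of 100 games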
    run_for_n_games_and_print_stats([agent0, agent1], gs, 100)

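    # Play one game: the trained PPO agent (player 0) against a human on the command line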
    gs_clone = gs.clone()
    while not gs_clone.is_game_over():
        run_step([agent0, CommandLineAgent()], gs_clone)
        print(gs_clone)

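    # Play one game: a human (player 0) against the random agent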
    gs_clone = gs.clone()
    while not gs_clone.is_game_over():
        run_step([CommandLineAgent(), agent1], gs_clone)
        print(gs_clone)
from agents import TabularLikeDeepQLearningAgent
from environments import GridWorldGameState
from runners import run_for_n_games_and_print_stats, run_step

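# Example: train a tabular-like deep Q-learning agent on GridWorld,
# then replay a single episode step by step.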
if __name__ == "__main__":
    gs = GridWorldGameState()
    agent = TabularLikeDeepQLearningAgent(action_space_size=4)

    # Train: 500 rounds of 100 episodes
    for _ in range(500):
        run_for_n_games_and_print_stats([agent], gs, 100)

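    # Turn off exploration (a negative epsilon should make the agent act greedily)
    # and print stats over 100 evaluation episodes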
    agent.epsilon = -1.0
    run_for_n_games_and_print_stats([agent], gs, 100)

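    # Replay a single episode, printing the state after each step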
    gs = gs.clone()
    while not gs.is_game_over():
        run_step([agent], gs)
        print(gs)
from agents import RandomAgent, HalfAlphaZeroAgent
from environments.tictactoe import TicTacToeGameState
from runners import run_for_n_games_and_print_stats

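# Example: train a HalfAlphaZero agent against a random opponent on TicTacToe.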
if __name__ == "__main__":

    import tensorflow as tf

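    # Disable eager execution; the HalfAlphaZeroAgent presumably relies on
    # TF1-style graph sessions.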
    tf.compat.v1.disable_eager_execution()

    gs = TicTacToeGameState()
    agent0 = HalfAlphaZeroAgent(10, gs.get_action_space_size(), keep_memory=True)
    agent1 = RandomAgent()

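    # Train: 1,000 rounds of 100 games with shuffled player order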
    for _ in range(1000):
        run_for_n_games_and_print_stats([agent0, agent1], gs, 100, shuffle_players=True)