Beispiel #1
0
                              PLAYER)
        self.ai_oppoenet = MCTS(None, P2, 128, OPPONENT)
        self.current_user = None

    def select_action(self, state):
        """Return the move chosen for *state* by whichever agent is on turn.

        Dispatches to ``self.ai_player`` when ``self.current_user`` equals
        ``PLAYER``, otherwise to ``self.ai_oppoenet`` (attribute name kept
        as-is for compatibility with ``__init__``), tagging the chosen agent
        with its side before running its MCTS simulation.
        """
        on_turn = self.current_user == PLAYER
        agent = self.ai_player if on_turn else self.ai_oppoenet
        agent.current_user = PLAYER if on_turn else OPPONENT
        return agent.simulation(state)


if __name__ == '__main__':
    # Self-play driver: two MCTS agents (wrapped by AiVsAi) play against
    # each other on the tic-tac-toe environment.
    env = tictactoe_env.TicTacToeEnv()
    manager = AiVsAi()
    # Outcome tally keyed by final reward — presumably -1 loss, 0 draw,
    # 1 win for the player side; verify against env's reward convention.
    result = {-1: 0, 0: 0, 1: 0}

    for game in range(GAMES):
        print('##########    Game: {}    ##########\n'.format(game + 1))
        # Alternate which side gets the O mark each game so neither agent
        # keeps the first-move advantage (assumes MARK_O is 0 or 1 — TODO confirm).
        player_color = (MARK_O + game) % 2
        state = env.reset(player_color=player_color)
        done = False
        step_play = 0  # number of moves played so far in this game

        while not done:
            # Side to move flips every step; the starting side depends on
            # the color assigned this game. Assumes PLAYER/OPPONENT are the
            # integers 0/1 — TODO confirm against their definitions.
            current_user = ((PLAYER if player_color == MARK_O else OPPONENT) +
                            step_play) % 2
            print('- BOARD -')
            # Render both mark planes in one grid: presumably PLAYER cells
            # show as 1 and OPPONENT cells as 2 — verify board dtype/shape.
            print(env.board[PLAYER] + env.board[OPPONENT] * 2)
Beispiel #2
0
        state_memory.appendleft(self.root)
        pi_memory.appendleft(pi.flatten())

        return tuple(action)


if __name__ == '__main__':
    # Self-play data-generation driver: plays GAMES games with MCTS and
    # collects (state, pi, z) training samples into bounded replay buffers.
    start = time.time()  # wall-clock start, for timing the whole run

    train_dataset_store = []
    # Replay buffers for training data; maxlen bounds memory so the oldest
    # samples roll off once 102400 entries are stored.
    state_memory = deque(maxlen=102400)
    pi_memory = deque(maxlen=102400)
    z_memory = deque(maxlen=102400)

    # Two separate environments: one for the actual game being played and
    # one, presumably, for MCTS rollouts/simulations — confirm usage below.
    env_game = tictactoe_env.TicTacToeEnv()
    env_simul = tictactoe_env.TicTacToeEnv()

    # Outcome tally keyed by final reward — presumably -1 loss, 0 draw,
    # 1 win; verify against the env's reward convention.
    result_game = {-1: 0, 0: 0, 1: 0}
    win_mark_o = 0          # wins recorded for the O-mark side
    step_game = 0           # total moves played across all games
    step_total_simul = 0    # total simulation steps across all games

    print("=" * 30, " Game Start ", "=" * 30, '\n')

    for game in range(GAMES):
        # Alternate the starting color each game so both sides get the
        # first move equally often (assumes MARK_O is 0 or 1 — TODO confirm).
        player_color = (MARK_O + game) % 2
        state_game = env_game.reset(player_color=player_color)
        mcts = MCTS()  # fresh search tree per game
        done_game = False
        step_play = 0  # moves played so far in this game