Esempio n. 1
0
# Number of legal moves after which the board is full.
_BOARD_CELLS = 9


def _board_for(game, player):
    """Return the board handed to the algorithm when *player* is to move.

    Player 2 receives ``game.get_inv_board()``; player 1 receives
    ``game.board`` directly.
    """
    return game.get_inv_board() if player == 2 else game.board


def _play_training_episode(game, algo):
    """Self-play one game and return ``(player_moves, winner)``.

    ``player_moves`` maps each player (1 and 2) to the list of
    ``(board_before, board_after, move)`` tuples of their legal moves.
    ``winner`` is the player whose move put the game into
    ``GameState.WON``, or ``None`` when the episode ended without a win.

    Illegal moves are reported to ``algo.train_move(..., illegal=True)``
    immediately and the same player is asked again.
    """
    player_moves = {1: [], 2: []}
    nr_of_moves = 0
    while game.get_gamestate() == GameState.ONGOING:
        for player in (1, 2):
            board = _board_for(game, player)
            legal_move = False
            while not legal_move:
                move = algo.make_move(board)
                old_board = deepcopy(board)
                legal_move = game.play(player, move)
                board = _board_for(game, player)
                transition = (old_board, deepcopy(board), move)
                if legal_move:
                    player_moves[player].append(transition)
                else:
                    algo.train_move(transition, illegal=True)

            if game.get_gamestate() == GameState.WON:
                return player_moves, player
            nr_of_moves += 1
            # Return (rather than break out of the for-loop only) so a full
            # board always ends the episode, whatever get_gamestate() says.
            if nr_of_moves >= _BOARD_CELLS:
                return player_moves, None
    return player_moves, None


def _play_human_move(game):
    """Prompt on stdin until the human (player 1) makes a legal move.

    The prompt is 1-based; the value passed to ``game.play`` is 0-based.
    Non-numeric input, out-of-range cells and moves rejected by
    ``game.play`` all lead to a new prompt instead of a lost turn.
    """
    while True:
        try:
            move = int(input('Move: ')) - 1
        except ValueError:
            print('Please enter a number from 1 to 9.')
            continue
        # Range check first so that e.g. "0" cannot reach the board as
        # index -1.
        if 0 <= move < _BOARD_CELLS and game.play(1, move):
            return
        print('Illegal move, try again.')


def _play_against_human(game, algo):
    """Play one interactive game: human as player 1, *algo* as player 2.

    The algorithm is queried with ``exploration=False``. The board is
    printed before each human move and once more when the game ends.
    """
    nr_of_moves = 0
    while game.get_gamestate() == GameState.ONGOING:
        # Human player
        print(game)
        _play_human_move(game)
        nr_of_moves += 1
        # Stop on a win or a full board so the AI is never asked to move
        # in a finished game.
        if (game.get_gamestate() != GameState.ONGOING
                or nr_of_moves >= _BOARD_CELLS):
            break
        # AI player
        move = algo.make_move(game.get_inv_board(), exploration=False)
        if game.play(2, move):
            nr_of_moves += 1
        else:
            # Asking again would return the same move for the same board if
            # the policy is deterministic, so the turn is skipped instead.
            print('AI chose an illegal move and forfeits its turn.')
    print(game)


def main(algo):
    """Train *algo* through self-play, then let a human play against it.

    Training runs ``EPISODES`` games. ``algo.exploration`` is lowered by a
    fixed step before every episode and clamped at 0; the step is sized so
    that ``EXPLORATION`` is used up after 75% of the episodes. After each
    episode both players' move lists are passed to ``algo.train``, with
    ``winner``/``loser`` flags when the game was won. Every 1000 episodes
    progress is printed and the win counters are reset.

    Args:
        algo: learner exposing ``exploration``, ``make_move``,
            ``train_move`` and ``train``.
    """
    game = TicTacToe()
    player_wins = {1: 0, 2: 0}
    # Guard against EPISODES == 0, which would otherwise divide by zero.
    exp_step = EXPLORATION / (EPISODES * 0.75) if EPISODES else 0
    for i in range(EPISODES):
        algo.exploration -= exp_step
        if algo.exploration < 0:
            algo.exploration = 0

        if i % 1000 == 0:
            print(f'Running episode {i}')
            print(f'Exploration: {round(algo.exploration,2)}')
            print(f'Total wins: {player_wins[1] + player_wins[2]}')
            player_wins = {1: 0, 2: 0}

        player_moves, winner = _play_training_episode(game, algo)
        if winner is not None:
            player_wins[winner] += 1

        if game.get_gamestate() == GameState.WON:
            for player in (1, 2):
                algo.train(player_moves[player],
                           winner=(winner == player),
                           loser=(winner != player))
        else:
            for player in (1, 2):
                algo.train(player_moves[player])
        game.clear_board()

    _play_against_human(game, algo)