Example #1
0
def play(agent_1, agent_2):
    """Run one game on a fresh 3x3 board between two move-choosing agents.

    Each agent is a callable that receives the current board and returns a
    move. ``agent_1`` is asked whenever ``board.player == 1`` (the X-player);
    ``agent_2`` is asked otherwise (the O-player).

    Returns:
        ``board.player`` as it stands right after the move that made
        ``board.game_is_over()`` true, or ``0`` if the board runs out of
        possible moves first (a draw).
    """
    board = Board(3)

    while board.move_still_possible():
        # Choose whose turn it is, then let that agent decide on a move.
        current_agent = agent_1 if board.player == 1 else agent_2
        board.make_move(current_agent(board))

        # Stop as soon as the board reports a finished game.
        if board.game_is_over():
            return board.player

    # No moves left and nobody finished the game: report a draw.
    return 0
Example #2
0
def learn_probabilities(num_games=5000):
    """Collect win statistics per board cell from random games and save them.

    Plays ``num_games`` games on a 3x3 board using moves from
    ``get_random_move``. Whenever a game ends with a winning move, the cells
    the winner played during that game are added to that player's global
    tally; games without a winning move contribute nothing. The tallies of
    both players are summed, normalized, written to the file
    ``probabilities`` in the current working directory, and returned.

    Args:
        num_games: Number of random games to simulate. Defaults to 5000,
            the count the original hard-coded loop used.

    Returns:
        The 3x3 array returned by ``preprocessing.normalize`` for the
        combined tally.
    """
    # Global counters for the winning games of X and O
    win_x = np.zeros((3, 3))
    win_o = np.zeros((3, 3))

    # Learn through `num_games` random plays
    for _ in range(num_games):
        board = Board(3)

        # Per-game counters: which cells each player used in this game
        count_x = np.zeros((3, 3))
        count_o = np.zeros((3, 3))
        while board.move_still_possible():
            move = get_random_move(board)
            x, y = move

            # Record the move for the player who is about to make it
            if board.player == 1:
                count_x[x, y] += 1
            else:
                count_o[x, y] += 1
            board.make_move(move)

            if board.move_was_winning_move(board.player):
                winner = board.player

                # Only the winner's cells from this game are credited
                if winner == 1:
                    win_x += count_x
                elif winner == -1:
                    win_o += count_o
                break

    # Combine the statistics of both players
    win = win_x + win_o

    # NOTE(review): if `preprocessing` is sklearn.preprocessing, this scales
    # each row to unit L2 norm, so the values do not sum to 1 like true
    # probabilities would -- confirm this is intended before changing it.
    win_normalized = preprocessing.normalize(win, norm='l2')

    # Write the result to file; the context manager guarantees the handle is
    # flushed and closed even if savetxt raises.
    with open('probabilities', 'w') as f:
        np.savetxt(f, win_normalized)
    print("Learning finished!")
    return win_normalized