Example #1
import tic_tac_toe


def basic_policy(board, computerLetter):
    '''
    Given a board and the computer's letter, return a probability distribution for the next move
    '''
    if computerLetter == 'X':
        playerLetter = 'O'
    else:
        playerLetter = 'X'

    # One weight per square; index 0 is unused (squares are numbered 1-9).
    response_probabilities = [0.0] * 10

    # Here is our algorithm for our Tic Tac Toe AI:
    # First, check if we can win in the next move
    for i in range(1, 10):
        copy = tic_tac_toe.getBoardCopy(board)
        if tic_tac_toe.isSpaceFree(copy, i):
            tic_tac_toe.makeMove(copy, computerLetter, i)
            if tic_tac_toe.isWinner(copy, computerLetter):
                response_probabilities[i] = 1.0
                return response_probabilities

    # Check if the player could win on their next move, and block them.
    for i in range(1, 10):
        copy = tic_tac_toe.getBoardCopy(board)
        if tic_tac_toe.isSpaceFree(copy, i):
            tic_tac_toe.makeMove(copy, playerLetter, i)
            if tic_tac_toe.isWinner(copy, playerLetter):
                response_probabilities[i] = 1.0
                return response_probabilities

    # Try to take one of the corners, if they are free.
    movesList = [1, 3, 7, 9]
    for i in movesList:
        if tic_tac_toe.isSpaceFree(board, i):
            response_probabilities[i] = 1.0
    total = sum(response_probabilities)
    if total > 0:
        return [x / total for x in response_probabilities]

    # Try to take the center, if it is free.
    if tic_tac_toe.isSpaceFree(board, 5):
        response_probabilities[5] = 1.0
        return response_probabilities

    # Move on one of the sides.
    movesList = [2, 4, 6, 8]
    for i in movesList:
        if tic_tac_toe.isSpaceFree(board, i):
            response_probabilities[i] = 1.0
    total = sum(response_probabilities)
    if total > 0:
        return [x / total for x in response_probabilities]

    # Board is full: there is no move left to make.
    return response_probabilities
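
Because basic_policy returns a probability distribution rather than a single square, a caller has to sample from it. A minimal usage sketch; weighting the draw with random.choices is our illustration, not part of the module above:

import random

import tic_tac_toe

board = [' '] * 10  # index 0 unused
probs = basic_policy(board, 'X')
# Draw one of squares 1-9 with probability proportional to the returned weights.
move = random.choices(range(1, 10), weights=probs[1:])[0]
tic_tac_toe.makeMove(board, 'X', move)
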
def main():
    # Create a state transition model
    state_model = StateModel(basic_policy)

    # Initialise
    reward_function = {}
    policy_function = {}
    for s in range(state_model.num_labels):
        board = state_model.label_board_lookup[s]
        policy_function[s] = (get_team_from_state(board), random_action(board))
        reward_function[s] = {
            'O': 1.0 if isWinner(board, 'O') else 0.0,
            'X': 1.0 if isWinner(board, 'X') else 0.0
        }
    value_function = evaluate(policy_function, reward_function, state_model)
    xwin = [' ', 'X', 'X', 'X', ' ', 'O', ' ', 'O', ' ', ' ']
    drawBoard(xwin)
    s = state_model.board_labels_lookup[','.join(xwin)]
    print(s, reward_function[s]['O'], reward_function[s]['X'])
    print(value_function[s])
    prev = [' ', 'X', 'X', ' ', ' ', 'O', ' ', 'O', ' ', ' ']
    drawBoard(prev)
    s = state_model.board_labels_lookup[','.join(prev)]
    print(s, reward_function[s]['O'], reward_function[s]['X'])
    print(value_function[s], policy_function[s])

    print('Now running policy iteration:')
    policy_function, value_function = policy_iteration(state_model)

    print('Quick test')
    prev = [' ', 'X', 'X', ' ', ' ', 'O', ' ', 'O', ' ', ' ']
    drawBoard(prev)
    s = state_model.board_labels_lookup[','.join(prev)]
    print(s, reward_function[s]['O'], reward_function[s]['X'])
    print(value_function[s], policy_function[s])

    early = [' ', 'X', ' ', ' ', ' ', 'O', ' ', ' ', ' ', ' ']
    drawBoard(early)
    s = state_model.board_labels_lookup[','.join(early)]
    print(value_function[s], policy_function[s])

    empty = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
    drawBoard(empty)
    print(get_team_from_state(empty))
    s = state_model.board_labels_lookup[','.join(empty)]
    print(state_model.num_labels)
    print(value_function[s], policy_function[s])

def evaluate(policy_function, reward_function, state_model):
    '''
    Evaluate the value function for a given policy (iterative policy evaluation).
    :param policy_function: maps state label -> (team, move) pair
    :param reward_function: maps state label -> {'O': reward, 'X': reward}
    :param state_model: state transition model built from the opponent policy
    :return: value function mapping state label -> expected return
    '''
    # Initialise
    value_function = {}
    for s in range(state_model.num_labels):
        value_function[s] = 0.

    # evaluate
    tol = 1.E-6
    delta = 1E6
    while delta > tol:
        delta = 0.
        for s in range(state_model.num_labels):
            if isWinner(state_model.label_board_lookup[s], 'X') or isWinner(
                    state_model.label_board_lookup[s], 'O'):
                # Terminal state: its value stays zero; the win reward is
                # collected on the transition into this state.
                continue
            action = policy_function[s]
            if action is None or action[1] is None:
                # No legal moves (full board): also a game-end state.
                continue
            v = value_function[s]  # floats are immutable; no copy needed
            value_function[s] = expected_action_value(s, action,
                                                      value_function,
                                                      reward_function,
                                                      state_model)
            delta = max(delta, abs(v - value_function[s]))
    return value_function
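
evaluate sweeps every state, applying an undiscounted Bellman backup until the largest update falls below tol. The expected_action_value helper it calls is not part of this listing; a minimal sketch of the one-step backup it presumably performs, where successor_distribution(s, action) is a hypothetical StateModel method yielding (next_state, probability) pairs:

def expected_action_value(s, action, value_function, reward_function,
                          state_model):
    # One-step lookahead: weight reward-plus-value of each successor state
    # by its transition probability. successor_distribution is an assumed API.
    team, move = action
    total = 0.0
    for next_s, prob in state_model.successor_distribution(s, action):
        total += prob * (reward_function[next_s][team] + value_function[next_s])
    return total
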
def policy_iteration(state_model):
    '''
    Alternate policy evaluation and greedy improvement until the policy stops
    changing, then return the final policy and its value function.
    '''
    # Initialise
    reward_function = {}
    policy_function = {}
    for s in range(state_model.num_labels):
        board = state_model.label_board_lookup[s]
        policy_function[s] = (get_team_from_state(board), random_action(board))
        reward_function[s] = {
            'O': 1.0 if isWinner(board, 'O') else 0.0,
            'X': 1.0 if isWinner(board, 'X') else 0.0
        }

    converged = False
    ctr = 0
    while not converged:
        value_function = evaluate(policy_function, reward_function,
                                  state_model)
        new_policy_function = improve(value_function, reward_function,
                                      state_model)
        converged = policy_stable(policy_function, new_policy_function)
        policy_function = new_policy_function
        ctr += 1
        print('Iteration:', ctr)
    return policy_function, value_function
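
The improve and policy_stable helpers are likewise not shown in this listing. Minimal sketches of the standard greedy-improvement step, assuming the board representation used above (squares 1-9, blank is ' ') and the expected_action_value signature sketched earlier:

def improve(value_function, reward_function, state_model):
    # Greedy improvement: in every state, pick the move that maximises the
    # one-step expected value for the team on move.
    new_policy = {}
    for s in range(state_model.num_labels):
        board = state_model.label_board_lookup[s]
        team = get_team_from_state(board)
        moves = [i for i in range(1, 10) if board[i] == ' ']
        if not moves:
            new_policy[s] = (team, None)  # full board: nothing to play
            continue
        new_policy[s] = (team, max(
            moves,
            key=lambda m: expected_action_value(s, (team, m), value_function,
                                                reward_function, state_model)))
    return new_policy


def policy_stable(old_policy, new_policy):
    # Converged once no state's chosen action changes between iterations.
    return all(old_policy[s][1] == new_policy[s][1] for s in old_policy)
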
Example #5
    def testsIsWinner5(self):
        board = [' ', 'X', 'O', 'X', 'O', 'O', 'O', 'X', 'X', ' ']
        self.assertTrue(isWinner(board, self.O), 'Should return True')
Example #6
    def testsIsWinner4(self):
        board = [' ', 'X', 'O', 'X', ' ', 'O', 'O', 'X', 'X', ' ']
        self.assertFalse(isWinner(board, self.X), 'Should return False')
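
Both tests exercise isWinner on the 10-element board list (index 0 unused). For reference, the classic implementation of that check; the phone-pad numbering with squares 7-8-9 as the top row is an assumption about this module:

def isWinner(bo, le):
    # True if letter `le` occupies any of the eight winning lines.
    return ((bo[7] == le and bo[8] == le and bo[9] == le) or  # top row
            (bo[4] == le and bo[5] == le and bo[6] == le) or  # middle row
            (bo[1] == le and bo[2] == le and bo[3] == le) or  # bottom row
            (bo[7] == le and bo[4] == le and bo[1] == le) or  # left column
            (bo[8] == le and bo[5] == le and bo[2] == le) or  # middle column
            (bo[9] == le and bo[6] == le and bo[3] == le) or  # right column
            (bo[7] == le and bo[5] == le and bo[3] == le) or  # diagonal
            (bo[9] == le and bo[5] == le and bo[1] == le))    # diagonal
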
import tic_tac_toe

board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
current_turn = "player"
game_not_over = True

while game_not_over:
    if current_turn == "player":
        tic_tac_toe.drawBoard(board)
        tic_tac_toe.playerMove(board)
        if tic_tac_toe.isWinner(board, 'X'):
            tic_tac_toe.drawBoard(board)
            print("You won the game!")
            game_not_over = False
        elif tic_tac_toe.isBoardFull(board):
            tic_tac_toe.drawBoard(board)
            print("The game is a tie!")
            game_not_over = False
        else:
            current_turn = "computer"

    elif current_turn == "computer":
        if tic_tac_toe.AI_move(board) is False:
            tic_tac_toe.drawBoard(board)
            print("The AI won the game!")
            game_not_over = False
        else:
            if tic_tac_toe.isBoardFull(board):
                print("The game is a tie!")
            else:
                current_turn = "player"