def basic_policy(board, computerLetter):
    '''
    Given a board and the computer's letter, return a probability
    distribution for the next move as a list of 10 floats (index 0 unused,
    indices 1-9 are board cells).

    Move priority: win now > block opponent's win > corner > center > side.
    '''
    if computerLetter == 'X':
        playerLetter = 'O'
    else:
        playerLetter = 'X'

    response_probabilities = [0.0 for i in range(10)]

    # Here is our algorithm for our Tic Tac Toe AI:
    # First, check if we can win in the next move.
    for i in range(1, 10):
        copy = tic_tac_toe.getBoardCopy(board)
        if tic_tac_toe.isSpaceFree(copy, i):
            tic_tac_toe.makeMove(copy, computerLetter, i)
            if tic_tac_toe.isWinner(copy, computerLetter):
                response_probabilities[i] = 1.0
                return response_probabilities

    # Check if the player could win on his next move, and block them.
    for i in range(1, 10):
        copy = tic_tac_toe.getBoardCopy(board)
        if tic_tac_toe.isSpaceFree(copy, i):
            tic_tac_toe.makeMove(copy, playerLetter, i)
            if tic_tac_toe.isWinner(copy, playerLetter):
                response_probabilities[i] = 1.0
                return response_probabilities

    # Try to take one of the corners, if they are free (uniform over them).
    for i in [1, 3, 7, 9]:
        if tic_tac_toe.isSpaceFree(board, i):
            response_probabilities[i] = 1.0
    # Hoisted: the original recomputed sum() twice per normalization.
    total = sum(response_probabilities)
    if total > 0.1:
        return [x / total for x in response_probabilities]

    # Try to take the center, if it is free.
    if tic_tac_toe.isSpaceFree(board, 5):
        response_probabilities[5] = 1.0
        return response_probabilities

    # Move on one of the sides (uniform over the free ones).
    for i in [2, 4, 6, 8]:
        if tic_tac_toe.isSpaceFree(board, i):
            response_probabilities[i] = 1.0
    total = sum(response_probabilities)
    if total > 0.1:
        return [x / total for x in response_probabilities]

    # BUG FIX: the original fell off the end here and implicitly returned
    # None when the board was full; return the all-zero distribution so
    # callers always get a list of 10 floats.
    return response_probabilities
def main(): # Create a state transition model state_model = StateModel(basic_policy) # Initialise reward_function = {} policy_function = {} for s in range(state_model.num_labels): board = state_model.label_board_lookup[s] policy_function[s] = (get_team_from_state(board), random_action(board)) reward_function[s] = { 'O': 1.0 if isWinner(board, 'O') else 0.0, 'X': 1.0 if isWinner(board, 'X') else 0.0 } value_function = evaluate(policy_function, reward_function, state_model) xwin = [' ', 'X', 'X', 'X', ' ', 'O', ' ', 'O', ' ', ' '] drawBoard(xwin) s = state_model.board_labels_lookup[','.join(xwin)] print s, reward_function[s]['O'], reward_function[s]['X'] print value_function[s] prev = [' ', 'X', 'X', ' ', ' ', 'O', ' ', 'O', ' ', ' '] drawBoard(prev) s = state_model.board_labels_lookup[','.join(prev)] print s, reward_function[s]['O'], reward_function[s]['X'] print value_function[s], policy_function[s] print 'Now trying:' policy_function, value_function = policy_iteration(state_model) print 'Quick test' prev = [' ', 'X', 'X', ' ', ' ', 'O', ' ', 'O', ' ', ' '] drawBoard(prev) s = state_model.board_labels_lookup[','.join(prev)] print s, reward_function[s]['O'], reward_function[s]['X'] print value_function[s], policy_function[s] early = [' ', 'X', ' ', ' ', ' ', 'O', ' ', ' ', ' ', ' '] drawBoard(early) s = state_model.board_labels_lookup[','.join(early)] print value_function[s], policy_function[s] empty = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '] drawBoard(empty) print get_team_from_state(empty) s = state_model.board_labels_lookup[','.join(empty)] print state_model.num_labels print value_function[s], policy_function[s]
def evaluate(policy_function, reward_function, state_model): ''' Evaluate value function for a given policy :param policy_function: :param reward_function: :param state_model: :return: value function ''' # Initialise value_function = {} for s in range(state_model.num_labels): value_function[s] = 0. # evaluate tol = 1.E-6 delta = 1E6 while delta > tol: delta = 0. for s in range(state_model.num_labels): if isWinner(state_model.label_board_lookup[s], 'X') or isWinner( state_model.label_board_lookup[s], 'O'): # Game-end state: no rewards possible, leave value function zero continue action = policy_function[s] if action is None: print 'yo', s print policy_function if action[1] is None: # No possible moves: game end state continue v = deepcopy(value_function[s]) value_function[s] = expected_action_value(s, action, value_function, reward_function, state_model) delta = max(delta, abs(v - value_function[s])) # print 'bing',delta return value_function
def policy_iteration(state_model): # Initialise reward_function = {} policy_function = {} for s in range(state_model.num_labels): board = state_model.label_board_lookup[s] policy_function[s] = (get_team_from_state(board), random_action(board)) reward_function[s] = { 'O': 1.0 if isWinner(board, 'O') else 0.0, 'X': 1.0 if isWinner(board, 'X') else 0.0 } converged = False ctr = 0 while not converged: value_function = evaluate(policy_function, reward_function, state_model) new_policy_function = improve(value_function, reward_function, state_model) converged = policy_stable(policy_function, new_policy_function) policy_function = new_policy_function ctr += 1 print 'Iteration :', ctr return policy_function, value_function
def testsIsWinner5(self):
    # Cells 4-6 are all 'O' (a complete row in the 1-9 cell numbering),
    # so O should be reported as the winner.
    grid = list(' XOXOOOXX ')
    self.assertTrue(isWinner(grid, self.O), 'Should return True')
def testsIsWinner4(self):
    # X occupies cells 1, 3, 7 and 8 -- no three-in-a-row -- so X
    # must not be reported as the winner.
    grid = list(' XOX OOXX ')
    self.assertFalse(isWinner(grid, self.X), 'Should return False')
import tic_tac_toe

# Interactive game loop: human plays 'X', the AI replies, until someone
# wins or the 1-indexed board (cell 0 unused) fills up.
board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
current_turn = "player"
game_not_over = True

while game_not_over:
    if current_turn == "player":
        tic_tac_toe.drawBoard(board)
        tic_tac_toe.playerMove(board)
        if tic_tac_toe.isWinner(board, 'X'):
            tic_tac_toe.drawBoard(board)
            print("You won the game!")
            game_not_over = False
        elif tic_tac_toe.isBoardFull(board):
            tic_tac_toe.drawBoard(board)
            print("The game is a tie!")
            game_not_over = False
        else:
            current_turn = "computer"
    elif current_turn == "computer":
        # NOTE(review): AI_move returning False appears to signal the AI
        # just won -- confirm against tic_tac_toe.AI_move.
        if tic_tac_toe.AI_move(board) is False:
            tic_tac_toe.drawBoard(board)
            print("The AI won the game!")
            game_not_over = False
        elif tic_tac_toe.isBoardFull(board):
            # BUG FIX: the original printed the tie message but never
            # cleared game_not_over, so the loop spun forever on a full
            # board; also draw the final board, matching the player branch.
            tic_tac_toe.drawBoard(board)
            print("The game is a tie!")
            game_not_over = False
        else:
            current_turn = "player"