import collections
import math
import random
import sys

# The game-specific helpers available_moves, apply_move, has_winner and evaluate are
# assumed to be imported from the accompanying tic-tac-toe module, which is not shown
# in this section.


def monte_carlo_sample(board_state, side):
    """Sample a single rollout from the current board_state and side. Moves are made to the current board_state
    until a terminal state is reached, then the result and the first move made to get there are returned.

    Args:
        board_state (3x3 tuple of int): state of the board
        side (int): side currently to play. +1 for the plus player, -1 for the minus player

    Returns:
        (result(int), move((int, int))): The result from this rollout: +1 for a win for the plus player, -1 for
            a win for the minus player, 0 for a draw
    """
    result = has_winner(board_state)
    if result != 0:
        return result, None
    moves = list(available_moves(board_state))
    if not moves:
        return 0, None

    # select a random move, then recurse until the game is decided
    move = random.choice(moves)
    result, _ = monte_carlo_sample(apply_move(board_state, move, side), -side)
    return result, move
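
# A minimal usage sketch (not part of the original module): estimate the best move by
# running many monte_carlo_sample rollouts and averaging the results per first move.
# The helper name monte_carlo_best_move is hypothetical.
def monte_carlo_best_move(board_state, side, number_of_samples):
    results = collections.defaultdict(float)
    samples = collections.defaultdict(int)
    for _ in range(number_of_samples):
        result, move = monte_carlo_sample(board_state, side)
        if move is not None:
            # score each rollout from the perspective of the side to move
            results[move] += result * side
            samples[move] += 1
    if not samples:
        return None  # terminal position, no move to make
    # the best move is the one with the highest average result
    return max(samples, key=lambda m: results[m] / samples[m])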
def min_max_alpha_beta(board_state, side, max_depth, evaluation_func=evaluate,
                       alpha=-sys.float_info.max, beta=sys.float_info.max):
    """Runs the min-max algorithm with alpha-beta pruning on a given board_state for a given side, to a given
    depth, in order to find the best move.

    Args:
        board_state (3x3 tuple of int): The board state we are evaluating
        side (int): either +1 or -1
        max_depth (int): how deep we want our tree to go before we use the evaluation_func to determine how good
            the position is
        evaluation_func (board_state -> int): Function used to evaluate the position for the plus player
        alpha (float): Used when this is called recursively, normally ignore
        beta (float): Used when this is called recursively, normally ignore

    Returns:
        (best_score(int), best_score_move((int, int))): the move found to be best and what its min-max score was
    """
    best_score_move = None
    moves = list(available_moves(board_state))
    if not moves:
        # no moves left, so the game is a draw
        return 0, None

    for move in moves:
        new_board_state = apply_move(board_state, move, side)
        winner = has_winner(new_board_state)
        if winner != 0:
            return winner * 10000, move
        else:
            if max_depth <= 1:
                score = evaluation_func(new_board_state)
            else:
                # evaluation_func must be passed through explicitly so that alpha and
                # beta bind to the correct parameters of the recursive call
                score, _ = min_max_alpha_beta(new_board_state, -side, max_depth - 1,
                                              evaluation_func, alpha, beta)

        if side > 0:
            if score > alpha:
                alpha = score
                best_score_move = move
        else:
            if score < beta:
                beta = score
                best_score_move = move

        if alpha >= beta:
            # the opponent will never allow this line, so stop searching it
            break

    return alpha if side > 0 else beta, best_score_move
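
# Hypothetical example (not in the original module): search the empty board to depth 6
# for the plus player. With alpha-beta pruning this explores far fewer nodes than the
# plain min_max below while returning the same min-max score.
def _demo_min_max_alpha_beta():
    empty_board = ((0, 0, 0),
                   (0, 0, 0),
                   (0, 0, 0))
    score, move = min_max_alpha_beta(empty_board, 1, 6)
    print('score %s move %s' % (score, move))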
def min_max(board_state, side, max_depth, evaluation_func=evaluate):
    """Runs the min-max algorithm on a given board_state for a given side, to a given depth, in order to find
    the best move.

    Args:
        board_state (3x3 tuple of int): The board state we are evaluating
        side (int): either +1 or -1
        max_depth (int): how deep we want our tree to go before we use the evaluation_func to determine how good
            the position is
        evaluation_func (board_state -> int): Function used to evaluate the position for the plus player

    Returns:
        (best_score(int), best_score_move((int, int))): the move found to be best and what its min-max score was
    """
    best_score = None
    best_score_move = None
    moves = list(available_moves(board_state))
    if not moves:
        # no moves left, so the game is a draw
        return 0, None

    for move in moves:
        new_board_state = apply_move(board_state, move, side)
        winner = has_winner(new_board_state)
        if winner != 0:
            return winner * 10000, move
        else:
            if max_depth <= 1:
                score = evaluation_func(new_board_state)
            else:
                # pass evaluation_func through so recursive calls use the same heuristic
                score, _ = min_max(new_board_state, -side, max_depth - 1, evaluation_func)

        if side > 0:
            if best_score is None or score > best_score:
                best_score = score
                best_score_move = move
        else:
            if best_score is None or score < best_score:
                best_score = score
                best_score_move = move

    return best_score, best_score_move
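
# The evaluate heuristic used as the default evaluation_func above is imported from
# the game module and not shown in this section. A plausible minimal sketch, named
# _example_evaluate here to avoid shadowing the real function: score each row, column
# and diagonal by how many pieces the plus player has in it when the minus player has
# not blocked it, minus the same for the minus player.
_LINES = ([[(row, column) for column in range(3)] for row in range(3)] +           # rows
          [[(row, column) for row in range(3)] for column in range(3)] +           # columns
          [[(i, i) for i in range(3)], [(i, 2 - i) for i in range(3)]])            # diagonals


def _example_evaluate(board_state):
    score = 0
    for line in _LINES:
        values = [board_state[row][column] for row, column in line]
        if -1 not in values:
            score += sum(values)  # unblocked plus-player progress counts up
        if 1 not in values:
            score += sum(values)  # minus values are negative, so this counts down
    return score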
def monte_carlo_tree_search_uct(board_state, side, number_of_samples):
    """Evaluate the best move from the current board_state for the given side using Monte Carlo sampling with
    upper confidence bounds applied to trees (UCT).

    Args:
        board_state (3x3 tuple of int): state of the board
        side (int): side currently to play. +1 for the plus player, -1 for the minus player
        number_of_samples (int): number of sample rollouts to run from the current position; the higher the
            number, the better the estimation of the position

    Returns:
        (result(float), move((int, int))): The average result for the best move from this position and what
            that move was.
    """
    state_results = collections.defaultdict(float)
    state_samples = collections.defaultdict(float)

    for _ in range(number_of_samples):
        current_side = side
        current_board_state = board_state
        first_unvisited_node = True
        rollout_path = []
        result = 0

        while result == 0:
            move_states = {move: apply_move(current_board_state, move, current_side)
                           for move in available_moves(current_board_state)}

            if not move_states:
                # no moves left, so the game is a draw
                result = 0
                break

            if all(state in state_samples for state in move_states.values()):
                # every child has been visited, so select by upper confidence bounds
                log_total_samples = math.log(sum(state_samples[s] for s in move_states.values()))
                move = max(move_states,
                           key=lambda m: _upper_confidence_bounds(state_results[move_states[m]],
                                                                  state_samples[move_states[m]],
                                                                  log_total_samples))
            else:
                # at least one child is unvisited, so explore randomly
                move = random.choice(list(move_states.keys()))

            current_board_state = move_states[move]

            if first_unvisited_node:
                rollout_path.append((current_board_state, current_side))
                if current_board_state not in state_samples:
                    first_unvisited_node = False

            current_side = -current_side
            result = has_winner(current_board_state)

        for path_board_state, path_side in rollout_path:
            state_samples[path_board_state] += 1.
            # score the result from the perspective of the player who moved into this
            # state, then normalize it from [-1, 1] to [0, 1]
            state_results[path_board_state] += result * path_side / 2. + .5

    move_states = {move: apply_move(board_state, move, side) for move in available_moves(board_state)}

    # note: this assumes every root move was sampled at least once, otherwise
    # state_samples[move_states[m]] would be zero here
    move = max(move_states,
               key=lambda m: state_results[move_states[m]] / state_samples[move_states[m]])

    return state_results[move_states[move]] / state_samples[move_states[move]], move
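
# _upper_confidence_bounds is called above but not defined in this section. A sketch
# assuming the standard UCB1 formula: the mean payout of a state plus an exploration
# bonus that grows for rarely sampled states.
def _upper_confidence_bounds(payout, samples, log_total_samples):
    return payout / samples + math.sqrt((2 * log_total_samples) / samples)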