def hmin_alpha_beta(state, player, pits, initial_max, alpha, beta, d):
    """Depth-limited MIN step of the heuristic alpha-beta search.

    Args:
        state: Board as a list of rows; every row is copied before a move
            is tried, so the caller's board is not modified here.
        player: Player to move at this node (the minimising side).
        pits: Board-size argument forwarded unchanged to the helpers.
        initial_max: Player the search was started for; ``evaluate`` and
            ``utility`` score positions from this player's point of view.
        alpha: Best value the maximising side can already guarantee.
        beta: Best value the minimising side can already guarantee.
        d: Remaining search depth; at 0 the heuristic is returned.

    Returns:
        The smallest child value found, or a value ``<= alpha`` as soon as
        one is seen (cutoff).

    Side effects:
        Increments the module-level ``count`` once per call.
    """
    global count
    count = count + 1
    if d == 0:  # Reached depth threshold, evaluate heuristic
        return evaluate(state, pits, initial_max)
    if terminal_test(state, pits):
        return utility(state, pits, initial_max)

    u = math.inf  # max possible utility
    for a in available_actions(state, player, pits):
        state_copy = [row[:] for row in state]
        result_player, result_state = action(player, a, state_copy, pits)
        # A move may leave the turn with the same player; then the child is
        # another MIN node, otherwise the turn passes to MAX.
        if result_player == player:
            child_value = hmin_alpha_beta
        else:
            child_value = hmax_alpha_beta
        u = min(u, child_value(result_state, result_player, pits,
                               initial_max, alpha, beta, d - 1))
        # The cutoff test and the beta update must follow EVERY child.
        # Previously they ran only after same-player children, so almost
        # nothing was pruned.
        if u <= alpha:
            return u
        beta = min(beta, u)
    return u
def halpha_beta(state, player, pits, d):
    """Pick a move for ``player`` with depth-limited alpha-beta search.

    Every available action is tried on a copy of ``state``. The resulting
    position is scored with ``hmax_alpha_beta`` when the same player moves
    again and with ``hmin_alpha_beta`` otherwise. The first action with the
    strictly highest score wins.

    Args:
        state: Board as a list of rows (copied row by row before each try).
        player: Player choosing the move; also used as the MAX player for
            utility purposes.
        pits: Board-size argument forwarded unchanged to the helpers.
        d: Depth limit handed to the recursive search.

    Returns:
        ``(move, visited)`` where ``visited`` is the value of the
        module-level ``count`` when the search finished. ``move`` is 0 if no
        action scored above ``-inf``.

    Side effects:
        Resets the module-level ``count`` to 0 before returning.
    """
    global count
    candidates = available_actions(state, player, pits)
    best_move = 0
    best_value = -math.inf
    root_player = player  # MAX player used when scoring positions
    lower_bound = -math.inf
    upper_bound = math.inf

    for candidate in candidates:
        board_copy = [row[:] for row in state]
        next_player, next_state = action(player, candidate, board_copy, pits)
        # Same player to move again -> still MAX; otherwise MIN replies.
        if next_player == player:
            search = hmax_alpha_beta
        else:
            search = hmin_alpha_beta
        value = search(next_state, next_player, pits, root_player,
                       lower_bound, upper_bound, d)
        if best_value < value:
            best_value, best_move = value, candidate
        lower_bound = max(value, lower_bound)

    visited = count
    count = 0
    return best_move, visited
def hmaxvalue(state, player, pits, p_o, d):
    """Depth-limited MAX step of heuristic minimax.

    Args:
        state: Board as a list of rows (copied row by row per tried move).
        player: Player to move at this node.
        pits: Board-size argument forwarded unchanged to the helpers.
        p_o: Player passed to ``evaluate`` / ``utility`` for scoring.
        d: Remaining depth; at 0 the heuristic value is returned.

    Returns:
        The heuristic value at the depth limit, the utility of a terminal
        state, or otherwise the largest value among the children.

    Side effects:
        Increments the module-level ``count`` once per call.
    """
    global count
    count = count + 1

    if d == 0:  # depth threshold reached: fall back to the heuristic
        return evaluate(state, pits, p_o)
    if terminal_test(state, pits):  # game over: exact utility
        return utility(state, pits, p_o)

    best = -(math.inf)
    mover = player
    for move in available_actions(state, mover, pits):
        board_copy = [row[:] for row in state]
        next_player, next_state = action(mover, move, board_copy, pits)
        # Keep maximising while the same player moves again; otherwise the
        # opponent minimises.
        recurse = hmaxvalue if next_player == mover else hminvalue
        child = recurse(next_state, next_player, pits, p_o, d - 1)
        if child > best:
            best = child
    return best
def max_alpha_beta(state, player, pits, initial_max, alpha, beta):
    """MAX step of the full-depth alpha-beta search.

    Args:
        state: Board as a list of rows; every row is copied before a move
            is tried, so the caller's board is not modified here.
        player: Player to move at this node (the maximising side).
        pits: Board-size argument forwarded unchanged to the helpers.
        initial_max: Player the search was started for; ``utility`` scores
            terminal positions from this player's point of view.
        alpha: Best value the maximising side can already guarantee.
        beta: Best value the minimising side can already guarantee.

    Returns:
        The largest child value found, or a value ``>= beta`` as soon as
        one is seen (cutoff).

    Side effects:
        Increments the module-level ``count`` once per call.
    """
    global count
    count = count + 1
    if terminal_test(state, pits):
        return utility(state, pits, initial_max)

    u = -math.inf  # min possible utility
    for a in available_actions(state, player, pits):
        state_copy = [row[:] for row in state]
        result_player, result_state = action(player, a, state_copy, pits)
        # A move may leave the turn with the same player; then the child is
        # another MAX node, otherwise the turn passes to MIN.
        if result_player == player:
            child_value = max_alpha_beta
        else:
            child_value = min_alpha_beta
        u = max(u, child_value(result_state, result_player, pits,
                               initial_max, alpha, beta))
        # The cutoff test and the alpha update must follow EVERY child.
        # Previously they ran only after same-player children, so almost
        # nothing was pruned.
        if u >= beta:
            return u
        alpha = max(alpha, u)
    return u
def min_value(state, player, pits, initial_max):
    """MIN step of plain (unpruned, full-depth) minimax.

    Args:
        state: Board as a list of rows (copied row by row per tried move).
        player: Player to move at this node.
        pits: Board-size argument forwarded unchanged to the helpers.
        initial_max: Player passed to ``utility`` for scoring.

    Returns:
        The utility of a terminal state, otherwise the smallest value among
        the children.

    Side effects:
        Increments the module-level ``count`` once per call.
    """
    global count
    count = count + 1

    if terminal_test(state, pits):
        return utility(state, pits, initial_max)

    lowest = math.inf  # nothing explored yet
    for move in available_actions(state, player, pits):
        board_copy = [row[:] for row in state]
        next_player, next_state = action(player, move, board_copy, pits)
        # Same player again -> keep minimising; otherwise MAX answers.
        recurse = min_value if next_player == player else max_value
        child = recurse(next_state, next_player, pits, initial_max)
        if child < lowest:
            lowest = child
    return lowest
def _read_human_move(possible_moves):
    """Prompt on stdin until the text entered parses to a legal move.

    Returns the accepted input text unchanged (not converted to ``int``).
    """
    while True:
        entered = input("Please enter your move\n")
        try:
            legal = int(entered) in possible_moves
        except ValueError:
            # Non-numeric input is treated as just another invalid move
            # instead of crashing the game.
            legal = False
        if legal:
            return entered
        print("Invalid move, you can play the following moves",
              *possible_moves,
              sep=',')


def play_mancala(player, pits, board, f):
    """Play a Mancala game to the end, logging every move to ``f``.

    Each turn the board and the legal moves are printed, then the move is
    chosen according to the controller tag stored in ``board[player][0]``:

    * ``'hu'`` - human, read from stdin
    * ``'ra'`` - uniformly random legal move
    * ``'mi'`` - ``minimax``
    * ``'ab'`` - ``alpha_beta``
    * ``'hm'`` - ``hminimax_decision`` with depth 10
    * ``'ha'`` - ``halpha_beta`` with depth 10

    The search-based controllers skip the search when only one move is
    legal and log a state count of 0 for that turn.

    Args:
        player: Index of the player who moves first.
        pits: Board-size argument forwarded unchanged to the helpers.
        board: Game state; it is passed directly (not copied) to ``action``.
        f: Object with a ``write(str)`` method that receives the game log.

    Raises:
        ValueError: If ``board[player][0]`` is not one of the tags above.
    """
    # A loop replaces the former one-recursion-per-move structure; the
    # sequence of prints, log writes and helper calls is the same.
    while True:
        print_board(board, pits)
        print("Player ", player, " turn")
        possible_moves = available_actions(board, player, pits)
        print("You can play the following moves", *possible_moves, sep=', ')
        if terminal_test(board, pits):
            break

        kind = board[player][0]
        if kind == 'hu':  # Human player
            move = _read_human_move(possible_moves)
        elif kind == 'ra':  # Random player
            move = random.choice(possible_moves)
        elif kind in ('mi', 'ab', 'hm', 'ha'):  # Search-based players
            if len(possible_moves) == 1:
                # Forced move: nothing to search.
                move, count = possible_moves[0], 0
            elif kind == 'mi':  # Minimax
                move, count = minimax(board, player, pits)
            elif kind == 'ab':  # Alpha Beta Pruning
                move, count = alpha_beta(board, player, pits)
            elif kind == 'hm':  # Heuristic Minimax
                move, count = hminimax_decision(board, player, possible_moves,
                                                pits, 10)
            else:  # 'ha': Heuristic Alpha Beta Pruning
                move, count = halpha_beta(board, player, pits, 10)
            f.write("Number of states searched is " + str(count) + "\n")
        else:
            raise ValueError("Unknown player type: " + repr(kind))

        print("Player", player, " plays ->", int(move))
        f.write("Player " + str(player) + " plays -> " + str(move) + "\n\n")
        player, board = action(player, int(move), board, pits)

    # Terminal case reached
    p0, p1 = calculate_score(board, pits)
    if p0 > p1:
        print("p0 wins with score", p0, " - ", p1)
        f.write("player 0 wins with score " + str(p0) + " - " + str(p1) +
                "\n\n")
    elif p0 < p1:
        print("p1 wins with score", p1, " - ", p0)
        f.write("player 1 wins with score " + str(p1) + " - " + str(p0) +
                "\n\n")
    else:
        print("scores tied", p0, " - ", p1)
        f.write("Scores tied " + str(p1) + " - " + str(p0) + "\n\n")