def check_terminal(board: np.ndarray, _last_action: Optional[PlayerAction] = None) -> bool:
    """Report whether ``board`` is a terminal position (a win or a draw).

    For each player a copy of the board is reduced to a 0/1 mask of that
    player's pieces and passed through ``_convolve2d`` with each of the four
    module-level kernels; any response equal to ``CONNECT_N`` counts as a
    win.  If nobody has won, the board is terminal only when it has no zero
    cell left.  ``_last_action`` is accepted but never read.
    """
    line_kernels = (col_kernel, row_kernel, dia_l_kernel, dia_r_kernel)

    # PLAYER2 is examined first, then PLAYER1.
    for piece, rival in ((PLAYER2, PLAYER1), (PLAYER1, PLAYER2)):
        mask = board.copy()
        mask[mask == rival] = NO_PLAYER
        mask[mask == piece] = BoardPiece(1)
        for kernel in line_kernels:
            response = _convolve2d(mask, kernel, 1, 0, 0, BoardPiece(0))
            if np.any(response == CONNECT_N):
                return True

    # No winner: terminal only if every cell is occupied (draw).
    return np.count_nonzero(board) == board.shape[0] * board.shape[1]
def evaluate_position(array_from_board: np.ndarray, player: BoardPiece) -> int:
    """Score one line of the board (a row, column or diagonal) for ``player``.

    Every window of four consecutive entries is inspected.  A window scores
    only when it holds ``player``'s pieces and empty cells exclusively:
    three pieces are worth 1000, two are worth 100 and one is worth 1.

    Arguments:
        array_from_board: the line of the board to evaluate
        player: the player whose pieces are being scored
    Return:
        int: sum of the scores of all windows (0 if the line is shorter
        than four entries)
    """
    empty = BoardPiece(0)
    points_by_own_pieces = {3: 1000, 2: 100, 1: 1}

    total = 0
    for start in range(len(array_from_board) - 3):
        window = array_from_board[start:start + 4]
        own = np.count_nonzero(window == player)
        free = np.count_nonzero(window == empty)
        # Own pieces plus empty cells must fill the whole window.
        if own + free == 4:
            total += points_by_own_pieces.get(own, 0)
    return total
def other_player(player: BoardPiece) -> BoardPiece:
    """Return the opponent of ``player``.

    Arguments:
        player: current player
    Return:
        BoardPiece: ``BoardPiece(2)`` when ``player`` is ``BoardPiece(1)``,
        otherwise ``BoardPiece(1)``
    """
    return BoardPiece(2) if player == BoardPiece(1) else BoardPiece(1)
def position_value(board: np.ndarray, player: BoardPiece,
                   _last_action: Optional[PlayerAction] = None) -> int:
    """Return the heuristic value of a complete board for ``player``.

    The score is the sum of:

    * centre column: +10 per own piece, -5 per opposing piece;
    * own lines: +200 / +50 / +10 for every kernel response equal to
      ``CONNECT_N`` / ``CONNECT_N - 1`` / ``CONNECT_N - 2``;
    * opposing lines: -250 / -55 / -12 for the same responses.

    Before convolving, the side being scored is written as 1 and the other
    side as 5, so any window containing a piece of the other side sums to
    at least 5 (presumably above ``CONNECT_N`` - confirm if ``CONNECT_N``
    can exceed 4).

    :param board: board to evaluate (not modified)
    :param player: the player from whose point of view the board is scored
    :param _last_action: accepted but never read
    :return: the heuristic value as a plain ``int``
    """
    rival = BoardPiece(player % 2 + 1)
    line_kernels = (col_kernel, row_kernel, dia_l_kernel, dia_r_kernel)

    # View in which `player` is 1 and the rival is 5.
    own_view = board.copy()
    own_view[own_view == rival] = 5
    own_view[own_view == player] = BoardPiece(1)

    # Mirror view in which the rival is 1 and `player` is 5.
    rival_view = board.copy()
    rival_view[rival_view == player] = BoardPiece(5)
    rival_view[rival_view == rival] = BoardPiece(1)

    value = 0

    # Scoring central positions.
    center = board[:, board.shape[1] // 2]
    value += (center == player).sum() * 10
    value += (center == rival).sum() * -5

    # (window sum, score) pairs for each side.
    own_scores = ((CONNECT_N, 200), (CONNECT_N - 1, 50), (CONNECT_N - 2, 10))
    rival_scores = ((CONNECT_N, -250), (CONNECT_N - 1, -55), (CONNECT_N - 2, -12))

    for view, scores in ((own_view, own_scores), (rival_view, rival_scores)):
        for kernel in line_kernels:
            window_sums = _convolve2d(view, kernel, 1, 0, 0, BoardPiece(0))
            for target, score in scores:
                # Count matching windows in one array operation instead of
                # visiting every element in Python.
                value += score * np.count_nonzero(window_sums == target)

    return int(value)
def connected_four_convolve(
    board: np.ndarray,
    player: BoardPiece,
    _last_action: Optional[PlayerAction] = None
) -> bool:
    """Return True if ``player`` has ``CONNECT_N`` pieces in a line.

    A copy of the board is turned into a 0/1 mask of ``player``'s pieces and
    passed through ``_convolve2d`` with each of the four module-level
    kernels; a response equal to ``CONNECT_N`` means a win.  The input board
    is not modified and ``_last_action`` is never read.
    """
    rival = BoardPiece(player % 2 + 1)

    mask = board.copy()
    mask[mask == rival] = NO_PLAYER
    mask[mask == player] = BoardPiece(1)

    return any(
        np.any(_convolve2d(mask, kernel, 1, 0, 0, BoardPiece(0)) == CONNECT_N)
        for kernel in (col_kernel, row_kernel, dia_l_kernel, dia_r_kernel)
    )
def opponent(player: BoardPiece) -> BoardPiece:
    """
    Return the player opposing ``player``.

    :param player: BoardPiece
        Player whose opponent is wanted
    :return: BoardPiece
        ``BoardPiece(2)`` if ``player`` is ``BoardPiece(1)``, else ``BoardPiece(1)``
    """
    if player == BoardPiece(1):
        return BoardPiece(2)
    return BoardPiece(1)
def negamax_alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
                       alpha: float, beta: float) -> float:
    """
    Negamax search of the game tree with alpha-beta pruning.

    :param board: current board state
    :param player: player to move at this node
    :param depth: remaining search depth
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :return: value of ``board`` from ``player``'s point of view
    """
    # Leaf: depth exhausted or the game is over.
    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)

    rival = BoardPiece(player % 2 + 1)
    best = -np.inf
    for move in get_valid_moves(board):
        child = apply_player_action(board, move, player, copy=True)
        # The child is scored for the rival, so negate it and mirror the window.
        child_value = -negamax_alpha_beta(child, rival, depth - 1, -beta, -alpha)
        best = max(best, child_value)
        alpha = max(alpha, best)
        if alpha >= beta:
            break  # cut-off: the remaining moves cannot change the result
    return best
def negamax(
    board: np.ndarray,
    player: BoardPiece,
    depth: int,
) -> float:
    """
    Plain ("colorless") negamax search of the game tree.

    The value of a node is always expressed from the point of view of the
    player the function is called for.

    :param board: current board state
    :param player: player to move at this node
    :param depth: remaining search depth
    :return: value of ``board`` from ``player``'s point of view
    """
    # Leaf: depth exhausted or the game is over.
    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)

    rival = BoardPiece(player % 2 + 1)
    best = -np.inf
    for move in get_valid_moves(board):
        child = apply_player_action(board, move, player, copy=True)
        # The child is scored for the rival, hence the sign flip.
        best = max(best, -negamax(child, rival, depth - 1))
    return best
def alphabeta(board: np.ndarray, alpha: np.int8, beta: np.int8,
              MaximisingPlayer: bool, player: BoardPiece, depth=4):
    """Depth-limited minimax with alpha-beta pruning.

    :param board: board to evaluate (not modified; children are copies)
    :param alpha: best value the maximiser can already guarantee
    :param beta: best value the minimiser can already guarantee
    :param MaximisingPlayer: True when the node belongs to ``player``,
        False when it belongs to the opponent
    :param player: the player the whole search is scored for
    :param depth: remaining search depth
    :return: minimax value of ``board`` for ``player``.  Leaves return
        ``position_value(board, player) * (depth + 1)``, so positions
        reached with more depth remaining are weighted more strongly.
    """
    if depth == 0 or check_terminal(board):
        return position_value(board, player) * (depth + 1)

    max_piece = player
    min_piece = BoardPiece(3 - player)

    if MaximisingPlayer:
        # Start from -inf rather than a finite sentinel: leaf values are
        # multiplied by (depth + 1) and can drop below -999, which a finite
        # floor would clip.
        value = -np.inf
        for action in get_player_actions(board):
            child_board = apply_player_action(board.copy(), action, max_piece)
            value = max(
                value,
                alphabeta(child_board, alpha, beta, False, player, depth - 1))
            alpha = max(alpha, value)
            if alpha >= beta:
                break  # beta cut-off
        return value

    # Minimising node; +inf for the symmetric reason.
    value = np.inf
    for action in get_player_actions(board):
        child_board = apply_player_action(board.copy(), action, min_piece)
        value = min(
            value,
            alphabeta(child_board, alpha, beta, True, player, depth - 1))
        beta = min(beta, value)
        if beta <= alpha:
            break  # alpha cut-off
    return value
def window_value(window, player: BoardPiece):
    """
    Score a single window of board cells for ``player``.

    :param window: sequence of cells to score; must provide ``count``
    :param player: player the window is scored for, of type BoardPiece
    :return: heuristic value of the window: +1000 for four own pieces,
             +10 for three own and one empty, +5 for two own and two empty;
             independently -90 for three opposing pieces and one empty,
             -20 for two opposing and two empty
    """
    rival = BoardPiece(2) if player == BoardPiece(1) else BoardPiece(1)

    own = window.count(player)
    theirs = window.count(rival)
    free = window.count(BoardPiece(0))

    score = 0

    # Reward own formations.
    if own == 4:
        score += 1000
    elif own == 3 and free == 1:
        score += 10
    elif own == 2 and free == 2:
        score += 5

    # Penalise opposing threats.
    if theirs == 3 and free == 1:
        score -= 90
    elif theirs == 2 and free == 2:
        score -= 20

    return score
def test_apply_player_action():
    """apply_player_action returns an ndarray when asked to work on a copy."""
    from agents.common import apply_player_action, PlayerAction

    empty_board = np.zeros((6, 7), dtype=BoardPiece)

    result = apply_player_action(empty_board, PlayerAction(2), BoardPiece(2), True)

    assert isinstance(result, np.ndarray)
def test_alpha_beta():
    """alpha_beta returns a tuple when searching from an empty board."""
    from agents.agent_minimax import alpha_beta

    board = np.zeros((6, 7), dtype=BoardPiece)
    player = BoardPiece(2)
    depth = 5
    alpha = -math.inf
    beta = math.inf
    maximizingPlayer = True

    ret = alpha_beta(board, player, depth, alpha, beta, maximizingPlayer)

    # Check against the type `tuple`.  `tuple()` is an empty tuple, and
    # isinstance(x, ()) is False for every x, so the assert could never pass.
    assert isinstance(ret, tuple)
def check_result(board: np.ndarray, player: BoardPiece, _last_action: Optional[PlayerAction] = None) -> bool:
    """Classify ``board`` from ``player``'s point of view.

    Returns ``1`` if ``player`` has a winning line, ``-0.1`` if the opponent
    (piece ``3 - player``) has one, ``0`` if the board is full with no
    winner, and ``False`` otherwise.  ``player`` is checked first.

    NOTE(review): despite the ``bool`` annotation the result is int, float
    or bool, and the draw value ``0`` compares equal to ``False``; callers
    must use ``is False`` to tell a draw from an unfinished game.
    """
    MaxPiece = player
    MinPiece = 3 - player
    line_kernels = (col_kernel, row_kernel, dia_l_kernel, dia_r_kernel)

    # (piece to look for, piece to blank out, value returned on a win)
    checks = ((MaxPiece, MinPiece, 1), (MinPiece, MaxPiece, -0.1))
    for piece, rival, verdict in checks:
        mask = board.copy()
        mask[mask == rival] = NO_PLAYER
        mask[mask == piece] = BoardPiece(1)
        for kernel in line_kernels:
            response = _convolve2d(mask, kernel, 1, 0, 0, BoardPiece(0))
            if np.any(response == CONNECT_N):
                return verdict

    # Nobody won: a board without zero cells is a draw.
    if np.count_nonzero(board) == board.shape[0] * board.shape[1]:
        return 0
    return False
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """Choose a random column that is not full.

    A column counts as open when its cell in row ``ROWS - 1`` is 0
    (presumably the top row of the board - confirm the orientation).

    :param board: current board state
    :param player: player to move; the choice does not depend on it
    :param saved_state: passed through unchanged
    :return: ``(action, saved_state)`` where ``action`` is one open column
    :raises ValueError: if no column is open
    """
    open_columns = [col for col in range(COLUMNS) if board[ROWS - 1][col] == 0]
    if not open_columns:
        raise ValueError("no valid column left: the board is full")

    # random.choice returns a single column; random.sample(..., 1) returned
    # a one-element list, which is not a valid action.
    action = PlayerAction(random.choice(open_columns))
    return action, saved_state
def test_random():
    """With a single open column the random agent must pick that column."""
    from agents.agents_random.random import generate_move_random

    # The only zero on this board is in the first row, column 3.
    nearly_full_board = np.array([
        [1, 2, 2, 0, 1, 2, 2],
        [2, 1, 1, 2, 1, 2, 2],
        [2, 2, 1, 1, 1, 2, 2],
        [2, 1, 2, 2, 2, 1, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [1, 1, 2, 1, 2, 1, 2],
    ])

    action, _ = generate_move_random(nearly_full_board, BoardPiece(1), saved_state=0)

    assert isinstance(action, PlayerAction)
    assert action == PlayerAction(3)  # Taking the empty one
def evaluate_window(window: list, player: BoardPiece) -> float:
    """
    Score one board window for ``player`` by counting pieces.

    :param window: list
        Board snippet to score [for connect 4 the window length is 4]
    :param player: BoardPiece
        Player for whom the window is scored
    :return: float
        +100 for four own pieces, +5 for three own and one empty, +2 for
        two own and two empty; independently -4 when the opponent has three
        pieces and one cell is empty
    """
    rival = BoardPiece(2) if player == BoardPiece(1) else BoardPiece(1)

    own = window.count(player)
    free = window.count(BoardPiece(0))

    score = 0
    # Count `player`'s pieces here, not the literal 1: otherwise player 2
    # would be rewarded for player 1's four-in-a-row.
    if own == 4:
        score += 100
    elif own == 3 and free == 1:
        score += 5
    elif own == 2 and free == 2:
        score += 2

    if window.count(rival) == 3 and free == 1:
        score -= 4

    return score
def expand(self, move: PlayerAction):
    """
    Create the child node reached by playing ``move`` from this node.

    The child holds a copy of the board with the move applied, has the other
    player to move and points back to this node as its parent.  The move is
    recorded in ``children`` and ``expanded_moves`` and removed from
    ``unexpanded_moves``.

    :param move: a valid move for this node's state
    :return: the newly created node
    """
    child_board = apply_player_action(
        board=self.board, action=move, player=self.to_play, copy=True)
    next_to_play = BoardPiece(self.to_play % 2 + 1)
    child = MonteCarloNode(
        child_board, to_play=next_to_play, last_move=move, parent=self)

    # Bookkeeping: register the child and mark the move as expanded.
    self.children[move] = child
    self.expanded_moves.append(move)
    self.unexpanded_moves.remove(move)
    return child
def negamax_heuristic(board: np.ndarray, player: BoardPiece) -> float:
    """
    Heuristic for negamax: a weighted count of kernel matches on the board.

    On a copy of the board ``player``'s pieces become 1 and the opponent's
    become -1.  For every ``n`` from 2 to ``CONNECT_N`` the copy is passed
    through ``_convolve2d`` with each kernel in ``kernels[n]``, and each
    response equal to ``n - 1`` adds ``weights[n - 1]`` to the score.

    :param board: current board (not modified)
    :param player: the player to play
    :return: heuristic score of the board for ``player``
    """
    rival = BoardPiece(player % 2 + 1)

    signed = board.copy()
    signed[signed == rival] = -1
    signed[signed == player] = 1

    total = 0
    for n in range(2, CONNECT_N + 1):
        weight = weights[n - 1]
        for kernel in kernels[n]:
            response = _convolve2d(signed, kernel, 1, 0, 0, BoardPiece(0))
            total += weight * np.sum(response == (n - 1))
    return total
def test_expand():
    """Expanding the root yields correctly linked child nodes in the tree."""
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    # tobytes() returns the same bytes as the deprecated tostring() alias.
    key = hash(initial_state.tobytes()) + hash(player)
    root = tree.nodes[key]

    # Iterate over a snapshot: expanding presumably removes entries from
    # root.unexpanded_moves, and mutating a list while iterating over it
    # skips elements.
    for _ in list(root.unexpanded_moves):
        child = tree.expand(root)
        assert isinstance(child, MonteCarloNode)
        assert child.last_move in root.legal_moves
        assert child.last_move in root.expanded_moves
        assert child.parent == root
        assert child.to_play == BoardPiece(player % 2 + 1)
        child_key = hash(child.board.tobytes()) + hash(child.to_play)
        assert tree.nodes[child_key] == child
def low_row_heuristic(board: np.ndarray, player: BoardPiece) -> float:
    """
    A simple heuristic that weights ``player``'s pieces by their row.

    A piece in row index ``r`` contributes ``board.shape[0] - 1 - r``, so
    pieces in row 0 weigh most and pieces in the last row weigh nothing.
    The opponent's pieces are ignored.

    :param board: current board (not modified)
    :param player: the player whose pieces are scored
    :return: sum of the row weights of ``player``'s pieces
    """
    rival = BoardPiece(player % 2 + 1)

    occupancy = board.copy()
    occupancy[occupancy == rival] = 0
    occupancy[occupancy == player] = 1

    # Column vector of descending row weights, broadcast across the columns.
    row_weights = np.arange(board.shape[0])[::-1, np.newaxis]
    return float(np.sum(occupancy * row_weights))
def test_expand_node():
    """Every unexpanded move of a node expands into a consistent child."""
    node = copy.deepcopy(initial_node)

    # Iterate over a snapshot: expand() removes the move from
    # node.unexpanded_moves, and mutating the list while iterating over it
    # would silently skip every other move.
    for move in list(node.unexpanded_moves):
        child = node.expand(move)
        assert isinstance(child, MonteCarloNode)
        assert move == child.last_move
        assert child.parent == node
        assert child.to_play == BoardPiece(node.to_play % 2 + 1)

        # Attributes must match between the method-expanded child and the
        # manually-expanded one.
        same_child = fully_expanded_node.get_child(move)
        d1 = vars(child)
        d2 = vars(same_child)
        for attribute, value in d1.items():
            if attribute in ['parent']:
                # Parents are different objects (deep copy vs. fixture).
                continue
            assert np.all(d2[attribute] == value)
def minimax_value(board: np.ndarray, player: BoardPiece, maxing: bool, depth: int) -> float:
    """
    Depth-limited minimax value of ``board`` for ``player``.

    :param board: current board state (not modified; children are copies)
    :param player: the player the search is scored for; unchanged through
        the whole recursion
    :param maxing: True when it is ``player``'s turn at this node, False
        when it is the opponent's turn
    :param depth: remaining search depth
    :return: ``evaluate_end_state(board, player)`` at a leaf, otherwise the
        max (own turn) or min (opponent's turn) over the child values
    """
    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)

    other_player = BoardPiece(player % 2 + 1)

    # The piece dropped at this ply belongs to whoever is to move: the
    # opponent on minimising plies, not `player`.
    if maxing:
        mover, pick, value = player, max, -np.inf
    else:
        mover, pick, value = other_player, min, np.inf

    for move in get_valid_moves(board):
        child = apply_player_action(board, move, mover, copy=True)
        child_value = minimax_value(board=child, player=player,
                                    maxing=not maxing, depth=depth - 1)
        value = pick(value, child_value)
    return value
def simulate(self, node: MonteCarloNode) -> Union[BoardPiece, GameState]:
    """
    Play random moves from ``node`` until the game is over.

    The node's board is copied first, so the node itself is not modified.

    :param node: node whose board and player to move start the rollout
    :return: result of ``evaluate_end_state`` on the final board (per the
        annotation, a player or a GameState)
    """
    current_rollout_state = node.board.copy()
    curr_player = node.to_play

    while not check_game_over(current_rollout_state):
        possible_moves = get_valid_moves(current_rollout_state)
        if possible_moves.size > 1:
            action = np.random.choice(possible_moves)
        else:
            # Exactly one legal move: pass the scalar column, not the whole
            # one-element array (assumes a 1-D array of moves).
            action = possible_moves[0]
        current_rollout_state = apply_player_action(
            current_rollout_state, action, curr_player, copy=True)
        curr_player = BoardPiece(curr_player % 2 + 1)

    return evaluate_end_state(current_rollout_state)
def rollout(self, board: np.ndarray, player: BoardPiece) -> BoardPiece:
    """
    Play random moves, alternating players, until the board is terminal.

    :param board: starting board
    :param player: player to move on ``board``
    :return: the winning player, or ``BoardPiece(0)`` for a draw
    """
    # Keep applying random moves until the position is terminal.
    while not self.isTerminal(board, player):
        random_move, _ = generate_move_random(board, player)
        board = apply_player_action(board, random_move, player, True)
        player = opponent(player)

    # Terminal position: report it from the side that would move next.
    if self.determineWin(board, player):  # win
        return player
    if self.determineWin(board, opponent(player)):  # loss
        return opponent(player)
    return BoardPiece(0)  # draw
def evaluate_end_state(board: np.ndarray, player: BoardPiece, heuristic=negamax_heuristic) -> float:
    """
    Value of ``board`` for ``player``.

    :param board: board to evaluate
    :param player: player the value is computed for
    :param heuristic: callable ``(board, player)`` used when the game is
        not decided
    :return: ``np.inf`` if ``player`` has won, ``0`` for a draw, ``-np.inf``
        if the other player has won, otherwise ``heuristic(board, player)``
    """
    own_state = check_end_state(board, player)
    rival = BoardPiece(player % 2 + 1)

    if own_state == GameState.IS_WIN:
        return np.inf
    if own_state == GameState.IS_DRAW:
        return 0
    # TODO: workaround to exclude checking the end state twice
    if check_end_state(board, rival) == GameState.IS_WIN:
        return -np.inf
    return heuristic(board, player)
def alpha_beta_value(board: np.ndarray, player: BoardPiece, maxing: bool, depth: int, alpha, beta) -> float:
    """
    Depth-limited minimax value of ``board`` for ``player`` with alpha-beta pruning.

    :param board: current board state (not modified; children are copies)
    :param player: the player the search is scored for; unchanged through
        the whole recursion
    :param maxing: True when it is ``player``'s turn at this node, False
        when it is the opponent's turn
    :param depth: remaining search depth
    :param alpha: best value the maximiser can already guarantee
    :param beta: best value the minimiser can already guarantee
    :return: ``evaluate_end_state(board, player)`` at a leaf, otherwise the
        pruned max/min over the child values
    """
    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)

    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    if maxing:
        value = -np.inf
        for move in valid_moves:
            child = apply_player_action(board, move, player, copy=True)
            value = max(value, alpha_beta_value(board=child, player=player, maxing=False,
                                                depth=depth - 1, alpha=alpha, beta=beta))
            alpha = max(alpha, value)
            if alpha >= beta:
                break  # beta cut-off
        return value

    value = np.inf
    for move in valid_moves:
        # On a minimising ply the opponent moves, so the opponent's piece
        # is dropped, not `player`'s.
        child = apply_player_action(board, move, other_player, copy=True)
        value = min(value, alpha_beta_value(board=child, player=player, maxing=True,
                                            depth=depth - 1, alpha=alpha, beta=beta))
        beta = min(beta, value)
        if beta <= alpha:
            break  # alpha cut-off
    return value
def evaluate_end_state(board: np.ndarray, player: BoardPiece, heuristic=negamax_heuristic) -> float:
    """
    Score ``board`` from ``player``'s point of view.

    A win is worth ``+inf``, a loss ``-inf`` and a draw ``0``; a game that
    is still running is scored by ``heuristic``.

    :param heuristic: heuristic function called on unfinished game boards
    :param board: current board state
    :param player: current player
    :return: the score described above
    """
    rival = BoardPiece(player % 2 + 1)
    state = check_end_state(board, player)

    if state == GameState.IS_WIN:
        score = np.inf  # win state
    elif state == GameState.IS_DRAW:
        score = 0  # draw state
    # TODO: avoid checking the end state twice
    elif check_end_state(board, rival) == GameState.IS_WIN:
        score = -np.inf  # lose state
    else:
        score = heuristic(board, player)  # still playing
    return score
import timeit import numpy as np from agents.common import connected_four, connected_four_convolve, connected_four_iter, initialize_game_state, BoardPiece, PlayerAction, NO_PLAYER, CONNECT_N """performance evaluation of connected 4 functions""" board = initialize_game_state() number = 10**4 res = timeit.timeit("connected_four_iter(board, player)", setup="connected_four_iter(board, player)", number=number, globals=dict(connected_four_iter=connected_four_iter, board=board, player=BoardPiece(1))) print(f"Python iteration-based: {res/number*1e6 : .1f} us per call") res = timeit.timeit("connected_four_convolve(board, player)", number=number, globals=dict( connected_four_convolve=connected_four_convolve, board=board, player=BoardPiece(1))) print(f"Convolve2d-based: {res/number*1e6 : .1f} us per call") res = timeit.timeit("connected_four(board, player)", setup="connected_four(board, player)", number=number,
import numpy as np
from agents.common import GameState, BoardPiece, PlayerAction

NO_PLAYER = BoardPiece(0)  # board[i, j] == NO_PLAYER where the position is empty
PLAYER1 = BoardPiece(1)  # board[i, j] == PLAYER1 where player 1 has a piece
PLAYER2 = BoardPiece(2)  # board[i, j] == PLAYER2 where player 2 has a piece


def test_initialize_game_state():
    # A fresh game state is an all-empty 6x7 array of BoardPiece.
    from agents.common import initialize_game_state

    fresh_board = initialize_game_state()

    assert isinstance(fresh_board, np.ndarray)
    assert fresh_board.dtype == BoardPiece
    assert fresh_board.shape == (6, 7)
    assert np.all(fresh_board == NO_PLAYER)
    """ assert CONDITON , "OutputString" """


def test_pretty_print_board():
    # The printed form of an empty board has nine lines.
    from agents.common import pretty_print_board, initialize_game_state

    printed = pretty_print_board(initialize_game_state())

    nlines = 9
    assert len(printed.splitlines()) == nlines
def minimax_action(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]):
    """Choose a column for ``player`` with a hand-unrolled four-ply search.

    The search alternates player / other player / player / other player over
    columns 0-6.  Every visited node of a ``minmax_tree`` receives a
    cumulative heuristic value from ``eval_heu``; values are then backed up
    with ``min_child`` on the other player's plies and ``max_child`` on the
    player's plies.  On an empty board the search is skipped and column 3 is
    returned.

    Args:
        board: Current state of the board (each ply works on a copy).
        player: Whose turn it is.
        saved_state: Not read by this function.

    Returns:
        action: Column to play.
        saved_state_out: The tree built during the search.

    NOTE(review): the exact contracts of ``assign_weight``, ``eval_heu``,
    ``min_child`` and ``max_child`` are not visible here; comments about
    them below are assumptions to confirm.
    """
    global BOARD_VALUES
    tree = minmax_tree()  # Weights tree initialization.
    other_player = None
    if player == BoardPiece(1):  # Finding out which player is who.
        other_player = BoardPiece(2)
    elif player == BoardPiece(2):
        other_player = BoardPiece(1)
    idx1 = []  # passed to eval_heu, then to max_child at the root - presumably filled by eval_heu
    start = -1  # set to 10 below when the board is empty
    for i in range(0, 7):  # Player plays
        cumul1 = 0  # Initialization of the cumulative variable.
        old_board = board.copy()
        # Optimal way to start: central column.
        # All seven column sums are zero, i.e. the board is empty.
        if sum(sum(old_board[:, :]) == 0) == 7:
            start = 10
            break
        game, board_val = assign_weight(old_board, i, player, BOARD_VALUES)
        break_y, cumul1 = eval_heu(cumul1, board_val, i, idx1, game, node_type=np.array([-1]))
        if break_y and cumul1 > 10000:  # Already a winning position, break the search.
            tree.child[i].value = cumul1
            break
        elif break_y and cumul1 < 10000:  # Full column, do not go down its branches.
            tree.child[i].value = cumul1
            continue
        idx2 = []
        for j in range(0, 7):  # other player plays
            # Fresh copy per reply - presumably assign_weight mutates the board it is given.
            old_board1 = old_board.copy()
            game, board_val = assign_weight(old_board1, j, other_player, BOARD_VALUES)
            break_y, cumul2 = eval_heu(cumul1, board_val, j, idx2, game)
            if break_y:  # Either a full-column (worst value given) or a win (best one given).
                tree.child[i].child[j].value = cumul2
                continue
            idx3 = []
            for k in range(0, 7):  # player plays
                old_board2 = old_board1.copy()
                game, board_val = assign_weight(old_board2, k, player, BOARD_VALUES)
                break_y, cumul3 = eval_heu(cumul2, board_val, k, idx3, game, np.array([-1]))
                if break_y:  # Either a full-column (worst value given) or a win (best one given).
                    tree.child[i].child[j].child[k].value = cumul3
                    continue
                idx4 = []
                for v in range(0, 7):  # other player plays
                    old_board3 = old_board2.copy()
                    game, board_val = assign_weight(old_board3, v, other_player, BOARD_VALUES)
                    break_y, cumul4 = eval_heu(cumul3, board_val, v, idx4, game)
                    # Last layers' nodes assigned the top-down cumulative heuristic value.
                    # (break_y is not checked at this deepest ply.)
                    tree.child[i].child[j].child[k].child[v].value = cumul4
                _, val_4 = min_child(tree.child[i].child[j].child[k], idx4)
                tree.child[i].child[j].child[k].value = val_4  # Assigning the value to father of minimal node.
            _, val_3 = max_child(tree.child[i].child[j], idx3)
            tree.child[i].child[j].value = val_3  # Assigning the value to father of maximal node.
        _, val_2 = min_child(tree.child[i], idx2)
        tree.child[i].value = val_2  # Assigning the value to father of minimal node.
    # NOTE(review): on an empty board the loop breaks before idx1 is used,
    # so max_child receives an empty idx1 here - confirm it tolerates that.
    action, tree.value = max_child(tree, idx1)
    action = PlayerAction(action)  # Action to be taken in the Class PlayerAction
    if start == 10:  # If it is the 1st movement, 1st action performed is the optimal: column 3.
        # NOTE(review): this is a plain int, not wrapped in PlayerAction like the line above.
        action = 3
    saved_state_out = tree
    return action, saved_state_out