def minimax(board: np.ndarray, depth: int, alpha: int, beta: int,
            player: BoardPiece, maximizing_player: bool) -> Tuple[int, int]:
    """
    Alpha-beta pruned minimax search, scored from the view of ``player``.

    :param board: position to evaluate
    :param depth: remaining plies; at 0 the heuristic value is returned
    :param alpha: best score the maximizing side can already guarantee
    :param beta: best score the minimizing side can already guarantee
    :param player: the agent's piece; all scores are relative to it
    :param maximizing_player: True when the agent moves next, False when
        the opponent moves next
    :return: ``(column, score)``; ``column`` is None at depth 0 and for
        finished games
    """
    # The agent's identity never changes during the search, so the opposing
    # piece is derived once.
    opponent = PLAYER2 if player == PLAYER1 else PLAYER1
    candidate_columns = find_moves(board)

    # Depth limit reached: fall back to the static evaluation.
    if depth == 0:
        return None, heuristic(board, player)

    # Finished game: fixed scores for win / loss / draw.
    if check_end_state(board, player) != GameState.STILL_PLAYING:
        if connected_four(board, player):
            return None, 10000000
        if connected_four(board, opponent):
            return None, -10000000
        return None, 0

    if not maximizing_player:
        # Opponent to move: keep the lowest score.
        best = math.inf
        for col in candidate_columns:
            board, child = apply_player_action(board, col, opponent, True)
            _, value = minimax(child, depth - 1, alpha, beta, player, True)
            if value < best:
                best, action_column = value, col
            beta = min(beta, best)
            if alpha >= beta:
                break
        return action_column, best

    # Agent to move: keep the highest score.
    best = -math.inf
    for col in candidate_columns:
        board, child = apply_player_action(board, col, player, True)
        _, value = minimax(child, depth - 1, alpha, beta, player, False)
        if value > best:
            best, action_column = value, col
        alpha = max(alpha, best)
        if alpha >= beta:
            break
    return action_column, best
def test_connected_four():
    """An empty board has no winner; a row of four wins only for its owner."""
    empty_board = initialize_game_state()
    random_player = np.int8(np.random.choice([1, 2]))
    empty_result = connected_four(board=empty_board, player=random_player)

    # Four pieces of player 1 in row 0, columns 0-3.
    row_win = np.zeros((6, 7))
    row_win[0, 0:4] = np.array([1, 1, 1, 1], dtype=np.int8)

    owner_result = connected_four(board=row_win, player=np.int8(1))
    other_result = connected_four(board=row_win, player=np.int8(2))

    assert empty_result is False
    assert owner_result is True
    assert other_result is False
def minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState] = None
) -> Tuple[int, Optional[SavedState]]:
    """
    Pick a column with a one-move look-ahead for both sides.

    For every playable column the agent first checks whether dropping its own
    piece there wins immediately; if so that column is returned right away.
    Otherwise it checks whether the opposing player could win with any reply;
    columns that allow such a reply are excluded. The move is then drawn at
    random from the remaining "safe" columns. If every playable column allows
    an opposing win, a random playable column is returned instead.

    Keyword arguments:
    board: the board that the player is playing and trying to win
    player: current player
    saved_state: Optional Saved State

    Returns:
    Tuple: the chosen column, and the (unchanged) Optional Saved State

    Raises:
    ValueError: from np.random.choice if the board has no playable column
    """
    other_player = opponent(player)
    # Only columns reported as available may be chosen; starting from a
    # hard-coded 0..6 list could select a column that is already full.
    legal_columns = list(available_columns(board))
    safe_columns = []

    for col in legal_columns:
        board_after_own = apply_player_action(board, col, player, True)
        if connected_four(board_after_own, player):
            return col, saved_state

        # Does any reply let the opposing player win after this move?
        opponent_can_win = any(
            connected_four(
                apply_player_action(board_after_own, reply, other_player, True),
                other_player,
            )
            for reply in available_columns(board_after_own)
        )
        if not opponent_can_win:
            safe_columns.append(col)

    # Every move loses: still return a legal column rather than failing on
    # an empty candidate list.
    candidates = safe_columns if safe_columns else legal_columns
    action = np.random.choice(np.array(candidates))
    return action, saved_state
def check_winner(board: np.ndarray):
    """
    Classify a board as won, drawn or still open.

    :param board: board to inspect
    :return: 10 if PLAYER1 has four connected, -10 if PLAYER2 has four
        connected (PLAYER2 takes precedence if both do), 0 if nobody has won
        and no valid move is left, None if the game is still open
    """
    first_wins = connected_four(board, PLAYER1)
    second_wins = connected_four(board, PLAYER2)
    open_moves = len(get_valid_moves(board))

    if second_wins:
        return -10
    if first_wins:
        return 10
    return 0 if open_moves == 0 else None
def compute_score(board: np.ndarray, player: BoardPiece) -> float:
    """
    Placeholder heuristic for minimax that only recognises finished games.

    :param board: the board state to score
    :param player: the player from whose point of view the score is computed
    :return: 100 if ``player`` has four connected, -100 if the opponent has,
        0 in every other case
    """
    if connected_four(board, player):
        return 100
    rival = find_opponent(player)
    return -100 if connected_four(board, rival) else 0
def recursive(board: np.ndarray, player: BoardPiece, depth: int):
    """
    Recursive look-ahead sketch: try every available column for ``player``
    and return the first column that produces four connected.

    NOTE(review): this function looks unfinished.
      * The guard below tests the enum members themselves rather than a state
        computed from ``board``; if those members are truthy the function
        returns None immediately and nothing below ever runs.
      * ``depth`` is decremented in the recursive call but never compared, so
        it does not bound the recursion.
      * The result of the recursive call is discarded, and ``action`` is
        computed at the end but never returned.

    :param board: board to search from
    :param player: piece that moves next
    :param depth: intended search depth (currently unused as a limit)
    :return: a winning column if one is found directly, otherwise None
    """
    columns = [0, 1, 2, 3, 4, 5, 6]
    # NOTE(review): `max` / `min` shadow the builtins and are never read.
    max = 0
    min = 0
    # NOTE(review): condition does not depend on `board` — confirm intent.
    if GameState.IS_DRAW or GameState.IS_WIN:
        return
    else:
        if player == PLAYER1:
            other_player = PLAYER2
        else:
            other_player = PLAYER1
        for i in avail_cols(board):
            # Reset on every iteration; only appended to right before the
            # early return, so it is always empty at the length check below.
            danger_col = []
            board_i = apply_player_action(board, i, player, True)
            if connected_four(board_i, player) == True:
                danger_col.append(i)
                return i
            else:
                recursive(board_i, other_player, depth - 1)
                if len(danger_col) != 0:
                    columns.remove(
                        i
                    )  # don't use columns i in random.choice if they will lead to an other_player win
        cols = np.array(columns)
        # Random pick among the remaining columns; the value is not returned.
        action = np.random.choice(cols)
def test_MCTS():
    """
    Walk through the four MCTS phases (selection, expansion, rollout,
    backpropagation) on hand-built nodes, then call MCTS once end to end.
    """
    # Selection
    # Root with untried moves left and one child attached.
    board = initialize_game_state()
    child_board = initialize_game_state()
    child_board[0, 0] = PLAYER1
    current_node = Node(state=board)
    child_node = Node(state=child_board, parent=current_node)
    current_node.untriedMoves = [0, 3, 4]
    current_node.children = [child_node]
    selected_node = Node.selection(current_node)
    # Expectation of this test: selection stays at the root here.
    assert selected_node == current_node
    # Expand
    current_node.untriedMoves = [0, 3, 4]
    explored_node = Node.expand(current_node)
    # One of the three untried moves must have been consumed.
    assert len(current_node.untriedMoves) == 2
    assert explored_node != current_node
    # rollout
    current_node = Node(state=board, player=PLAYER1)
    won = connected_four(current_node.state, PLAYER1)
    # NOTE(review): `board` came from initialize_game_state() and is not
    # modified in the visible test code — verify this assertion can hold.
    assert won
    #backpropagate
    Node.update(current_node, result=[-1, 1])
    assert current_node.visits == 1
    selectedColumn = MCTS(board)
    # NOTE(review): a truthiness check fails for a returned column index of 0
    # — confirm whether `is not None` was intended.
    assert selectedColumn
def test_connected_four_horizontal(self):
    """Four PLAYER1 pieces in columns 0-3 win; an interrupted row does not."""
    # Uninterrupted row: PLAYER1 in columns 0..3.
    c4_yes = common.initialize_game_state()
    for col in range(4):
        common.apply_player_action(c4_yes, PlayerAction(col), common.PLAYER1)

    # Same row, but column 2 belongs to PLAYER2.
    c4_no = common.initialize_game_state()
    interrupted_row = (common.PLAYER1, common.PLAYER1,
                       common.PLAYER2, common.PLAYER1)
    for col, piece in enumerate(interrupted_row):
        common.apply_player_action(c4_no, PlayerAction(col), piece)

    # Each board is checked with and without the last-action hint.
    assert common.connected_four(c4_yes, PLAYER1) == True
    assert common.connected_four(c4_yes, PLAYER1, PlayerAction(3)) == True
    assert common.connected_four(c4_no, PLAYER1) == False
    assert common.connected_four(c4_no, PLAYER1, PlayerAction(3)) == False
def testConnectedFour(self):
    """connected_four on a single piece, a full column, and a broken row."""
    from agents.common import connected_four

    # One piece in the corner: nobody has won yet.
    column_board = np.zeros((6, 7))
    column_board[0, 0] = 1 * player
    mixed_board = column_board.copy()
    self.assertFalse(connected_four(column_board, player))
    self.assertFalse(connected_four(column_board, PLAYER2))

    # Fill column 1 completely; the untouched copy must stay without a win,
    # and the transposed board (column becomes row) must still be a win.
    column_board[:, 1] = np.ones(6) * player
    self.assertTrue(connected_four(column_board, player))
    self.assertFalse(connected_four(mixed_board, player))
    self.assertTrue(connected_four(column_board.T, player))

    # PLAYER2 diagonal block plus a row of `player` pieces with a gap in the
    # middle (the row assignment overwrites the last diagonal cell).
    mixed_board[2:6, 3:7] = np.eye(4) * PLAYER2
    mixed_board[5, :] = np.array([1, 1, 1, 0, 1, 1, 1]) * player
    self.assertFalse(connected_four(mixed_board, player))
    # Top corner piece is now player
    self.assertFalse(connected_four(mixed_board, player))
def test_connect_four():
    """Check connected_four for both players on the fixture boards b1-b4."""
    from agents.common import connected_four

    # (board, PLAYER1 expected to win, PLAYER2 expected to win)
    expectations = (
        (b1, False, False),
        (b2, False, True),
        (b3, True, False),
        (b4, True, False),
    )
    for fixture, first_wins, second_wins in expectations:
        assert bool(connected_four(fixture, PLAYER1)) == first_wins
        assert bool(connected_four(fixture, PLAYER2)) == second_wins
def test_connected_four():
    """Horizontal, vertical and diagonal wins are all detected."""
    from agents.common import connected_four

    # Horizontal: PLAYER1 fills row 5, columns 0-3; PLAYER2 sits above.
    horizontal = np.zeros((6, 7))
    horizontal[5, 0:4] = PLAYER1
    horizontal[4, 0:3] = PLAYER2
    assert (connected_four(horizontal, PLAYER1))

    # Vertical: PLAYER2 fills column 4, rows 2-5; PLAYER1 in column 5.
    vertical = np.zeros((6, 7))
    vertical[2:6, 4] = PLAYER2
    vertical[3:6, 5] = PLAYER1
    assert (connected_four(vertical, PLAYER2))

    # Diagonal: PLAYER1 on (5,0), (4,1), (3,2), (2,3); PLAYER2 in row 5.
    diagonal = np.zeros((6, 7))
    diagonal[[5, 4, 3, 2], [0, 1, 2, 3]] = PLAYER1
    diagonal[5, 4:7] = PLAYER2
    assert (connected_four(diagonal, PLAYER1))
    # Flipping the rows turns it into the opposite diagonal direction.
    assert (connected_four(np.flipud(diagonal), PLAYER1))
def test_connected_four(board=board_to_test, player=play, last_action=play_act):
    """
    Check that connected_four returns a plain bool for the given position.

    :param board: Playing board (np.ndarray)
    :param player: The player putting the piece (BoardPiece)
    :param last_action: The column where the piece is placed (PlayerAction)
    """
    from agents.common import connected_four

    outcome = connected_four(board, player, last_action)
    assert isinstance(outcome, bool)
def test_connected_four():
    """Empty board has no win; row, column and diagonal of four each win."""
    from agents.common import connected_four
    from agents.common import initialize_game_state

    empty = initialize_game_state()
    assert connected_four(empty, PLAYER1) is False

    # Row 0, columns 0-3.
    in_a_row = empty.copy()
    in_a_row[0, 0:4] = PLAYER1
    assert connected_four(in_a_row, PLAYER1) is True

    # Column 0, rows 0-3.
    in_a_column = empty.copy()
    in_a_column[0:4, 0] = PLAYER1
    assert connected_four(in_a_column, PLAYER1) is True

    # Cells (0,0), (1,1), (2,2), (3,3).
    on_diagonal = empty.copy()
    for step in range(4):
        on_diagonal[step, step] = PLAYER1
    assert connected_four(on_diagonal, PLAYER1) is True
def test_connected_four():
    """Run connected_four against four hand-written, fully specified boards."""
    from agents.common import connected_four

    def _as_board(rows):
        # Build the array from BoardPiece elements, one per cell.
        return np.array([[BoardPiece(cell) for cell in row] for row in rows])

    # 5th column has connected 4 for PLAYER1 - testing vertical win
    test_board = _as_board([
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 2, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 1],
    ])
    assert connected_four(test_board, PLAYER1) == True
    assert connected_four(test_board, PLAYER2) == False

    # row 0, first 4 units are PLAYER2 - testing horizontal win
    test_board_2 = _as_board([
        [2, 2, 2, 2, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 2, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 1, 2, 1],
    ])
    assert connected_four(test_board_2, PLAYER2) == True
    assert connected_four(test_board_2, PLAYER1) == False

    # board labelled as the "\" diagonal case - Player 2 expected to win
    test_board_3 = _as_board([
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 2, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 2, 2, 1],
    ])
    assert connected_four(test_board_3, PLAYER2) == True

    # board labelled as the "/" diagonal case - Player 2 expected to win
    test_board_4 = _as_board([
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 2, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 0, 2, 2, 1],
    ])
    assert connected_four(test_board_4, PLAYER2) == True
def simulation(self, node: Node) -> int:
    """
    Play random moves from ``node`` until the board is full, no column is
    open, or the player who just moved has four connected.

    :param node: start node; its board is copied, not modified
    :return: result of the finished playout as seen by ``node.player``
    """
    playout_board = deepcopy(node.board)
    start_player = node.player
    mover = start_player

    while True:
        if check_board_full(playout_board):
            break
        if not len(check_open_columns(playout_board)) > 0:
            break

        open_columns = check_open_columns(playout_board)
        # Players alternate; the opponent of the node's player moves first.
        mover = PLAYER2 if mover == PLAYER1 else PLAYER1
        chosen = open_columns[random.randrange(len(open_columns))]
        playout_board = apply_player_action(playout_board, chosen, player=mover)

        # Stop as soon as the move just played wins the game.
        if connected_four(playout_board, mover):
            break

    # The outcome is always judged for the player stored in the start node.
    return self.result(playout_board, start_player)
def get_player_actions(
    board: np.ndarray, player: BoardPiece, _last_action: Optional[PlayerAction] = None
) -> list:  # could move this to common
    """
    Return the list of columns in which a piece can still be placed.

    An empty list is returned when the position is terminal: either
    ``_last_action`` is given and it produced four connected for ``player``,
    or every cell of the board is non-zero. Otherwise the list contains every
    column that still has at least one zero cell, in ascending order.

    :param board: current board; zero cells are treated as empty
    :param player: piece whose win is checked when ``_last_action`` is given
    :param _last_action: column of the last move, or None to skip the win check
    :return: list of playable column indices (possibly empty)
    """
    # `is not None` (identity) instead of `!= None`: column 0 is a valid
    # action and must not be confused with "no action".
    if _last_action is not None and connected_four(board, player, _last_action):
        return []  # game is won

    n_rows = board.shape[0]
    n_cols = board.shape[1]
    if np.count_nonzero(board) == n_rows * n_cols:
        return []  # game is a draw

    # A column is playable while it holds fewer pieces than it has rows.
    return [
        col for col in range(n_cols)
        if np.count_nonzero(board[:, col]) < n_rows
    ]
def test_connected_four():
    """connected_four with a last-action hint on several hand-built boards."""
    from agents.common import connected_four

    player_check = 2
    player_wrong = 1

    # Three rows of player 1 topped by four pieces of player 2 in row 3.
    board1 = np.zeros((6, 7), dtype=np.int8)
    board1[0:3, :] = np.array([0, 1, 1, 1, 1, 1, 0])
    board1[3, :] = np.array([0, 2, 2, 2, 2, 0, 0])
    player_action1 = np.int8(4)

    # Four pieces of player 2 stacked in column 4.
    board2 = np.zeros((6, 7), dtype=np.int8)
    board2[:, 4] = np.array([2, 2, 2, 2, 0, 0])

    board3 = np.array([
        [0, 0, 2, 1, 1, 1],
        [0, 0, 0, 2, 2, 1],
        [0, 0, 0, 0, 2, 1],
        [0, 0, 0, 0, 0, 2],
        [0, 0, 0, 0, 0, 0],
    ])
    player_action3 = np.array([5])

    board4 = np.array([
        [0, 2, 2, 1, 2, 1],
        [0, 1, 1, 2, 2, 1],
        [0, 1, 2, 0, 0, 0],
        [0, 2, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
    ])
    player_action4 = np.array([1])

    # (board, player, last action) paired with the expected outcome below.
    cases = (
        (board1, player_check, player_action1),
        (board2, player_check, player_action1),
        (board3, player_check, player_action3),
        (board4, player_check, player_action4),
        (board4, player_wrong, player_action4),
        (board4, player_check, player_action1),
    )
    expected = (True, True, True, True, False, False)

    # All calls are made first, then checked in order.
    results = [connected_four(b, who, act) for b, who, act in cases]

    assert isinstance(results[0], bool)
    for got, want in zip(results, expected):
        assert got == want
def test_connected_four():
    """
    Shift test for connected_four().

    Covered:
        - winning conditions are picked up
            - horizontal
            - vertical
            - diagonal l
            - diagonal r
        - a win is possible for both players
        - the absence of a win is picked up as well, for both players

    Implementation:
        Two template boards are used. ``board1`` holds a horizontal four of
        the tested player in row 0, columns 0-3, plus opponent pieces and
        empty cells as noise; its transpose gives the vertical case.
        ``board2`` holds a four on the main diagonal, and its left-right
        mirror gives the other diagonal.

        Each template is rolled (with wrap-around) by ``i`` columns and ``j``
        rows for every index pair produced by ``np.ndenumerate``. While the
        pattern does not wrap around an edge it is still four in a row and
        must be reported as a win; once it wraps it is split into shorter
        runs and must not be reported. Row shifts never break the horizontal
        pattern, so for ``board1`` only the column shift matters; for the
        diagonal both shifts matter.

        The whole procedure is repeated with PLAYER1 and PLAYER2 as the
        tested player.

    Note: the vertical case passes a transposed (7 x 6) board to
    connected_four.
    """
    n = NO_PLAYER

    # p is the tested player, d the distracting opponent.
    for p, d in zip([PLAYER1, PLAYER2], [PLAYER2, PLAYER1]):
        # Test board for horizontal and vertical + distractions
        board1 = np.array([[p, p, p, p, n, n, n],
                           [d, n, d, n, d, n, d],
                           [n, n, n, n, n, n, n],
                           [n, d, n, d, n, d, n],
                           [n, n, n, n, n, n, n],
                           [d, n, d, n, d, n, d]])
        # Test board for right diagonal and left diagonal + distractions
        board2 = np.array([[p, d, n, d, n, d, n],
                           [n, p, n, n, n, n, n],
                           [d, n, p, n, d, n, d],
                           [n, n, n, p, n, n, n],
                           [n, d, n, d, n, d, n],
                           [n, n, n, n, n, n, n]])
        n_rows, n_cols = board1.shape

        for (i, j), _ in np.ndenumerate(board1):
            # i is applied as the column shift, j as the row shift.
            shifted1 = np.roll(np.roll(board1, i, axis=1), j, axis=0)
            shifted2 = np.roll(np.roll(board2, i, axis=1), j, axis=0)

            h = cc.connected_four(board=shifted1, player=p)
            v = cc.connected_four(board=shifted1.T, player=p)
            dl = cc.connected_four(board=shifted2, player=p)
            dr = cc.connected_four(board=np.fliplr(shifted2), player=p)

            # np.roll wraps, so only the shift modulo the axis length counts.
            col_shift = i % n_cols
            row_shift = j % n_rows
            # The four-long pattern starts at index 0, so it stays intact
            # while the shift leaves room for all four cells.
            row_intact = col_shift <= n_cols - 4
            diag_intact = row_intact and row_shift <= n_rows - 4

            # Compare as booleans: `~` on a Python bool yields -1 / -2, which
            # are both truthy, so `assert ~x` can never fail.
            assert bool(h) == row_intact
            assert bool(v) == row_intact
            assert bool(dl) == diag_intact
            assert bool(dr) == diag_intact
def evaluate_heuristic(board: np.ndarray, action: PlayerAction, player: BoardPiece) -> int:
    """
    Score a single move (dropping a piece of ``player`` into column ``action``).

    The score is
      * 99 if the move gives ``player`` four connected,
      * -99 if the other player would get four connected by playing the same
        column instead,
      * otherwise a count-based value: from the cell the piece would land in,
        each of the eight directions is scanned up to three cells far. Own
        pieces add to the score, empty cells keep the line alive, and an
        opposing piece ends the scan in that direction. Lines that cannot
        reach a length of four contribute nothing; lines with room earn a
        bonus of 1, or 2 when all six neighbouring cells are usable.

    Parameters
    ----------
    board : np.ndarray
        Board that the move is performed on (not modified; copies are used
        for the win checks)
    action: PlayerAction
        Column of the move that is performed
    player: BoardPiece
        Player who performs the move

    Return
    ------
    Heuristic score of the move as described above

    Raises
    ------
    ValueError
        If no cell in rows 0..5 of the column is empty. Note that
        apply_player_action is called on copies before this check.
    """
    board_copy = board.copy()
    board_copy = apply_player_action(board_copy, action, player, False)

    heuristic = 0

    # check if player can win with this action
    if connected_four(board_copy, player, None):
        heuristic = 99
        return heuristic

    # check if other player can win with this action
    board_copy2 = board.copy()
    apply_player_action(board_copy2, action, other_player(player), False)
    if connected_four(board_copy2, other_player(player), None):
        heuristic = -99
        return heuristic

    # find lowest open row
    # (`row` keeps the index of the first empty cell after the loop)
    for row in range(6):
        if board[row, action] == NO_PLAYER:
            break
        if row == 5:
            raise ValueError("column can't be played")

    # initialize calculation values
    # skip_x:      direction x is blocked by an opposing piece
    # streak_xy:   usable cells (own or empty) on the line through x and y,
    #              counting the move's own cell (hence the start value 1)
    # heuristic_x: number of own pieces found in direction x
    # Directions: a/b = same row, column +/-;  c/d = same column, row +/-;
    #             e/f = diagonal (row+, col+) / (row-, col-);
    #             g/h = diagonal (row-, col+) / (row+, col-)
    skip_a, skip_b, skip_c, skip_d, skip_e, skip_f, skip_g, skip_h = False, False, False, False, False, False, False, False
    streak_ab, streak_cd, streak_ef, streak_gh = 1, 1, 1, 1
    heuristic_a, heuristic_b, heuristic_c, heuristic_d, heuristic_e, heuristic_f, heuristic_g, heuristic_h = 0, 0, 0, 0, 0, 0, 0, 0

    # Look 1, 2 and 3 cells away in every direction; cells outside the board
    # are simply not counted.
    for i in range(1, 4):
        if (action + i) < 7 and not skip_a:
            if board[row, action + i] == player:
                heuristic_a += 1
                streak_ab += 1
            elif board[row, action + i] == NO_PLAYER:
                streak_ab += 1
            else:
                skip_a = True
        if (action - i) > -1 and not skip_b:
            if board[row, action - i] == player:
                heuristic_b += 1
                streak_ab += 1
            elif board[row, action - i] == NO_PLAYER:
                streak_ab += 1
            else:
                skip_b = True
        if (row + i) < 6 and not skip_c:
            if board[row + i, action] == player:
                heuristic_c += 1
                streak_cd += 1
            elif board[row + i, action] == NO_PLAYER:
                streak_cd += 1
            else:
                skip_c = True
        if (row - i) > -1 and not skip_d:
            if board[row - i, action] == player:
                heuristic_d += 1
                streak_cd += 1
            elif board[row - i, action] == NO_PLAYER:
                streak_cd += 1
            else:
                skip_d = True
        if ((action + i) < 7 and (row + i) < 6) and not skip_e:
            if board[row + i, action + i] == player:
                heuristic_e += 1
                streak_ef += 1
            elif board[row + i, action + i] == NO_PLAYER:
                streak_ef += 1
            else:
                skip_e = True
        if ((action - i) > -1 and (row - i) > -1) and not skip_f:
            if board[row - i, action - i] == player:
                heuristic_f += 1
                streak_ef += 1
            elif board[row - i, action - i] == NO_PLAYER:
                streak_ef += 1
            else:
                skip_f = True
        if ((action + i) < 7 and (row - i) > -1) and not skip_g:
            if board[row - i, action + i] == player:
                heuristic_g += 1
                streak_gh += 1
            elif board[row - i, action + i] == NO_PLAYER:
                streak_gh += 1
            else:
                skip_g = True
        if ((action - i) > -1 and (row + i) < 6) and not skip_h:
            if board[row + i, action - i] == player:
                heuristic_h += 1
                streak_gh += 1
            elif board[row + i, action - i] == NO_PLAYER:
                streak_gh += 1
            else:
                skip_h = True

    if streak_ab < 4:  # the move cannot lead to four in this row
        heuristic_a = 0
        heuristic_b = 0
    elif streak_ab == 7:
        heuristic += 2
    else:  # (streak_ab > 3) and (streak_ab < 7):
        heuristic += 1

    if streak_cd < 4:  # the move cannot lead to four in this column
        heuristic_c = 0
        heuristic_d = 0
    elif streak_cd == 7:
        heuristic += 2
    else:  # (streak_cd > 3) and (streak_cd < 7):
        heuristic += 1

    if streak_ef < 4:  # the move cannot lead to four on the right diagonal
        heuristic_e = 0
        heuristic_f = 0
    elif streak_ef == 7:
        heuristic += 2
    else:  # (streak_ef > 3) and (streak_ef < 7):
        heuristic += 1

    if streak_gh < 4:  # the move cannot lead to four on the left diagonal
        heuristic_g = 0
        heuristic_h = 0
    elif streak_gh == 7:
        heuristic += 2
    else:  # (streak_gh > 3) and (streak_gh < 7):
        heuristic += 1

    # Line bonuses plus all own pieces found on lines that can still win.
    heuristic += heuristic_a + heuristic_b + heuristic_c + heuristic_d + heuristic_e + heuristic_f + heuristic_g + heuristic_h
    return heuristic
def minimax(board: np.ndarray,
            depth: int,
            maximizingPlayer: bool,
            player: BoardPiece,
            weights: np.ndarray = weights_array):
    """
    Minimax search returning the best score and move for ``player``.

    All scores are computed by ``scoring_function`` from the point of view of
    ``player``. On maximizing levels ``player`` moves and the highest score
    is kept; on minimizing levels the opponent moves and the lowest score is
    kept. ``player`` therefore stays the same through the whole recursion;
    only the piece that is dropped alternates.

    :param board: Actual board in the game (np.ndarray)
    :param depth: Remaining search depth
    :param maximizingPlayer: True when ``player`` moves next, False when the
        opponent moves next
    :param player: Player whose score is being maximized
    :param weights: Array of weights for the scoring function
    :return: ``(score, best_move)``; ``best_move`` is None when the search
        stops at this node (depth 0 or finished game)
    """
    board_terminal = (connected_four(board, player=PLAYER1)
                      or connected_four(board, player=PLAYER2)
                      or full_board(board))

    if depth == 0 or board_terminal:
        board_score = scoring_function(board=board,
                                       weights=weights,
                                       player=player)
        return int(board_score), None

    # Columns whose last-row cell is still empty, as plain ints.
    # flatnonzero yields a flat index array, so no int() on 1-element arrays.
    columns = [int(c) for c in np.flatnonzero(board[-1, :] == NO_PLAYER)]
    best_move = None

    if maximizingPlayer:
        board_score = -10000000
        for c in columns:
            im_board, _ = apply_player_action(board=board,
                                              action=c,
                                              player=player,
                                              copy=True)
            score, _ = minimax(im_board, depth - 1, False, player, weights)
            if score > board_score:
                board_score = score
                best_move = c
    else:
        board_score = 100000000
        opponent = PLAYER2 if player == PLAYER1 else PLAYER1
        for c in columns:
            im_board, _ = apply_player_action(board=board,
                                              action=c,
                                              player=opponent,
                                              copy=True)
            # Recurse for the same `player`: the next level is a maximizing
            # level on which `player` moves. Passing the opponent here would
            # let the opponent drop two pieces in a row.
            score, _ = minimax(im_board, depth - 1, True, player, weights)
            if score < board_score:
                board_score = score
                best_move = c

    # No child beat the sentinel score: still return a legal column.
    if best_move is None and columns:
        best_move = columns[0]
    return int(board_score), best_move
def minimax(board: np.ndarray, depth: int, alpha: int, beta: int,
            player: BoardPiece, maximizing_player: bool) -> Tuple[int, int]:
    '''
    Alpha-beta minimax: returns the column to play and its min/max score.

    :param board: current state of board
    :param depth: remaining depth of the search tree; at 0 the heuristic
        score of the board is returned
    :param alpha: best score already guaranteed for the maximizing side
    :param beta: best score already guaranteed for the minimizing side
    :param player: the agent; scores are always relative to this player
    :param maximizing_player: True if this level maximizes for the agent,
        False if it minimizes on behalf of the opponent
    :return: (column, score); column is None at depth 0 and at finished games
    '''
    # The agent is fixed for the whole search; derive the other piece once so
    # min and max are never applied to the wrong side.
    opponent_player = PLAYER2 if player == PLAYER1 else PLAYER1

    # Columns that can currently take a piece.
    playable = np.asarray(check_open_columns(board))

    # Search horizon reached.
    if depth == 0:
        return None, heuristic(board, player)

    # Leaf / terminal node: fixed scores for win, loss and draw.
    if check_end_state(board, player) != GameState.STILL_PLAYING:
        if connected_four(board, player):
            return None, 100000
        if connected_four(board, opponent_player):
            return None, -100000
        return None, 0

    if maximizing_player:
        top_score = -math.inf
        for candidate in playable:
            # Simulate the move on a copy; the untouched board is handed
            # back as the first element.
            board, simulated = apply_player_action(board, candidate, player, True)
            _, reply_score = minimax(simulated, depth - 1, alpha, beta,
                                     player, False)
            if reply_score > top_score:
                top_score, action_column = reply_score, candidate
            # Prune: the minimizer already has something at least as good.
            alpha = max(alpha, top_score)
            if alpha >= beta:
                break
        return action_column, top_score

    low_score = math.inf
    for candidate in playable:
        board, simulated = apply_player_action(board, candidate,
                                               opponent_player, True)
        _, reply_score = minimax(simulated, depth - 1, alpha, beta,
                                 player, True)
        if reply_score < low_score:
            low_score, action_column = reply_score, candidate
        # The opponent minimizes the agent's score.
        beta = min(beta, low_score)
        if alpha >= beta:
            break
    return action_column, low_score
def test_connected_four():
    """
    Play fixed move sequences onto fresh boards and check connected_four for
    winning and non-winning positions in all four line directions.
    """
    from agents.common import initialize_game_state
    from agents.common import apply_player_action
    from agents.common import connected_four

    def _board_after(moves):
        # Fresh board with every (column, piece) move applied in place.
        fresh = initialize_game_state()
        for column, piece in moves:
            apply_player_action(fresh, column, BoardPiece(piece), False)
        return fresh

    # Shared openings as (column, piece) pairs; each stops one move short of
    # the position its "true" test needs.
    stack_opening = [(2, 2), (2, 1), (3, 2), (2, 1), (3, 2), (2, 1), (3, 2)]
    row_opening = [(2, 1), (2, 2), (3, 1), (2, 2), (4, 1), (2, 2)]
    diagonal_opening = [(0, 1), (0, 2), (0, 1), (1, 2), (1, 1), (2, 2),
                        (5, 1), (2, 2), (2, 1), (3, 2), (4, 1), (3, 2),
                        (4, 1)]

    # TRUE TESTS
    # vertical
    board = _board_after(stack_opening + [(2, 1)])
    ret = connected_four(board, BoardPiece(1))
    assert isinstance(ret, bool)
    assert ret == True

    # horizontal
    board = _board_after(row_opening + [(5, 1)])
    ret = connected_four(board, 1, 5)
    assert isinstance(ret, bool)
    assert ret == True

    # left right diagonal
    board = _board_after(diagonal_opening + [(3, 2), (3, 1)])
    ret = connected_four(board, 1, 3)
    assert isinstance(ret, bool)
    assert ret == True

    # right left diagonal
    board = _board_after(diagonal_opening + [(1, 2), (3, 1), (0, 2)])
    ret = connected_four(board, 2, 0)
    assert isinstance(ret, bool)
    assert ret == True

    # FALSE TESTS
    # vertical
    board = _board_after(stack_opening)
    ret = connected_four(board, BoardPiece(2), 3)
    assert ret == False

    # horizontal
    board = _board_after(row_opening)
    ret = connected_four(board, 2, 2)
    assert isinstance(ret, bool)
    assert ret == False

    # left right diagonal
    board = _board_after(diagonal_opening)
    ret = connected_four(board, BoardPiece(1), 4)
    assert ret == False

    # right left diagonal
    board = _board_after(diagonal_opening + [(1, 2)])
    ret = connected_four(board, 2, 1)
    assert isinstance(ret, bool)
    assert ret == False

    # NO WIN TEST
    board = _board_after(stack_opening)
    ret = connected_four(board, BoardPiece(1))
    assert isinstance(ret, bool)
    assert ret == False
def test_connected_four():
    """Smoke test: ``connected_four`` returns a built-in ``bool`` for an all-zero 6x7 board."""
    from agents.common import connected_four

    blank_board = np.zeros((6, 7), dtype=BoardPiece)
    outcome = connected_four(blank_board, BoardPiece(2))

    assert isinstance(outcome, bool)
def monte_carlo_tree_search(
        board: np.ndarray,
        player: BoardPiece,
        saved_state: Optional[SavedState],
        timeout: np.int8 = 10) -> Tuple[PlayerAction, Optional[SavedState]]:
    '''
    Choose a move for ``player`` with Monte Carlo tree search.

    If one of the root's unexpanded actions wins immediately it is returned
    without searching. Otherwise the four MCTS steps are repeated until more
    than ``timeout`` seconds have elapsed (the budget is checked after each
    iteration, so at least one iteration always runs):
        1. Selection
        2. Expansion
        3. Simulation
        4. Backpropagation

    :param board: current board
    :param player: piece of the agent; the opposing piece is computed as
        ``3 - player``
    :param saved_state: returned to the caller unchanged
    :param timeout: search budget in seconds
    :return: the action of the root child with the highest wins/visits ratio,
        together with ``saved_state``
    '''
    MinPiece = 3 - player
    MaxPiece = player
    root = Node(board=board, player=MinPiece)

    # Check for an immediate win before spending any time on the search.
    for action in root.action_notExp:
        state = board.copy()
        apply_player_action(state, action, MaxPiece)
        if connected_four(state, MaxPiece, action):
            return action, saved_state

    # time.clock() was removed in Python 3.8; perf_counter() is the documented
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    while True:
        node = root

        # selection
        # Keep going down the tree until a node with unexpanded moves or a
        # node without children is reached.
        while node.action_notExp == [] and node.childNodes != []:
            node = node.selection()

        # expansion
        if node.action_notExp != []:
            action = random.choice(node.action_notExp)
            node = node.expansion(action)

        # simulation
        # The rollout starts from the board stored on the selected node.
        state = node.board.copy()
        player_roll = node.player
        result = 0
        # NOTE(review): when no expansion happened above, `action` still holds
        # the value left over from an earlier step -- confirm that this is what
        # get_player_actions expects as its third argument.
        # Stop as soon as check_result reports 1 or -0.1.
        while (get_player_actions(state, 3 - player_roll, action)
               and result != 1 and result != -0.1):
            player_roll = 3 - player_roll
            action = random.choice(
                get_player_actions(state, player_roll, action))
            apply_player_action(state, action, player_roll)
            # Result is always evaluated for the agent (the maximising piece).
            result = check_result(state, MaxPiece, action)

        # backpropagation
        while node is not None:
            node.update(result)
            node = node.parent

        if time.perf_counter() - start > timeout:
            break

    def win_ratio(child):
        return child.wins / child.visits

    # Ascending sort, so the last element has the largest win ratio.
    best_child = sorted(root.childNodes, key=win_ratio)[-1]
    return best_child.action, saved_state
def _minimax_drop_piece(board: np.ndarray, col: int, piece) -> np.ndarray:
    """
    Return a copy of ``board`` with ``piece`` placed in the lowest free cell of ``col``.

    Row 0 is treated as the bottom row and 0 as the empty cell, matching the
    ``board[ROWS - 1][col] == 0`` "column is not full" test used by ``minimax``.
    The input board is left untouched.
    """
    child = np.copy(board)
    for row in range(ROWS):
        if child[row][col] == 0:
            child[row][col] = piece
            break
    return child


def minimax(depth: int, board: np.ndarray, player: BoardPiece, alpha, beta, maximizing=True):
    """
    Minimax search with alpha-beta pruning.

    Scores are always expressed from the point of view of ``BoardPiece(2)``:
    the maximising side plays piece 2 and the minimising side plays piece 1.

    :param depth: remaining depth of the tree search of type int
    :param board: Contains current state of the board an ndarray, shape (ROWS, COLUMNS) and data type (dtype) BoardPiece;
                  it is not modified
    :param player: Current player playing the game of type BoardPiece. It is passed down the recursion unchanged and
                   is not consulted by the search itself; kept for interface compatibility
    :param alpha: Alpha value for alpha-beta pruning of type float
    :param beta: Beta value for alpha-beta pruning of type float
    :param maximizing: A boolean value to switch between maximising and minimising heuristic_value
    :return: column : the column to be played by the agent of type int (None at a terminal node or at depth 0)
             value : ``math.inf`` if piece 2 has four connected, ``-math.inf`` if piece 1 has, 0 for a full
                     board without a winner, otherwise the heuristic value of the board
    """
    maximizer = BoardPiece(2)
    minimizer = BoardPiece(1)

    # A column is playable while its top cell is still empty.
    valid_columns = [col for col in range(COLUMNS) if board[ROWS - 1][col] == 0]

    # Terminal positions are scored before the depth cut-off so that a win
    # found exactly at depth 0 is not replaced by a heuristic estimate.
    if connected_four(board, maximizer):
        return None, math.inf
    if connected_four(board, minimizer):
        return None, -math.inf
    if not valid_columns:
        return None, 0
    if depth == 0:
        return None, board_heuristic(board, maximizer)

    if maximizing:
        value = -math.inf
        # Fallback when every child scores -inf and no column ever improves on `value`.
        column = random.choice(valid_columns)
        for col in valid_columns:
            # Simulate the move on a copy; without this every child would be
            # the same position and the search could not tell columns apart.
            child = _minimax_drop_piece(board, col, maximizer)
            value_temp = minimax(depth - 1, child, player, alpha, beta, False)[1]
            if value_temp > value:
                value = value_temp
                column = col
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return column, value

    value = math.inf
    # Fallback when every child scores +inf and no column ever improves on `value`.
    column = random.choice(valid_columns)
    for col in valid_columns:
        child = _minimax_drop_piece(board, col, minimizer)
        value_temp = minimax(depth - 1, child, player, alpha, beta, True)[1]
        if value_temp < value:
            value = value_temp
            column = col
        beta = min(beta, value)
        if beta <= alpha:
            break
    return column, value
def MCTS(board: np.ndarray) -> PlayerAction:
    """
    Choose a column for ``PLAYER`` with Monte Carlo tree search.

    Runs up to 100000 iterations of selection, expansion, random rollout and
    backpropagation, stopping early once more than the module-level
    ``Timeout`` (seconds) has elapsed. Afterwards the root's children are
    scanned in order: a child whose state already contains four connected
    pieces of that child's player is returned immediately; otherwise the move
    with the highest wins/visits ratio is returned.

    :param board: current board; the search works on copies of node states
    :return: the chosen column, or -1 if the root ended up with no children
    """
    rootNode = Node(state=board, player=PLAYER)
    itermax = 100000
    start = time.time()

    for _ in range(itermax):
        node = rootNode

        #############
        # selection #
        #############
        # Keep going down the tree until a node with untried moves or a node
        # without children is reached. Emptiness is tested with len(): the
        # moves are column indices, so np.any() would treat a list holding
        # only column 0 as empty.
        while len(node.untriedMoves) == 0 and node.childNodes != []:
            node = node.selection()

        #############
        #  Expand   #
        #############
        if len(node.untriedMoves) > 0:
            # Choose a random action from available moves
            action = np.random.choice(node.untriedMoves)
            node = node.expand(action)

        #############
        #  rollout  #
        #############
        rollout_board = node.state.copy()
        win_game_flag = False
        currentPlayer = node.player
        open_columns = find_columns(rollout_board)
        while len(open_columns) > 0 and not win_game_flag:
            # Alternate players before every simulated move.
            currentPlayer = PLAYER1 if currentPlayer == PLAYER2 else PLAYER2
            action = np.random.choice(open_columns)
            rollout_board, _ = apply_player_action(rollout_board, action, currentPlayer)
            win_game_flag = connected_four(rollout_board, currentPlayer)
            open_columns = find_columns(rollout_board)

        #################
        # backpropagate #
        #################
        if win_game_flag:
            # 1: the player won, -1: the player lost against the opponent
            result = 1 if currentPlayer == PLAYER else -1
        else:
            result = 0

        while node is not None:
            node.update(result)
            node = node.parent

        if time.time() - start > Timeout:
            break

    bestScore = -10000000.0
    selectedColumn = -1
    for child in rootNode.childNodes:
        # A move that already connects four is taken straight away.
        if connected_four(child.state, child.player):
            return child.move
        score = child.wins / child.visits
        if score > bestScore:
            selectedColumn = child.move
            bestScore = score

    return selectedColumn