def test_check_end_state():
    from agents.common import check_end_state
    from agents.common import apply_player_action
    from agents.common import initialize_game_state
    from agents.common import BoardPiece  # FIX: was missing, yet BoardPiece is used throughout
    from agents.common import GameState
    # (removed unused import of pretty_print_board)

    # test 'is win': player 1 lines up columns 2..5 on the bottom row
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    ret = check_end_state(board, BoardPiece(1), 5)
    assert isinstance(ret, GameState)
    assert ret == GameState.IS_WIN

    # test still playing: only two pieces on the board
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), True)
    apply_player_action(board, 3, BoardPiece(1), True)
    # consistency: pass a BoardPiece rather than a bare int, as elsewhere
    ret = check_end_state(board, BoardPiece(1), 3)
    assert ret == GameState.STILL_PLAYING

    # test is draw: overwrite the board with a full pattern
    board[:, 0] = BoardPiece(1)
    board[:, 1:3] = BoardPiece(2)
    board[:, 3:5] = BoardPiece(1)
    board[:, 5:7] = BoardPiece(2)
    board[3:5, :] = BoardPiece(1)
    board[1, :] = BoardPiece(2)
    ret = check_end_state(board, BoardPiece(2), 5)
    assert ret == GameState.IS_DRAW
def test_connected_four():
    from agents.common import initialize_game_state
    from agents.common import apply_player_action
    from agents.common import connected_four
    from agents.common import BoardPiece  # FIX: was missing, yet BoardPiece is used throughout

    # ---------------------------- TRUE TESTS -----------------------------
    # vertical: four player-1 pieces stacked in column 2
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1))
    assert isinstance(ret, bool)
    assert ret

    # horizontal: player-1 pieces on columns 2..5 of the bottom row
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1), 5)  # consistency: BoardPiece, not bare int
    assert isinstance(ret, bool)
    assert ret

    # left-right diagonal
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1), 3)
    assert isinstance(ret, bool)
    assert ret

    # right-left diagonal
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 0)
    assert isinstance(ret, bool)
    assert ret

    # ---------------------------- FALSE TESTS ----------------------------
    # vertical: only three player-2 pieces in column 3
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 3)
    assert not ret

    # horizontal: player 2 has no horizontal four
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 2)
    assert isinstance(ret, bool)
    assert not ret

    # left-right diagonal: diagonal one piece short
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1), 4)
    assert not ret

    # right-left diagonal: diagonal one piece short
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 1)
    assert isinstance(ret, bool)
    assert not ret

    # NO WIN TEST: sparse position, no four anywhere (no last-action hint)
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(1))
    assert isinstance(ret, bool)
    assert not ret
def iterativeDeepingSearch(board: np.ndarray, player: BoardPiece) -> tuple:
    """
    Performs iterative deepening DFS on the search tree, which is advisable when moves are
    under time constraint. Does a full traversal of the game tree up to a certain depth,
    then changes the cut-off depth (here: starts at MAX_DEPTH and decrements). Only the
    result from the last full traversal of the tree should be considered.

    NOTE(review): relies on module-level state not visible in this block:
    MAX_DEPTH, noPlayer, transpoTable, transpo_size, hash_board, alphaBeta,
    apply_player_action -- confirm their semantics against the defining module.

    :param board: the board
    :param player: the player to move
    :return: (keys, values) -- the score(s) and the corresponding list(s) of moves
        from the last completed traversal (two lists, not an np.ndarray)
    """
    iter = MAX_DEPTH  # sets cut-off depth for DFS: incrementally decreasing (shadows builtin iter)
    bestScore = np.NINF
    tempBoard = board.copy()
    tempBestScore = bestScore
    # Moves stored in OrderedDict: keys := score, vals := list(moves)
    # this will help (later on) with storing some of the suboptimal moves
    # and help circumvent some horizon problems
    bestMoves = OrderedDict()
    new_bestMoves = OrderedDict()
    # Generate list of best moves:
    # TODO: generate list of best and second (nth?) best moves
    # TODO: then draw move from a skewed (e.g. exponential) probability distribution
    # TODO: add time-limit related while-loop wrap
    # Early in the game: play moves in the center columns:
    while iter > 0:
        # Playable columns: those whose top cell (row 5) is still empty.
        # assumes a 6-row board with row index 5 as the top -- TODO confirm
        possible_moves = np.where(board[5] == noPlayer)
        for moveI, move in np.ndenumerate(possible_moves):
            last_move = move
            score = np.NINF
            # carry the best score found so far into this candidate's evaluation
            bestScore = tempBestScore
            new_board = apply_player_action(tempBoard, move, player)
            # Check if new_board is in the transposition table:
            hash_key = hash_board(new_board)
            if transpoTable.get(hash_key) is not None:
                score = transpoTable[hash_key]  # cache hit: reuse stored score
            else:
                # cache miss: restore a fresh copy of the root board and search
                tempBoard = board.copy()
                new_player = (player % 2) + 1  # switch to the other player (1 <-> 2)
                score = alphaBeta(new_board, new_player, iter, last_move)
            if score > bestScore:
                # strictly better move found: restart the candidate list
                bestScore = score
                tempBestScore = bestScore
                new_bestMoves.clear()
                new_bestMoves[bestScore] = [move]
                transpoTable[hash_key] = bestScore
                # FIFO queue, pop item upon exceeding space limit
                if len(list(transpoTable.keys())) > transpo_size:
                    transpoTable.popitem()
            # store all moves with the same score:
            elif score == bestScore:
                # NOTE(review): assumes bestScore is already a key of new_bestMoves;
                # would raise KeyError otherwise -- verify this invariant holds
                new_bestMoves[bestScore].append(move)
                transpoTable[hash_key] = bestScore
                if len(list(transpoTable.keys())) > transpo_size:
                    transpoTable.popitem()
        # Check old and new bestScores are the same:
        if bestMoves != OrderedDict() and list(bestMoves.keys())[0] == list(
                new_bestMoves.keys())[0]:
            # Merge moves with the same score: my guess is this will be important when the heuristic
            # is such that it creates the same value a lot of the time and no computational concern otherwise
            new_bestMoves[list(bestMoves.keys())[0]] = bestMoves[list(bestMoves.keys())[0]] + \
                                                       new_bestMoves[list(new_bestMoves.keys())[0]]
        else:
            # scores differ: adopt the new candidate set and start a fresh one
            bestMoves = new_bestMoves.copy()
            new_bestMoves.clear()
        iter -= 1
        # Break if winning move has been found:
        if tempBestScore == GameState.IS_WIN.value:
            break
        tempBestScore = np.NINF  # reset best score before the next (shallower) pass
    # When under time constraint: check how deep you can go
    print("Iteration: {}".format(iter))
    keys, values = list(bestMoves.keys()), list(bestMoves.values())
    return keys, values
def minimax(board: np.ndarray, player: BoardPiece, score_dict: np.ndarray, depth: int,
            alpha: float, beta: float, maxplayer: bool) -> (PlayerAction, float):
    """
    Minimax algorithm with alpha-beta pruning.

    :param board: np.ndarray: current state of the board, filled with Player pieces
    :param player: BoardPiece: player piece to evaluate for best move (maximizing player)
    :param score_dict: np.ndarray: list of score points to give to the different patterns,
        see board_score
    :param depth: int: remaining depth of tree search
    :param alpha: float: best score the maximizing player can already guarantee
    :param beta: float: best score the minimizing player can already guarantee
    :param maxplayer: bool: flag if the maximizing player is playing
    :return: (PlayerAction, float): best possible action and its score
    """
    # Player possible actions: columns whose top cell is free,
    # ordered center-first for a better search bias.
    poss_actions = (np.arange(board.shape[1],
                              dtype=PlayerAction)[board[-1, :] == NO_PLAYER])
    poss_actions = poss_actions[np.argsort(np.abs(poss_actions - 3))]  # center search bias
    pieces = np.array([PLAYER1, PLAYER2])

    # Final or end state node reached
    current_state = cc.check_end_state(board=board, player=player)
    if (depth == 0) or (current_state != cc.GameState.STILL_PLAYING):
        # BUG FIX: the original used `~maxplayer`. `~` is bitwise NOT, and on a
        # Python bool ~False == -1 (truthy), so the first branch fired for BOTH
        # players and every win was scored positively. `not maxplayer` is the
        # intended logical negation.
        if (current_state == cc.GameState.IS_WIN) and not maxplayer:
            return None, 10000 + depth  # +depth prefers earlier wins
        if (current_state == cc.GameState.IS_WIN) and maxplayer:
            return None, -(10000 + depth)
        if current_state == cc.GameState.IS_DRAW:
            return None, 0
        else:
            return None, board_score(board=board, player=player, score_dict=score_dict)

    action = None  # defensive: remains None only if there are no legal moves
    if maxplayer:
        # Initialize score
        max_score = -np.infty
        for move in poss_actions:
            # How would a move change my score?
            move_board = cc.apply_player_action(board=board, action=move,
                                                player=player, copy=True)
            score = minimax(board=move_board, player=player, score_dict=score_dict,
                            depth=depth - 1, alpha=alpha, beta=beta,
                            maxplayer=False)[1]
            if score > max_score:
                max_score = score
                action = move
            alpha = max(alpha, score)
            if beta <= alpha:
                break  # prune: remaining moves cannot improve the outcome
        return action, max_score
    else:
        # Initialize opponent score
        min_score = np.infty
        opponent = pieces[pieces != player][0]
        for move in poss_actions:
            # How would an opponent move change my score?
            move_board = cc.apply_player_action(board=board, action=move,
                                                player=opponent, copy=True)
            # NOTE(review): the recursive score is negated here while min()/max()
            # bookkeeping is also used (a minimax/negamax hybrid). Preserved as-is;
            # confirm against the intended search convention before changing.
            score = -minimax(board=move_board, player=opponent, score_dict=score_dict,
                             depth=depth - 1, alpha=alpha, beta=beta,
                             maxplayer=True)[1]
            if score < min_score:
                min_score = score
                action = move
            beta = min(beta, score)
        return action, min_score
def test_connected_four_horizontal(self):
    # Four PLAYER1 pieces dropped on columns 0..3 -> horizontal win on the bottom row.
    winning = common.initialize_game_state()
    for col in (0, 1, 2, 3):
        common.apply_player_action(winning, PlayerAction(col), common.PLAYER1)

    # Same row, but PLAYER2 interrupts the run at column 2 -> no win for PLAYER1.
    broken = common.initialize_game_state()
    for col, piece in ((0, common.PLAYER1), (1, common.PLAYER1),
                       (2, common.PLAYER2), (3, common.PLAYER1)):
        common.apply_player_action(broken, PlayerAction(col), piece)

    # Check both the full-board scan and the last-action-hinted variant.
    assert common.connected_four(winning, PLAYER1) == True
    assert common.connected_four(winning, PLAYER1, PlayerAction(3)) == True
    assert common.connected_four(broken, PLAYER1) == False
    assert common.connected_four(broken, PLAYER1, PlayerAction(3)) == False
def generate_move_negamax(
        board: np.ndarray, player: BoardPiece,
        saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Generate move using negamax -- including some workaround to force
    winning moves and blocking moves before running the search.

    :param board: current board state
    :param player: current player
    :param saved_state: opaque state, passed through unchanged
    :return: chosen action and the (unchanged) saved state
    """
    open_moves = get_valid_moves(board)
    # print(f'Open moves: {open_moves}')  # FIX: disabled like the other debug prints below
    new_states = [
        apply_player_action(board, move, player, copy=True)
        for move in open_moves
    ]

    # if a move results in a win, play it
    winning_moves = np.array([
        check_end_state(state, player) for state in new_states
    ]) == GameState.IS_WIN
    if np.any(winning_moves):
        # print(f'playing action for a win')
        return _random_flagged_move(open_moves, winning_moves), saved_state

    # if a move results in blocking an opponent's win, play it
    other_player = BoardPiece(player % 2 + 1)
    new_states_other = [
        apply_player_action(board, move, other_player, copy=True)
        for move in open_moves
    ]
    blocking_moves = np.array([
        check_end_state(state, other_player) for state in new_states_other
    ]) == GameState.IS_WIN
    if np.any(blocking_moves):
        # print(f'playing action for a block')
        return _random_flagged_move(open_moves, blocking_moves), saved_state

    # otherwise, use the heuristic function to score possible states.
    # FIX: wrap in np.asarray so `scores == np.max(scores)` is a proper
    # element-wise comparison (the original compared a Python list against a
    # numpy scalar and relied on reflected broadcasting).
    scores = np.asarray([
        negamax_alpha_beta(state, player, MAX_DEPTH, alpha=-np.inf, beta=np.inf)
        for state in new_states
    ])

    # randomly select among best moves
    best_mask = scores == np.max(scores)
    if np.sum(best_mask) > 1:
        action = np.random.choice(open_moves[np.argwhere(best_mask)].squeeze())
    else:
        action = open_moves[np.argmax(scores)].squeeze()
    # print(f'Heuristic values: {scores}')
    # print(f'playing action {action} with heuristic value {np.max(scores)}')
    return action, saved_state


def _random_flagged_move(open_moves: np.ndarray, mask: np.ndarray) -> PlayerAction:
    """Return one of the moves flagged True in `mask`, chosen uniformly if several."""
    actions = open_moves[np.argwhere(mask)].squeeze()
    if actions.size > 1:
        return np.random.choice(actions)
    return actions
# test init, prior to creating further nodes def test_init_node(): assert np.all(initial_node.legal_moves == get_valid_moves(initial_state)) assert np.all(initial_node.legal_moves == initial_node.unexpanded_moves) assert np.all(initial_node.board == initial_state) # manually create a fully expanded node from the initial node # just relies on the init method (and apply_player_action) fully_expanded_node = copy.deepcopy(initial_node) for move in fully_expanded_node.legal_moves: new_board = apply_player_action(board=fully_expanded_node.board, action=move, player=fully_expanded_node.to_play, copy=True) new_node = MonteCarloNode( new_board, to_play=BoardPiece(fully_expanded_node.to_play % 2 + 1), last_move=move, parent=fully_expanded_node) fully_expanded_node.children[move] = new_node fully_expanded_node.expanded_moves = fully_expanded_node.legal_moves fully_expanded_node._unexpanded_moves = [] def test_unexpanded_moves(): assert np.all(initial_node.unexpanded_moves == initial_node.legal_moves) assert fully_expanded_node.unexpanded_moves == []