def test_check_end_state():
    from agents.common import (BoardPiece, GameState, apply_player_action,
                               check_end_state, initialize_game_state)

    # test 'is win'
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)

    ret = check_end_state(board, BoardPiece(1), 5)
    assert isinstance(ret, GameState)
    assert ret == GameState.IS_WIN

    # test still playing
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), True)
    apply_player_action(board, 3, BoardPiece(1), True)
    ret = check_end_state(board, BoardPiece(1), 3)
    assert ret == GameState.STILL_PLAYING

    # test is draw: tile the full board so that no four equal pieces line up
    board[::2, 0:2] = BoardPiece(1)
    board[1::2, 0:2] = BoardPiece(2)
    board[::2, 2:4] = BoardPiece(2)
    board[1::2, 2:4] = BoardPiece(1)
    board[::2, 4:6] = BoardPiece(1)
    board[1::2, 4:6] = BoardPiece(2)
    board[::2, 6] = BoardPiece(2)
    board[1::2, 6] = BoardPiece(1)
    ret = check_end_state(board, BoardPiece(2), 5)
    assert ret == GameState.IS_DRAW


def test_connected_four():
    from agents.common import (BoardPiece, apply_player_action, connected_four,
                               initialize_game_state)
    board = initialize_game_state()

    # TRUE TESTS
    # vertical
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1))
    assert isinstance(ret, bool)
    assert ret is True

    # horizontal
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1), 5)
    assert isinstance(ret, bool)
    assert ret is True

    # left right diagonal
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1), 3)
    assert isinstance(ret, bool)
    assert ret is True

    # right left diagonal
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)

    ret = connected_four(board, BoardPiece(2), 0)
    assert isinstance(ret, bool)
    assert ret is True

    # FALSE TESTS
    # vertical
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 3)
    assert ret is False

    # horizontal
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 3, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 2)
    assert isinstance(ret, bool)
    assert ret is False

    # left right diagonal
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    ret = connected_four(board, BoardPiece(1), 4)
    assert ret is False

    # right left diagonal
    board = initialize_game_state()
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 0, BoardPiece(2), False)
    apply_player_action(board, 0, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    apply_player_action(board, 1, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 5, BoardPiece(1), False)
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 4, BoardPiece(1), False)
    apply_player_action(board, 1, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(2), 1)
    assert isinstance(ret, bool)
    assert ret is False

    # NO WIN TEST
    board = initialize_game_state()
    apply_player_action(board, 2, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    apply_player_action(board, 2, BoardPiece(1), False)
    apply_player_action(board, 3, BoardPiece(2), False)
    ret = connected_four(board, BoardPiece(1))
    assert isinstance(ret, bool)
    assert ret is False
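For reference, the tests above pin down the shape of connected_four(board, player, last_action=None) in agents.common. The project's real implementation is not part of this listing; the sketch below is a minimal stand-in that would satisfy these tests, assuming a 6x7 integer board.

import numpy as np


def connected_four_sketch(board: np.ndarray, player, last_action=None) -> bool:
    # Scan every cell and direction for a run of four `player` pieces.
    # `last_action` could narrow the scan to lines through the last move;
    # this sketch ignores it and always checks the full board.
    rows, cols = board.shape
    for r in range(rows):
        for c in range(cols):
            for dr, dc in ((0, 1), (1, 0), (1, 1), (1, -1)):
                end_r, end_c = r + 3 * dr, c + 3 * dc
                if 0 <= end_r < rows and 0 <= end_c < cols and all(
                        board[r + i * dr, c + i * dc] == player for i in range(4)):
                    return True
    return False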
Example #3
def iterativeDeepeningSearch(board: np.ndarray,
                             player: BoardPiece) -> Tuple[list, list]:
    """
    Performs iterative deepening DFS on the search tree, which is advisable when
    moves have to be made under a time constraint. Does a full traversal of the
    game tree up to a cut-off depth, then changes the depth and repeats. Only the
    result of the last full traversal of the tree should be used.
    :param board: the board
    :param player: the player to move
    :return: the scores found and, for each score, the list of moves achieving it
    """

    search_depth = MAX_DEPTH  # cut-off depth for the DFS, decremented each pass
    bestScore = -np.inf
    tempBoard = board.copy()
    tempBestScore = bestScore

    # Moves are stored in an OrderedDict: keys are scores, values are lists of moves.
    # This will help (later on) with keeping some of the suboptimal moves
    # and with circumventing some horizon problems.
    bestMoves = OrderedDict()
    new_bestMoves = OrderedDict()

    # Generate the list of best moves:
    # TODO: generate list of best and second (nth?) best moves
    # TODO: then draw the move from a skewed (e.g. exponential) probability distribution
    # TODO: add a time-limit related while-loop wrapper

    # Early in the game: play moves in the center columns.

    while search_depth > 0:

        possible_moves = np.where(board[5] == noPlayer)[0]

        for move in possible_moves:

            last_move = move
            score = -np.inf

            bestScore = tempBestScore

            new_board = apply_player_action(tempBoard, move, player, True)
            # Check whether new_board is in the transposition table:
            hash_key = hash_board(new_board)

            if transpoTable.get(hash_key) is not None:
                score = transpoTable[hash_key]

            else:
                new_player = (player % 2) + 1
                score = alphaBeta(new_board, new_player, search_depth, last_move)

            if score > bestScore:
                bestScore = score
                tempBestScore = bestScore
                new_bestMoves.clear()
                new_bestMoves[bestScore] = [move]
                transpoTable[hash_key] = bestScore

                # Bound the transposition table: pop an entry upon exceeding the space limit
                if len(transpoTable) > transpo_size:
                    transpoTable.popitem()

            #store all moves with the same score:
            elif score == bestScore:
                new_bestMoves[bestScore].append(move)
                transpoTable[hash_key] = bestScore
                if len(transpoTable) > transpo_size:
                    transpoTable.popitem()

        # Check whether the old and new best scores are the same:
        if bestMoves != OrderedDict() and list(bestMoves.keys())[0] == list(
                new_bestMoves.keys())[0]:

            # Merge moves with the same score: the guess is that this matters when the
            # heuristic produces the same value a lot of the time, and is no
            # computational concern otherwise.
            new_bestMoves[list(bestMoves.keys())[0]] = bestMoves[list(bestMoves.keys())[0]] + \
                                                       new_bestMoves[list(new_bestMoves.keys())[0]]

        else:
            bestMoves = new_bestMoves.copy()

        new_bestMoves.clear()
        search_depth -= 1

        # Break if a winning move has been found:
        if tempBestScore == GameState.IS_WIN.value:
            break

        tempBestScore = -np.inf

    # When under a time constraint: check how deep the search managed to go.
    print("Iteration: {}".format(search_depth))
    keys, values = list(bestMoves.keys()), list(bestMoves.values())
    return keys, values
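A hypothetical call site for the search above, assuming initialize_game_state and BoardPiece from agents.common and the module-level globals (MAX_DEPTH, transpoTable, transpo_size) already set up. The function keeps the best score first, so one reasonable policy is to choose uniformly among its tied moves:

import numpy as np

board = initialize_game_state()
scores, move_lists = iterativeDeepeningSearch(board, BoardPiece(1))
action = np.random.choice(move_lists[0])  # uniform choice among best-scoring moves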
Example #4
def minimax(board: np.ndarray, player: BoardPiece, score_dict: np.ndarray,
            depth: int, alpha: float, beta: float,
            maxplayer: bool) -> Tuple[PlayerAction, float]:
    """
    Minimax algorithm with alpha-beta pruning
    :param board:
        np.ndarray: current state of the board, filled with player pieces
    :param player:
        BoardPiece: player piece to evaluate the best move for (the maximizing player)
    :param score_dict:
        np.ndarray: scores assigned to the different board patterns (see board_score)
    :param depth:
        int: depth of the tree search
    :param alpha:
        float: best score guaranteed so far for the maximizing player
    :param beta:
        float: best score guaranteed so far for the minimizing player
    :param maxplayer:
        bool: flag indicating whether the maximizing player is to move
    :return:
        Tuple[PlayerAction, float]: best possible action and its score
    """
    # Possible player actions: the open columns
    poss_actions = (np.arange(board.shape[1],
                              dtype=PlayerAction)[board[-1, :] == NO_PLAYER])
    poss_actions = poss_actions[np.argsort(np.abs(poss_actions -
                                                  3))]  # center search bias
    pieces = np.array([PLAYER1, PLAYER2])

    # Final or end state node reached
    current_state = cc.check_end_state(board=board, player=player)
    if (depth == 0) or (current_state != cc.GameState.STILL_PLAYING):
        if (current_state == cc.GameState.IS_WIN) and not maxplayer:
            return None, 10000 + depth
        if (current_state == cc.GameState.IS_WIN) and maxplayer:
            return None, -(10000 + depth)
        if current_state == cc.GameState.IS_DRAW:
            return None, 0
        else:
            return None, board_score(board=board,
                                     player=player,
                                     score_dict=score_dict)

    if maxplayer:
        # Initialize score
        max_score = -np.inf

        for moves in poss_actions:
            # How would this move change my score?
            move_board = cc.apply_player_action(board=board,
                                                action=moves,
                                                player=player,
                                                copy=True)
            score = minimax(board=move_board,
                            player=player,
                            score_dict=score_dict,
                            depth=depth - 1,
                            alpha=alpha,
                            beta=beta,
                            maxplayer=False)[1]

            if score > max_score:
                max_score = score
                action = moves
            alpha = max(alpha, score)
            if beta <= alpha:
                break
        return action, max_score
    else:
        # Initialize opponent score
        min_score = np.inf
        opponent = pieces[pieces != player][0]

        for moves in poss_actions:
            # How would this move change my score?
            move_board = cc.apply_player_action(board=board,
                                                action=moves,
                                                player=opponent,
                                                copy=True)
            score = -minimax(board=move_board,
                             player=opponent,
                             score_dict=score_dict,
                             depth=depth - 1,
                             alpha=alpha,
                             beta=beta,
                             maxplayer=True)[1]

            if score < min_score:
                min_score = score
                action = moves
            beta = min(beta, score)
            if beta <= alpha:
                break
        return action, min_score
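A hedged usage sketch for this minimax: the score_dict values below are placeholders (the real pattern scores belong to board_score), and cc is the project's common module as used above.

import numpy as np

board = cc.initialize_game_state()
score_dict = np.array([0, 1, 4, 32])  # placeholder pattern scores, see board_score
action, score = minimax(board=board, player=PLAYER1, score_dict=score_dict,
                        depth=4, alpha=-np.inf, beta=np.inf, maxplayer=True)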
Example #5
    def test_connected_four_horizontal(self):
        c4_yes = common.initialize_game_state()
        common.apply_player_action(c4_yes, PlayerAction(0), common.PLAYER1)
        common.apply_player_action(c4_yes, PlayerAction(1), common.PLAYER1)
        common.apply_player_action(c4_yes, PlayerAction(2), common.PLAYER1)
        common.apply_player_action(c4_yes, PlayerAction(3), common.PLAYER1)

        c4_no = common.initialize_game_state()
        common.apply_player_action(c4_no, PlayerAction(0), common.PLAYER1)
        common.apply_player_action(c4_no, PlayerAction(1), common.PLAYER1)
        common.apply_player_action(c4_no, PlayerAction(2), common.PLAYER2)
        common.apply_player_action(c4_no, PlayerAction(3), common.PLAYER1)

        assert common.connected_four(c4_yes, PLAYER1) is True
        assert common.connected_four(c4_yes, PLAYER1, PlayerAction(3)) is True
        assert common.connected_four(c4_no, PLAYER1) is False
        assert common.connected_four(c4_no, PLAYER1, PlayerAction(3)) is False
Example #6
def generate_move_negamax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Generate move using negamax -- including some workaround to force winning moves and blocking moves.

    :param board: current board state
    :param player: current player
    :param saved_state:
    :return:
    """
    open_moves = get_valid_moves(board)
    # print(f'Open moves: {open_moves}')

    new_states = [
        apply_player_action(board, move, player, copy=True)
        for move in open_moves
    ]

    # if a move results in a win, play it
    winning_moves = np.array([
        check_end_state(state, player) for state in new_states
    ]) == GameState.IS_WIN
    if np.any(winning_moves):
        actions = open_moves[np.argwhere(winning_moves)].squeeze()
        if actions.size > 1:
            action = np.random.choice(actions)
        else:
            action = actions
        # print(f'playing action {action} for a win')
        return action, saved_state

    # if a move results in blocking an opponent's win, play it
    other_player = BoardPiece(player % 2 + 1)

    new_states_other = [
        apply_player_action(board, move, other_player, copy=True)
        for move in open_moves
    ]
    blocking_moves = np.array([
        check_end_state(state, other_player) for state in new_states_other
    ]) == GameState.IS_WIN
    if np.any(blocking_moves):
        actions = open_moves[np.argwhere(blocking_moves)].squeeze()
        if actions.size > 1:
            action = np.random.choice(actions)
        else:
            action = actions
        # print(f'playing action {action} for a block')
        return action, saved_state

    # otherwise, use the heuristic function to score possible states
    scores = [
        negamax_alpha_beta(state,
                           player,
                           MAX_DEPTH,
                           alpha=-np.inf,
                           beta=np.inf) for state in new_states
    ]

    # randomly select among best moves
    if np.sum(scores == np.max(scores)) > 1:
        best_moves = open_moves[np.argwhere(
            scores == np.max(scores))].squeeze()
        action = np.random.choice(best_moves)
    else:
        action = open_moves[np.argmax(scores)].squeeze()
    # print(f'Heuristic values: {scores}')
    # print(f'playing action {action} with heuristic value {np.max(scores)}')
    return action, saved_state
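negamax_alpha_beta itself is not included in this example. The sketch below shows one way it could look, written in the same calling convention (player is the side that just produced board) and reusing the helpers imported above; the heuristic evaluation function is hypothetical.

import numpy as np


def negamax_alpha_beta_sketch(board: np.ndarray, player: BoardPiece, depth: int,
                              alpha: float, beta: float) -> float:
    # Score `board` for `player`, who has just moved; (alpha, beta) bounds that score.
    state = check_end_state(board, player)
    if state == GameState.IS_WIN:
        return np.inf                    # player's last move already won
    if state == GameState.IS_DRAW:
        return 0.0
    if depth == 0:
        return heuristic(board, player)  # hypothetical static evaluation
    other = BoardPiece(player % 2 + 1)
    value = np.inf
    for move in get_valid_moves(board):
        child = apply_player_action(board, move, other, copy=True)
        # The child is scored from `other`'s point of view: negate it and
        # negate-and-swap the window accordingly.
        value = min(value, -negamax_alpha_beta_sketch(child, other, depth - 1,
                                                      -beta, -alpha))
        beta = min(beta, value)
        if beta <= alpha:
            break                        # the opponent already has a better reply
    return value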
Example #7

# test init, prior to creating further nodes
def test_init_node():
    assert np.all(initial_node.legal_moves == get_valid_moves(initial_state))
    assert np.all(initial_node.legal_moves == initial_node.unexpanded_moves)
    assert np.all(initial_node.board == initial_state)


# manually create a fully expanded node from the initial node
# just relies on the init method (and apply_player_action)
fully_expanded_node = copy.deepcopy(initial_node)

for move in fully_expanded_node.legal_moves:
    new_board = apply_player_action(board=fully_expanded_node.board,
                                    action=move,
                                    player=fully_expanded_node.to_play,
                                    copy=True)
    new_node = MonteCarloNode(
        new_board,
        to_play=BoardPiece(fully_expanded_node.to_play % 2 + 1),
        last_move=move,
        parent=fully_expanded_node)
    fully_expanded_node.children[move] = new_node

fully_expanded_node.expanded_moves = fully_expanded_node.legal_moves
fully_expanded_node._unexpanded_moves = []


def test_unexpanded_moves():
    assert np.all(initial_node.unexpanded_moves == initial_node.legal_moves)
    assert fully_expanded_node.unexpanded_moves == []
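These tests only cover construction and expansion of MonteCarloNode. For context, move selection on a fully expanded node is typically driven by UCB1; the sketch below assumes the nodes also carry hypothetical visits and wins counters, which the tests above do not show.

import numpy as np


def ucb1_best_child(node, c: float = np.sqrt(2)):
    # Pick the child maximizing wins/visits + c * sqrt(ln(parent visits) / visits).
    # `visits` and `wins` are assumed attributes, not part of the tested interface.
    def ucb1(child):
        return (child.wins / child.visits
                + c * np.sqrt(np.log(node.visits) / child.visits))
    return max(node.children.values(), key=ucb1)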