def build_book(book, num_rounds=100):
    """Accumulate win/loss statistics for opening moves into ``book``.

    For each round a fresh ``Isolation`` game is started. While the state's
    ``ply_count`` is at most 3, moves are drawn uniformly at random and each
    (state, player, action) triple is remembered. The rest of the game is
    played out with moves chosen by ``alpha_beta``. Once the game is over,
    every remembered opening action is credited +1 if the player who made it
    won, and -1 otherwise.

    Args:
        book: Nested mapping updated in place via ``book[state][action] += n``.
            It must already support that operation for unseen keys.
        num_rounds: Number of games to play.

    Returns:
        The same ``book`` object that was passed in.
    """
    for _ in range(num_rounds):
        state = Isolation()
        opening = []

        # Opening phase: random moves, recorded for scoring afterwards.
        while state.ply_count <= 3:
            action = random.choice(state.actions())
            opening.append((state, state.player(), action))
            state = state.result(action)

        # Playout phase: let the search pick every move until the game ends.
        while not state.terminal_test():
            state = state.result(alpha_beta(state, state.player()))

        win_0 = state.utility(0) > 0
        win_1 = state.utility(1) > 0
        # Exactly one side must come out with a positive utility.
        assert win_0 != win_1

        for seen_state, mover, action in opening:
            # +1 when the mover is on the winning side, -1 otherwise.
            delta = 1 if (mover == 0) == win_0 else -1
            book[seen_state][action] += delta

    return book
 def _simulation(self, state: Isolation, leaf_player_id) -> float:
     """Play random moves from ``state`` until the game is over.

     Each step applies an action drawn with ``random.choice`` from
     ``state.actions()``. If ``state`` is already terminal no move is made.

     Returns:
         ``utility(leaf_player_id)`` of the terminal state that was reached.
     """
     while not state.terminal_test():
         state = state.result(random.choice(state.actions()))
     return state.utility(leaf_player_id)
class GenomeTester:
    """Play one game in which player 0 follows a scripted move list.

    Player 0 opens on ``init_cell`` and player 1 answers with a random
    legal move. After that, player 0 takes its moves from ``genome`` for
    as long as entries remain and are legal, and otherwise falls back to a
    depth-limited minimax search. Player 1 always uses the minimax search.
    """

    def __init__(self, init_cell, genome, search_depth):
        """Set up a fresh board and zeroed move counters.

        Args:
            init_cell: Opening action for player 0; must lie in 0..114.
            genome: Indexable sequence of scripted actions for player 0.
            search_depth: Depth passed to the minimax search.
        """
        assert init_cell <= 114 and init_cell >= 0, "Invalid opening cell value"
        self.init_cell = init_cell
        self.board = Isolation()
        self.genome = genome
        self.search_depth = search_depth
        # Per-player move counters; the two opening moves are not counted.
        self.player0_moves = 0
        self.player1_moves = 0
        self.active_player = 0
        self.move_history = []

    def run(self):
        """Play the game to a terminal state and score the genome.

        Returns:
            ``(genome, fitness)`` where fitness is ``float("-inf")`` when
            player 0's final utility is negative, ``NEG_INF_INT`` when it
            is zero, and otherwise the negated total of counted moves, so
            that shorter wins rank higher.
        """

        def liberty_margin(node, player_id):
            # Heuristic: own liberty count minus the opponent's.
            own_loc, opp_loc = node.locs[player_id], node.locs[1 - player_id]
            return len(node.liberties(own_loc)) - len(node.liberties(opp_loc))

        def choose_move(root, depth, player_id):
            # Depth-limited minimax from player_id's point of view.
            def evaluate(node, remaining, maximizing):
                if node.terminal_test():
                    return node.utility(player_id)
                if remaining <= 0:
                    return liberty_margin(node, player_id)
                child_values = (
                    evaluate(node.result(move), remaining - 1, not maximizing)
                    for move in node.actions()
                )
                if maximizing:
                    return max(child_values, default=float("-inf"))
                return min(child_values, default=float("inf"))

            # The opponent replies to each root move, so children minimize.
            return max(
                root.actions(),
                key=lambda move: evaluate(root.result(move), depth - 1, False),
            )

        # Opening: fixed cell for player 0, random reply for player 1.
        # Neither move increments the counters or enters move_history.
        if self.player0_moves == 0:
            self.board = self.board.result(self.init_cell)

        if self.player1_moves == 0:
            self.board = self.board.result(random.choice(self.board.actions()))

        while not self.board.terminal_test():
            if self.active_player == 0:
                scripted = self.player0_moves < len(self.genome)
                next_move = self.genome[self.player0_moves] if scripted else None
                # Search when the genome is exhausted or its move is illegal
                # (most likely a blocked cell).
                if not scripted or next_move not in self.board.actions():
                    next_move = choose_move(self.board,
                                            self.search_depth,
                                            player_id=0)
                self.player0_moves += 1
                self.active_player = 1
            else:
                next_move = choose_move(self.board,
                                        self.search_depth,
                                        player_id=1)
                self.player1_moves += 1
                self.active_player = 0

            self.board = self.board.result(next_move)
            # Moves by either player are logged only while genome entries
            # remain for player 0 after the counter update above.
            if self.player0_moves < len(self.genome):
                self.move_history.append(next_move)

        outcome = self.board.utility(player_id=0)
        if outcome < 0:  # player 0 lost
            fitness = float("-inf")
        elif outcome == 0:  # game did not finish
            fitness = NEG_INF_INT
        else:
            fitness = -1.0 * (self.player0_moves + self.player1_moves)
        return self.genome, fitness