Code Example #1

import unittest

import networkx as nx

# StateManager is part of the project's own code; the import path below is an assumption.
from state_manager import StateManager


class TestBigStateManager(unittest.TestCase):
    def setUp(self) -> None:
        self.state_manager = StateManager(8, 1)
        self.valid_action = "0,0:1"
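        # Actions are "<row>,<col>:<player>"; states are a board_size**2-character board
        # string ("0" = empty cell) followed by ":<player to move>"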

    def test_build_board(self):
        # Check board shape
        self.assertSequenceEqual(
            self.state_manager.board.shape,
            (self.state_manager.board_size, self.state_manager.board_size),
        )

    def test_check_and_extract_action_string(self):
        self.assertEqual(
            self.state_manager.check_and_extract_action_string(
                self.valid_action),
            (0, 0, 1),
        )

    def test_perform_action(self):
        # Test update of board and state: expect "1" in cell (0, 0), the remaining 63
        # cells empty, and ":2" at the end (player 2 to move)
        correct_board_string = "1" + (
            ":" + str(2)).zfill(self.state_manager.board_size**2 + 1)
        self.state_manager.perform_action(self.valid_action)
        self.assertEqual(self.state_manager.board[0, 0], 1)
        self.assertEqual(self.state_manager.get_state(), correct_board_string)
        # Do some moves
        self.state_manager.perform_action("3,3:2")
        self.state_manager.perform_action("2,3:1")
        self.state_manager.perform_action("3,2:2")
        # Check that the moves are in the graphs
        self.assertSequenceEqual(list(self.state_manager.P2graph.nodes),
                                 ["3,3:2", "3,2:2"])
        self.assertSequenceEqual(list(self.state_manager.P1graph.nodes),
                                 ["0,0:1", "2,3:1"])
        # Check that there is an edge between the two adjacent pieces
        self.assertTrue(self.state_manager.P2graph.has_edge("3,3:2", "3,2:2"))
        # There should not be an edge between the nodes of the other player
        self.assertFalse(self.state_manager.P1graph.has_edge("0,0:1", "2,3:1"))

    def test_generate_possible_actions(self):
        # Create a list of (state, expected number of possible actions) tuples
        state_list = [
            (
                self.state_manager.get_state(),
                self.state_manager.board_size**2,
            ),
            ("1" * 60 + "0" * 4 + ":1", 4),
        ]
        for state_tuple in state_list:
            self.assertEqual(
                len(
                    self.state_manager.generate_possible_actions(
                        state_tuple[0])),
                state_tuple[1],
            )
        # checking if the actions are correct
        self.assertEqual(
            self.state_manager.generate_possible_actions(
                ("10" + "1" * 62 + ":2")),
            ["0,1:2"],
        )
        self.assertSequenceEqual(
            self.state_manager.generate_possible_actions(("1" * 61 + "001:1")),
            ["7,5:1", "7,6:1"],
        )

    def test_generate_child_states(self):
        initial_state = self.state_manager.get_state()
        # checking number of child states
        self.assertEqual(
            len(self.state_manager.generate_child_states(initial_state)),
            self.state_manager.board_size**2,
        )
        self.assertEqual(
            len(self.state_manager.generate_child_states("2" * 63 + "0:1")), 1)
        # Check that the player switches
        self.assertEqual(
            self.state_manager.generate_child_states("2" * 63 + "0:1")[0][-1],
            "2")
        self.assertEqual(
            self.state_manager.generate_child_states("2" * 63 + "0:2")[0][-1],
            "1")
        # Checking that output is correct
        self.assertSequenceEqual(
            self.state_manager.generate_child_states("2" * 63 + "0:1"),
            ["2" * 63 + "1:2"],
        )
        self.assertSequenceEqual(
            self.state_manager.generate_child_states("2" * 30 + "00" +
                                                     "1" * 32 + ":2"),
            [
                "2" * 30 + "20" + "1" * 32 + ":1",
                "2" * 30 + "02" + "1" * 32 + ":1"
            ],
        )

    def test_set_state_manager(self):
        initial_state = (
            "2112102122011010211221002101022212201222022111011220021110221001:1"
        )
        self.state_manager.set_state_manager(initial_state)
        path2 = [
            "0,0:2",
            "1,0:2",
            "2,0:2",
            "3,0:2",
        ]
        path1 = [
            "0,4:1",
            "1,3:1",
            "2,2:1",
            "3,1:1",
            "4,0:1",
        ]

        # Check some of the paths in the state
        def check_path(path, correct_graph, invalid_graph):
            for i in range(len(path) - 1):
                self.assertTrue(correct_graph.has_edge(path[i], path[i + 1]))
                self.assertFalse(invalid_graph.has_edge(path[i], path[i + 1]))
            self.assertTrue(nx.has_path(correct_graph, path[0], path[-1]))

        check_path(path2, self.state_manager.P2graph,
                   self.state_manager.P1graph)
        check_path(path1, self.state_manager.P1graph,
                   self.state_manager.P2graph)

    def test_is_end_state(self):
        end_state_p1 = (
            "2112102122011010211221002101022212201222122111011220021110221001:2"
        )
        self.state_manager.set_state_manager(end_state_p1)
        self.assertTrue(self.state_manager.is_end_state())

    def test_convert_flattened_index_to_cords(self):
        # index 8 should be second row first col
        self.assertSequenceEqual(
            self.state_manager.convert_flattened_index_to_cords(8), (1, 0))
        self.assertSequenceEqual(
            self.state_manager.convert_flattened_index_to_cords(18), (2, 2))
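
To run this suite on its own, a standard unittest entry point is enough; a minimal sketch, assuming the tests are executed directly as a script:

if __name__ == "__main__":
    # Run the StateManager tests with verbose output
    unittest.main(verbosity=2)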
Code Example #2

import math
import random
import sys

import numpy as np
from tkinter import Button, Canvas, Entry, Label, Tk, mainloop

# Project-specific imports; the module paths below are assumptions.
from anet import ANET
from cell import Cell
from constants import FONT, FONT_COLOR, FONT_SIZE, PLAYER_ONE_COLOR, PLAYER_TWO_COLOR
from state_manager import StateManager


class GameVisualizer:
    def __init__(
        self,
        board_size,
        player1=None,
        player2=None,
        starting_player=1,
        random_play=False,
        frame_rate=1000,
        initial_state=None,
        cartesian_cords=True,
    ):
        # Game logic
        self.board_size = board_size
        self.initial_state = initial_state
        self.state_manager = StateManager(board_size, starting_player)
        if initial_state:
            self.state_manager.set_state_manager(initial_state)
        self.random_play = random_play

        # Setting players
        self.player1 = player1
        self.player2 = player2

        # WINDOW
        self.master = Tk()
        self.master.title("HexGameVisualizer")
        self.master.protocol("WM_DELETE_WINDOW", self.quit_application)

        self.action_input = Entry(self.master)
        self.action_input.bind("<Return>", lambda event: self.button_clicked())
        self.action_input.pack()

        self.perform_action_button = Button(
            self.master, text="perform move", command=self.button_clicked
        )
        self.perform_action_button.pack()

        # TODO: Add label to describe the players currently playing
        self.label = Label(self.master)
        self.label.pack()

        self.start_pos = (60, 30)
        self.canvas = Canvas(
            self.master,
            width=self.start_pos[0] + self.board_size * 55 + self.start_pos[0],
            height=self.start_pos[1] + self.board_size * 33 + self.start_pos[1],
        )
        self.canvas.pack()

        # CONSTANTS
        self.frame_rate = frame_rate
        self.border_size = 10
        self.counter = 0
        self.size = 20
        self.cartesian_cords = cartesian_cords

        # LISTS CONTROLLING GAME AND DRAWING OF BOARD
        self.board = []
        self.board_border = []
        self.actions = []
        self.player_pieces = []

    def quit_application(self):
        self.master.quit()
        sys.exit()

    def add_action(self, action: str):
        self.actions.append(action)

    def preprocess_actions(self):
        new_actions = []
        for action in self.actions:
            new_actions.append(GameVisualizer.preprocess_action(action))
        return new_actions

    @staticmethod
    def preprocess_action(action: str):
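        # e.g. "2,3:1" -> (2, 3, 1)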
        positions, player = action.split(":")
        x_pos, y_pos = positions.split(",")
        return int(x_pos), int(y_pos), int(player)

    def run(self):
        self.actions = self.preprocess_actions()
        self.build_and_draw_board()
        if self.initial_state:
            self.state_manager.set_state_manager(self.initial_state)
            self.draw_initial_state()
        if len(self.actions):
            self.master.after(self.frame_rate, self.draw)
        mainloop()

    def model_perform_action(self, model: ANET):
        print(self.state_manager.get_state())
        distribution = model.predict(self.state_manager.get_state())
        print(distribution)
        argmax_distribution_index = int(
            np.argmax(distribution)
        )  # Greedy best from distribution
        action = self.state_manager.get_action_from_flattened_board_index(
            argmax_distribution_index, self.state_manager.get_state()
        )
        self.perform_action(GameVisualizer.preprocess_action(action))

    def button_clicked(self):
        if self.state_manager.is_end_state():
            return
        try:
            current_player = self.player1 if self.state_manager.current_player() == 1 else self.player2
            if current_player:
                self.model_perform_action(current_player)
            else:
                input_action = (
                    f"{self.action_input.get()}:{self.state_manager.current_player()}"
                )
                if self.random_play:
                    input_action = random.choice(
                        self.state_manager.generate_possible_actions(
                            self.state_manager.get_state()
                        )
                    )
                self.perform_action(GameVisualizer.preprocess_action(input_action))
            self.action_input.delete(0, "end")
        except ValueError:
            self.label["text"] = "Something went wrong"
        if self.state_manager.is_end_state():
            self.label["text"] = "Game over"

    def draw_initial_state(self):
        initial_board = self.state_manager.build_board(self.initial_state)
        for row_index, row in enumerate(initial_board):
            for col_index, player in enumerate(row):
                if player:
                    self.player_pieces.append(
                        Cell(
                            self.canvas,
                            self.board[row_index][col_index].top,
                            player=player,
                        )
                    )

    def get_canvas_position(self, position: (int, int)) -> (int, int):
        x, y = self.start_pos
        x += self.size * 2 * position[1] + self.size * position[0]
        y += (self.size + self.size / 1.7) * position[0]
        return x, y

    def build_and_draw_board(self):
        for i in range(self.board_size):
            row = []
            for j in range(self.board_size):
                row.append(
                    Cell(
                        self.canvas,
                        self.get_canvas_position((i, j)),
                        draw_on_init=False,
                    )
                )
            self.board.append(row)
        self.draw_board_border()
        for row in self.board:
            for cell in row:
                cell.draw()

    def get_column(self, target_col_index: int):
        return [row[target_col_index] for row in self.board]

    def draw_board_border(self):
        borders = []
        first_row = self.board[0]
        borders.append(
            self.canvas.create_polygon(
                first_row[0].left1[0],
                first_row[0].left1[1],
                first_row[0].left1[0] - self.border_size,
                first_row[0].left1[1] - self.border_size,
                first_row[0].left1[0],
                first_row[0].left1[1] - 2 * self.border_size,
                first_row[-1].top[0] + 2 * self.border_size,
                first_row[-1].right1[1] - 2 * self.border_size,
                first_row[-1].top[0],
                (first_row[-1].right1[1] + first_row[-1].right2[1]) / 2,
                fill=PLAYER_ONE_COLOR,
            )
        )
        for i, cell in enumerate(first_row):
            borders.append(
                self.canvas.create_text(
                    cell.top[0] - 1.5 * self.border_size,
                    cell.top[1],
                    text=str(i) if self.cartesian_cords else chr(65 + i),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
        first_column = self.get_column(0)
        borders.append(
            self.canvas.create_polygon(
                first_column[0].left1[0],
                first_column[0].left1[1],
                first_column[0].left1[0] - self.border_size,
                first_column[0].left1[1] - self.border_size,
                first_column[0].left1[0] - 2 * self.border_size,
                first_column[0].left1[1],
                first_column[-1].bottom[0] - 2 * self.border_size,
                first_column[-1].left2[1] + 2 * self.border_size,
                first_column[-1].bottom[0],
                (first_column[-1].left2[1] + first_column[-1].left1[1]) / 2,
                fill=PLAYER_TWO_COLOR,
            )
        )
        for i, cell in enumerate(first_column):
            borders.append(
                self.canvas.create_text(
                    cell.left1[0] - self.border_size / 1.5,
                    (cell.left1[1] + cell.left2[1]) / 2,
                    text=str(i) if self.cartesian_cords else str(i + 1),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
        last_row = self.board[-1]
        borders.append(
            self.canvas.create_polygon(
                last_row[0].bottom[0],
                (last_row[0].right1[1] + last_row[0].right2[1]) / 2,
                last_row[0].bottom[0] - 2 * self.border_size,
                last_row[0].right2[1] + 2 * self.border_size,
                last_row[-1].right2[0],
                last_row[-1].right2[1] + 2 * self.border_size,
                last_row[-1].right2[0] + self.border_size,
                last_row[-1].right2[1] + self.border_size,
                last_row[-1].right2[0],
                last_row[-1].right2[1],
                fill=PLAYER_ONE_COLOR,
            )
        )
        for i, cell in enumerate(last_row):
            borders.append(
                self.canvas.create_text(
                    cell.bottom[0] + 1.2 * self.border_size,
                    cell.bottom[1],
                    text=str(i) if self.cartesian_cords else chr(65 + i),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
        last_column = self.get_column(self.board_size - 1)
        borders.append(
            # Bottom up
            self.canvas.create_polygon(
                last_column[-1].right2[0],
                last_column[-1].right2[1],
                last_column[-1].right2[0] + self.border_size,
                last_column[-1].right2[1] + self.border_size,
                last_column[-1].right2[0] + 2 * self.border_size,
                last_column[-1].right2[1],
                last_column[0].top[0] + 2 * self.border_size,
                last_column[0].right1[1] - 2 * self.border_size,
                last_column[0].top[0],
                (last_column[0].right1[1] + last_column[0].right2[1]) / 2,
                fill=PLAYER_TWO_COLOR,
            )
        )
        for i, cell in enumerate(last_column):
            borders.append(
                self.canvas.create_text(
                    cell.right1[0] + self.border_size / 1.5,
                    (cell.left1[1] + cell.left2[1]) / 2,
                    text=str(i) if self.cartesian_cords else str(i + 1),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )

        return borders

    def get_board_pos(self, pos: (int, int)):
        return self.board_size * pos[0] + pos[1]

    def get_cords(self, board_pos: int):
        return math.floor(board_pos / self.board_size), board_pos % self.board_size

    def draw(self):
        self.perform_action(self.actions.pop(0))
        if len(self.actions) > 0:
            self.master.after(self.frame_rate, self.draw)

    def perform_action(self, action: (int, int, int)):
        print(action)
        x_pos, y_pos, player = action
        self.player_pieces.append(
            Cell(self.canvas, self.board[x_pos][y_pos].top, player=player)
        )
        self.state_manager.perform_action(f"{x_pos},{y_pos}:{player}")
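
A minimal sketch of driving the visualizer by hand; the 5x5 board, the frame rate and the three scripted moves are illustrative only, and the move strings use the same "<row>,<col>:<player>" format as the StateManager examples above.

if __name__ == "__main__":
    # Replay a few scripted moves on a 5x5 board, one move per second
    visualizer = GameVisualizer(board_size=5, frame_rate=1000)
    for move in ("0,0:1", "1,1:2", "0,1:1"):
        visualizer.add_action(move)
    visualizer.run()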
Code Example #3

import math
import random

import numpy as np

# Project-specific imports; the module paths below are assumptions.
from state_manager import StateManager
from state_tree import StateTree


class MCTS:
    def __init__(
        self,
        state_manager: StateManager,
        actor_net,
        max_tree_height=5,
        c=1,
        number_of_simulations=10,
        verbose=False,
        random_simulation_rate=0.2,
    ):
        self.state_manager = StateManager(state_manager.board_size,
                                          state_manager.current_player())
        self.tree = StateTree(self.state_manager.get_state())
        self.tree.add_state_node(self.tree.root_state,
                                 self.state_manager.is_end_state())
        self.c = c
        self.max_tree_height = max_tree_height
        self.actor_net = actor_net
        self.number_of_simulations = number_of_simulations
        self.verbose = verbose
        self.random_simulation_rate = random_simulation_rate

    def run(self, root_state: str, progress: float):
        """
        Main method: Runs the monte carlo tree search algorithm, tree traversal -> rollout -> backprop, m times.
        Then finds the greedy best move from root state of the current tree
        :param root_state: state to run the algorithm from -> root node
        :return: the greedy best action from root node of the current tree
        """
        self.tree.cut_tree_with_new_root_node(root_state)
        self.state_manager.set_state_manager(self.tree.root_state)
        for i in range(self.number_of_simulations):
            rollout_state = self.traverse_tree(self.tree.root_state, depth=0)
            simulation_reward = self.simulate(rollout_state)
            self.backpropagate(rollout_state, simulation_reward)
            self.state_manager.set_state_manager(self.tree.root_state)

        distribution = self.get_distribution(self.tree.root_state)
        self.actor_net.add_case(self.tree.root_state, distribution.copy())
        if random.random() > math.tanh(progress):
            chosen_action = self.choose_action_stochastically(
                np.array(distribution), self.tree.root_state)
        else:
            chosen_action = self.epsilon_greedy_action_from_distribution(
                np.array(distribution), self.tree.root_state, epsilon=0.0)
        if self.verbose:
            print("distribution", distribution)
            print("chosen_action", chosen_action)
        return chosen_action

    # MAIN ALGORITHM METHODS
    def traverse_tree(self, state: str, depth: int) -> str:
        """
        Traversing the tree expanding nodes by using the tree policy (tree_policy)
        :param state: current state
        :param depth: current depth of the tree
        :return chosen state to simulate from
        """
        if depth == self.max_tree_height or self.tree.is_end_state(state):
            return state
        # If the current state has not explored its children yet: add them all to the graph and choose one to simulate from
        elif not self.tree.get_outgoing_edges(state):
            children = self.expand(state)
            return self.choose_random_child(state, children)
        else:
            child = self.tree_policy(state)
            self.state_manager.check_difference_and_perform_action(child)
            if self.tree.get_state_number_of_visits(child) == 0:
                self.tree.set_end_state(child,
                                        self.state_manager.is_end_state())
            return self.traverse_tree(child, depth + 1)

    def expand(self, state) -> [str]:
        """
        Expanding all child nodes from the input state and adding them to the graph.
        :param state: state to find all children from
        :return: list of all child states
        """
        children = StateManager.generate_child_states(state)
        for child in children:
            if child not in self.tree.get_nodes():
                self.tree.add_state_node(child)
            self.tree.add_edge(state, child)
        return children

    def simulate(self, state: str):
        """
        Performs one roll-out using the actor net as policy
        :return: return 1 if the simulation ends in player "true" winning, -1 otherwise
        """
        if self.state_manager.get_state() != state:
            raise ValueError(
                "The state manager is not set to the start of the simulation")
        while not self.state_manager.is_end_state():
            if random.random() < self.random_simulation_rate:
                distribution = self.actor_net.predict(
                    self.state_manager.get_state())
                chosen_action = self.epsilon_greedy_action_from_distribution(
                    distribution, self.state_manager.get_state(), epsilon=0.0)
            else:
                chosen_action = random.choice(
                    self.state_manager.generate_possible_actions(
                        self.state_manager.get_state()))
            self.state_manager.perform_action(chosen_action)
        return MCTS.get_end_state_reward(self.state_manager.current_player())

    def backpropagate(self, state: str, simulation_reward: int):
        """
        Starts at rollout start state and jumps up in the tree updating the nodes sap and number of visits
        :param state: rollout start state
        :param simulation_reward: reward from simulation
        """
        if state == self.tree.root_state:
            self.tree.increment_state_number_of_visits(state)
            return
        parent_state = self.tree.get_parent(state)

        self.tree.increment_state_number_of_visits(state)
        self.tree.increment_edge_number_of_visits(parent_state, state)
        edge_times_enc = self.tree.get_edge_number_of_visits(
            parent_state, state)
        edge_sap_value = self.tree.get_sap_value(parent_state, state)
        new_sap_value = (self.tree.get_sap_value(parent_state, state) +
                         (simulation_reward - edge_sap_value) / edge_times_enc)
        self.tree.set_sap_value(parent_state, state, new_sap_value)
        self.tree.set_active_edge(parent_state, state, False)

        self.backpropagate(parent_state, simulation_reward)

    # HELPER METHODS

    def tree_policy(self, state: str) -> str:
        """
        Using the uct score to determine the child state of a input state
        :param state: input state
        :return: child state
        """
        state_number_of_visits = self.tree.get_state_number_of_visits(state)
        if self.state_manager.get_player(state) == 1:
            best_edge = self.tree.get_outgoing_edges(
                state,
                sort_by_function=lambda edge: self.compute_uct(
                    self.tree.get_sap_value(*edge),
                    state_number_of_visits,
                    self.tree.get_edge_number_of_visits(*edge),
                    True,
                ),
            )[0]
        else:
            best_edge = self.tree.get_outgoing_edges(
                state,
                sort_by_function=lambda edge: self.compute_uct(
                    self.tree.get_sap_value(*edge),
                    state_number_of_visits,
                    self.tree.get_edge_number_of_visits(*edge),
                    False,
                ),
            )[-1]
        parent, best_child = best_edge
        self.tree.set_active_edge(parent, best_child, True)
        return best_child

    def compute_uct(
        self,
        sap_value: float,
        number_of_visits_node: int,
        number_of_visits_edge: int,
        maximizing_player: bool,
    ) -> float:
        """
        Computes the uct for the tree policy
        :param sap_value: sap value for the edge
        :param number_of_visits_node: number of visits for the parent state
        :param number_of_visits_edge: number of visits for the edge between the two nodes
        :param maximizing_player: if the current player is the maximizing player
        :return: uct value
        """
        uct = sap_value
        usa_term = self.c * math.sqrt(
            math.log(number_of_visits_node) / (1 + number_of_visits_edge))
        if maximizing_player:
            uct += usa_term
        else:
            uct -= usa_term
        return uct

    def greedy_best_action(self, state: str) -> str:
        sorted_list = self.tree.get_outgoing_edges(
            state,
            sort_by_function=lambda edge: self.tree.get_edge_number_of_visits(
                *edge),
        )
        return self.state_manager.get_action(*sorted_list[0])

    def choose_random_child(self, parent_state: str, child_list: [str]) -> str:
        """
        Helper method choosing a random state from the child list, updating state manager
        and adding edge and node parameters
        :param parent_state: parent state for the child list (to set edge parameters)
        :param child_list: list of children from parent state
        :return: chosen child
        """
        child = random.choice(child_list)
        self.state_manager.check_difference_and_perform_action(child)
        self.tree.set_end_state(child, self.state_manager.is_end_state())
        self.tree.set_active_edge(parent_state, child, True)
        return child

    def epsilon_greedy_action_from_distribution(self,
                                                distribution: np.ndarray,
                                                state: str,
                                                epsilon=0.2):
        """
        Chooses an epsilon greedy index from the distribution converting that index to an action
        :param distribution: distribution from number of simulations per node
        :param state: current state to calculate action
        :param epsilon: the epsilon value to be used
        :return: actionstring
        """
        if random.random() > epsilon:
            chosen_index = int(np.argmax(distribution))
        else:
            # Choose a random index from those with positive probability
            # (prob == 0 might be occupied cells on the board)
            positive_indices = [
                i[0] for i, prob in np.ndenumerate(distribution) if prob > 0
            ]
            if not positive_indices:
                chosen_index = int(np.argmax(distribution))
            else:
                chosen_index = random.choice(positive_indices)
        return self.state_manager.get_action_from_flattened_board_index(
            chosen_index, state)

    @staticmethod
    def get_end_state_reward(current_player: int) -> int:
        """
        We have chosen player 1 to be "us", giving a positive reward if player 1 wins.
        :param current_player: current player for the state manager
        :return: reward for end state
        """
        return -1 if current_player == 1 else 1

    def get_distribution(self, state: str):
        """
        Returns the distribution of total visits for child nodes of input state
        :param state: state to get distribution from
        :return: a normalized list of length equal to the total number of positions on the board
        """

        parent_board, parent_player = StateManager.extract_state(state)
        child_states = self.tree.get_child_states(state)
        change_indices_dict = {}
        total_visits = 0
        for child in child_states:
            child_board, child_player = StateManager.extract_state(child)
            for i in range(len(child_board)):
                if parent_board[i] != child_board[i]:
                    child_number_of_visits = self.tree.get_edge_number_of_visits(
                        state, child)
                    change_indices_dict[i] = child_number_of_visits
                    total_visits += child_number_of_visits
                    break

        return [
            change_indices_dict[index] /
            total_visits if index in change_indices_dict else 0
            for index in range(self.state_manager.board_size**2)
        ]

    def set_random_simulation_rate(self, new_rate: float):
        self.random_simulation_rate = new_rate

    def choose_action_stochastically(self, distribution, state):
        """Samples a board index from the (normalized) visit distribution and converts it to an action."""
        chosen_index = np.random.choice([i for i in range(len(distribution))],
                                        p=distribution)
        return self.state_manager.get_action_from_flattened_board_index(
            chosen_index, state)
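
A rough end-to-end sketch of asking the search for a single move. DummyNet is a hypothetical stand-in for the project's ANET (anything exposing predict and add_case will do), the board size and simulation count are arbitrary, and it assumes empty cells are encoded as "0" in the state string, as in the examples above.

class DummyNet:
    """Hypothetical ANET stand-in: uniform prediction over empty cells, add_case is a no-op."""

    def predict(self, state):
        board, _ = StateManager.extract_state(state)
        mask = np.array([1.0 if cell == "0" else 0.0 for cell in board])
        return mask / mask.sum()

    def add_case(self, state, distribution):
        pass


if __name__ == "__main__":
    state_manager = StateManager(4, 1)
    mcts = MCTS(state_manager, DummyNet(), number_of_simulations=25)
    # progress=0.0 keeps move selection fully stochastic for this first move
    action = mcts.run(state_manager.get_state(), progress=0.0)
    print("Chosen opening move:", action)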