Ejemplo n.º 1
0
    def search(self,
               start_infoset: TichuState,
               iterations: int,
               cheat: bool = False) -> TichuAction:
        """Run ``iterations`` playouts from ``start_infoset`` and pick an action.

        Every iteration draws a new determinization of ``start_infoset`` (as
        seen by ``start_infoset.player_id``), plays it to a terminal state
        using ``self.policy`` while recording each (state, action) pair in a
        copy of the history returned by ``self.search_init``, and then feeds
        every record produced by ``self.capture`` into
        ``self.backpropagation`` together with the terminal reward vector.

        :param start_infoset: state the search starts from.
        :param iterations: number of playouts to run.
        :param cheat: forwarded unchanged to ``determinization``.
        :return: the result of ``self.best_action(start_infoset)``.
        """
        logging.debug(
            f"Starting Icarus search for {iterations} iterations; cheating: {cheat}"
        )
        # Shared starting history; each playout works on its own copy.
        initial_history = self.search_init(start_infoset)

        for _ in range(iterations):
            playout = initial_history.copy()
            determinized_root = start_infoset.determinization(
                observer_id=start_infoset.player_id, cheat=cheat)

            # Play until a terminal state is reached, logging every step.
            current = determinized_root
            while True:
                if current.is_terminal():
                    break
                chosen = self.policy(history=playout, state=current)
                playout.append(state=current, action=chosen)
                current = current.next_state(chosen, infoset=True)

            # The terminal state is recorded too, with no action attached.
            playout.append(state=current, action=None)
            rewards = current.reward_vector()

            # Propagate the outcome to every captured record.
            for record, capture_context in self.capture(playout,
                                                        determinized_root):
                self.backpropagation(record, capture_context, rewards)

        return self.best_action(start_infoset)
Ejemplo n.º 2
0
    def search(self,
               root_state: TichuState,
               observer_id: int,
               iterations: int,
               cheat: bool = False,
               clear_graph_on_new_root=True) -> TichuAction:
        """Run ``iterations`` search iterations from ``root_state``.

        Each iteration determinizes ``root_state`` for the observer, descends
        with ``self.tree_policy``, evaluates the reached leaf with
        ``self.rollout_policy`` and passes the resulting reward vector to
        ``self.backpropagation``.

        :param root_state: state to search from; it is registered through
            ``self.add_root`` before the iterations start.
        :param observer_id: must satisfy ``observer_id in range(4)``; stored
            on ``self.observer_id``.
        :param iterations: number of iterations to run.
        :param cheat: forwarded unchanged to ``determinization``.
        :param clear_graph_on_new_root: when truthy and the root's node id is
            not yet in ``self.graph``, the graph is cleared first.
        :return: the result of ``self.best_action(root_state)``.
        """
        logging.debug(
            f"started {self.__class__.__name__} with observer {observer_id}, for {iterations} iterations and cheat={cheat}"
        )
        check_param(observer_id in range(4))
        self.observer_id = observer_id
        root_nid = self._graph_node_id(root_state)

        # Keep the existing graph if the root is already known or if clearing
        # was disabled by the caller; otherwise start from an empty graph.
        if root_nid in self.graph or not clear_graph_on_new_root:
            logging.debug("Could keep the graph :)")
        else:
            self.graph.clear()
        self.add_root(root_state)

        for _ in range(iterations):
            self._init_iteration()
            determinized = root_state.determinization(
                observer_id=self.observer_id, cheat=cheat)
            leaf = self.tree_policy(determinized)
            rewards = self.rollout_policy(leaf)
            # Sanity check: the rollout must yield exactly four entries.
            assert len(rollout_result := rewards) == 4
            self.backpropagation(reward_vector=rollout_result)

        # Pick the action before logging, mirroring the established order.
        chosen = self.best_action(root_state)
        logging.debug(f"size of graph after search: {len(self.graph)}")
        return chosen