def expand(self, leaf_state: TichuState) -> None:
    leaf_nid = self._graph_node_id(leaf_state)
    for action in leaf_state.possible_actions_gen():
        to_nid = self._graph_node_id(state=leaf_state.next_state(action))
        self.add_child_node(from_nid=leaf_nid, to_nid=to_nid, action=action)
def _tree_policy(self, history: StateActionHistory, state: TichuState) -> TichuAction:
    """
    :param history: The StateActionHistory up to the given state
    :param state: Any game state in the game_graph; may be a leaf
    :return: The selected action
    """
    self._visited.add(state)
    # find max (return uniformly at random from the max uct values)
    poss_actions = set(state.possible_actions())
    max_val = -float('inf')  # initialising with 0 would break when all uct values are negative
    max_actions = list()
    for _, to_infoset, action in self.graph.out_edges_iter(nbunch=[state], data='action', default=None):
        if action in poss_actions:
            child_n = self.graph.node[to_infoset]
            self._possible.add(to_infoset)
            val = child_n['record'].uct(p=to_infoset.player_id)
            if max_val == val:
                max_actions.append(action)
            elif max_val < val:
                max_val = val
                max_actions = [action]
    ret = random.choice(max_actions)
    # logging.debug(f"tree policy -> {ret}")
    return ret
def search(self, start_infoset: TichuState, iterations: int, cheat: bool = False) -> TichuAction:
    logging.debug(f"Starting Icarus search for {iterations} iterations; cheating: {cheat}")
    # initialisation
    base_history = self.search_init(start_infoset)
    for iteration in range(iterations):
        # playout
        history = base_history.copy()
        root_state = start_infoset.determinization(observer_id=start_infoset.player_id, cheat=cheat)
        state = root_state
        while not state.is_terminal():
            action = self.policy(history=history, state=state)
            history.append(state=state, action=action)
            state = state.next_state(action, infoset=True)
        # state is now terminal
        history.append(state=state, action=None)
        reward_vector = state.reward_vector()
        # backpropagation
        for record, capture_context in self.capture(history, root_state):
            self.backpropagation(record, capture_context, reward_vector)
    return self.best_action(start_infoset)
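# A minimal sketch of the StateActionHistory used by search() above. The real
# class is not part of this listing, so the attribute names below are
# assumptions; only copy() and append(state, action) are implied by the call
# sites.
class StateActionHistory:
    def __init__(self):
        self._states = []
        self._actions = []

    def append(self, state, action) -> None:
        # action is None when the terminal state is appended
        self._states.append(state)
        self._actions.append(action)

    def copy(self) -> 'StateActionHistory':
        new = StateActionHistory()
        new._states = list(self._states)
        new._actions = list(self._actions)
        return new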
def best_action(self, state: TichuState) -> TichuAction:
    """
    :param state:
    :return: The best action to play from the given state
    """
    nid = self._graph_node_id(state)
    assert nid in self.graph
    assert self.graph.out_degree(nid) > 0
    possactions = state.possible_actions()
    max_a = next(iter(possactions))
    max_v = -float('inf')
    for _, to_nid, action in self.graph.out_edges_iter(nid, data='action', default=None):
        if action in possactions:
            rec = self.graph.node[to_nid]['record']
            val = rec.ucb(p=state.player_id)  # ucb is a method (see tree_selection), not a mapping
            logging.debug(f"   {val}->{action}: {rec}")
            if val > max_v:
                max_v = val
                max_a = action
    return max_a
def tree_selection(self, state: TichuState) -> TichuAction:
    """
    :param state:
    :return: The next action, chosen by UCB1
    """
    # logging.debug("Tree selection")
    nid = self._graph_node_id(state)
    # store record for backpropagation
    rec = self.graph.node[nid]['record']
    self._visited_records.add(rec)
    # find max (return uniformly at random from max UCB1 value)
    poss_actions = set(state.possible_actions())
    max_val = -float('inf')
    max_actions = list()
    for _, to_nid, action in self.graph.out_edges_iter(nbunch=[nid], data='action', default=None):
        # logging.debug("Tree selection looking at " + str(action))
        if action in poss_actions:
            child_record = self.graph.node[to_nid]['record']
            self._available_records.add(child_record)
            val = child_record.ucb(p=state.player_id)
            if max_val == val:
                max_actions.append(action)
            elif max_val < val:
                max_val = val
                max_actions = [action]
    next_action = random.choice(max_actions)
    # logging.debug(f"Tree selection -> {next_action}")
    return next_action
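# A minimal sketch of the per-node 'record' consulted above; the real record
# class is not shown in this listing, so everything below is an assumption.
# In the information-set setting, ucb(p) is UCB1 from player p's perspective,
# with the availability count (how often the node was available for
# selection, cf. self._available_records) taking the role of the parent
# visit count.
import math

class UCBRecordSketch:
    def __init__(self, nbr_players: int = 4, c: float = 0.7071):
        self._total_reward = [0.0] * nbr_players
        self._visits = 0
        self._availability = 0
        self._c = c

    def increase_availability(self) -> None:
        self._availability += 1

    def add_reward(self, reward_vector) -> None:
        self._visits += 1
        for p, r in enumerate(reward_vector):
            self._total_reward[p] += r

    def ucb(self, p: int) -> float:
        if self._visits == 0:
            return float('inf')  # force unvisited children to be tried first
        exploitation = self._total_reward[p] / self._visits
        exploration = self._c * math.sqrt(math.log(self._availability) / self._visits)
        return exploitation + exploration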
def _start_search(self, start_state: TichuState) -> TichuAction:
    logging.debug(f"agent {self.name} (pos {self._position}) starts search.")
    start_t = time.time()
    if len(start_state.possible_actions()) == 1:
        logging.debug(f"agent {self.name} (pos {self._position}): there is only one action to play.")
        action = next(iter(start_state.possible_actions()))
    else:
        action = self.search(start_state)
    logging.debug(f"agent {self.name} (pos {self._position}) found action: {action} (time: {time.time() - start_t})")
    return action
def _expand_tree(self, leaf_state: TichuState) -> None:
    """
    Expands all possible actions from the leaf_state.

    :param leaf_state: The state to expand from
    :return: None
    """
    # logging.debug('expanding tree')
    leaf_infostate = TichuState.from_tichustate(leaf_state)
    for action in leaf_state.possible_actions_gen():
        to_infoset = TichuState.from_tichustate(leaf_state.next_state(action))
        self._add_new_node_if_not_yet_added(infoset=to_infoset)
        self._add_new_edge(from_infoset=leaf_infostate, to_infoset=to_infoset, action=action)
def evaluate_state(self, state: TichuState) -> RewardVector:
    """
    :param state: The (terminal) state to evaluate
    :return: The reward vector; each player's reward is the point difference to the enemy team
    """
    points = state.count_points()
    assert points[0] == points[2] and points[1] == points[3]
    # reward is the difference to the enemy team
    r0 = points[0] - points[1]
    r1 = -r0
    return (r0, r1, r0, r1)
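# Worked example with hypothetical numbers: if state.count_points() returns
# (75, 25, 75, 25), then r0 = 75 - 25 = 50 and r1 = -50, so evaluate_state
# yields (50, -50, 50, -50): teammates share a reward, and the two teams'
# rewards are exact opposites (a zero-sum vector).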
def _must_expand(self, state: TichuState):
    if self._expanded:
        return False
    poss_acs = set(state.possible_actions())
    existing_actions = {
        action
        for _, _, action in self.graph.out_edges_iter(nbunch=[state], data='action', default=None)
    }
    if len(existing_actions) < len(poss_acs):
        return True
    # if all possible actions already exist -> must not expand
    return not poss_acs.issubset(existing_actions)
def is_fully_expanded(self, state: TichuState) -> bool:
    poss_acs = set(state.possible_actions())
    existing_actions = {
        action
        for _, _, action in self.graph.out_edges_iter(
            nbunch=[self._graph_node_id(state)], data='action', default=None)
    }
    if len(existing_actions) < len(poss_acs):
        return False
    # if all possible actions already exist -> the node is fully expanded
    return poss_acs.issubset(existing_actions)
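# A minimal sketch of how is_fully_expanded and expand typically interact in
# the selection phase. The actual tree_policy is not part of this listing, so
# the control flow below is an assumption based on standard MCTS:
def tree_policy_sketch(self, state: TichuState) -> TichuState:
    while not state.is_terminal():
        if not self.is_fully_expanded(state):
            self.expand(state)
            return state  # stop at the newly expanded node; roll out from here
        state = state.next_state(self.tree_selection(state))
    return state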
def _create_tichu_state(self, round_history, wish: Optional[CardValue], trick_on_table: Trick) -> TichuState:
    return TichuState(
        player_id=self.position,
        hand_cards=round_history.last_handcards,
        won_tricks=round_history.won_tricks,
        trick_on_table=trick_on_table,
        wish=wish,
        ranking=tuple(round_history.ranking),
        announced_tichu=frozenset(round_history.announced_tichus),
        announced_grand_tichu=frozenset(round_history.announced_grand_tichus),
        history=tuple(a for a in round_history.events
                      if isinstance(a, (SimpleWinTrickEvent, CombinationAction, PassAction))))
def search(self, root_state: TichuState, observer_id: int, iterations: int,
           cheat: bool = False, clear_graph_on_new_root: bool = True) -> TichuAction:
    logging.debug(f"started {self.__class__.__name__} with observer {observer_id}, "
                  f"for {iterations} iterations and cheat={cheat}")
    check_param(observer_id in range(4))
    self.observer_id = observer_id
    root_nid = self._graph_node_id(root_state)

    if root_nid not in self.graph and clear_graph_on_new_root:
        self.graph.clear()
    else:
        logging.debug("Could keep the graph :)")
    self.add_root(root_state)

    iteration = 0
    while iteration < iterations:
        iteration += 1
        self._init_iteration()
        # logging.debug("iteration " + str(iteration))
        state = root_state.determinization(observer_id=self.observer_id, cheat=cheat)
        # logging.debug("Tree policy")
        leaf_state = self.tree_policy(state)
        # logging.debug("rollout")
        rollout_result = self.rollout_policy(leaf_state)
        # logging.debug("backpropagation")
        assert len(rollout_result) == 4
        self.backpropagation(reward_vector=rollout_result)

    action = self.best_action(root_state)
    logging.debug(f"size of graph after search: {len(self.graph)}")
    # self._draw_graph('./graphs/graph_{}.pdf'.format(time()))
    return action
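# A minimal usage sketch; the class name InformationSetMCTS and the game setup
# are assumptions, since the enclosing class definition is not part of this
# listing.
#
#     searcher = InformationSetMCTS()
#     action = searcher.search(root_state=current_state, observer_id=0,
#                              iterations=1000, cheat=False)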
def _rollout_policy(self, history: StateActionHistory, state: TichuState) -> TichuAction:
    ret = state.random_action()
    # logging.debug(f"rollout policy -> {ret}")
    return ret
def _graph_node_id(self, state: TichuState) -> NodeID:
    return state.position_in_episode()
def evaluate_state(self, state: TichuState) -> RewardVector:
    # reward is simply each player's own point count
    points = state.count_points()
    assert points[0] == points[2] and points[1] == points[3]
    return points
def _graph_node_id(self, state: TichuState) -> NodeID:
    return state.unique_infoset_id(self.observer_id)
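# Note on the two _graph_node_id variants above: position_in_episode() keys
# nodes by their position in the (perfect-information) episode, while
# unique_infoset_id(observer_id) appears to merge all states that are
# indistinguishable to the observer into a single information-set node, which
# is what turns the search into an information-set MCTS.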