Example #1
def _get_nl_leduc_tree(env_args=None):
    if env_args is None:
        env_args = DiscretizedNLLeduc.ARGS_CLS(
            n_seats=2,
            starting_stack_sizes_list=[1000, 1000],
            bet_sizes_list_as_frac_of_pot=[1.0])

    env_bldr = HistoryEnvBuilder(env_cls=DiscretizedNLLeduc, env_args=env_args)

    _tree = PublicTree(
        env_bldr=env_bldr,
        stack_size=env_args.starting_stack_sizes_list,
        stop_at_street=None,
    )

    _tree.build_tree()

    # fill_uniform_random() fills a uniform strategy for every player at once,
    # so a single call is enough
    _tree.fill_uniform_random()
    _tree.compute_ev()

    _tree.export_to_file()
    print("Tree with stack size", _tree.stack_size, "has", _tree.n_nodes,
          "nodes out of which", _tree.n_nonterm, "are non-terminal.")
    print(np.mean(_tree.root.exploitability) * env_bldr.env_cls.EV_NORMALIZER)

    return _tree
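
A minimal usage sketch for the helper above, assuming the PokerRL-style imports it relies on are already in scope; it only reads attributes the example itself uses.

import numpy as np

# Build the Leduc tree with default arguments; the helper fills a uniform-random
# strategy, computes EVs, and exports the tree before returning it.
tree = _get_nl_leduc_tree()

# root.exploitability holds one entry per seat (cf. the print inside the helper).
print("Per-seat exploitability:", tree.root.exploitability)
print("Mean exploitability:", float(np.mean(tree.root.exploitability)))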
Example #2
    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        super().__init__(t_prof=t_prof, eval_env_bldr=rl_util.get_env_builder(t_prof=t_prof), chief_handle=chief_handle,
                         eval_type="BR")
        self._env_bldr = rl_util.get_env_builder(t_prof=t_prof)

        assert self._env_bldr.N_SEATS == 2

        self._eval_agent = eval_agent_cls(t_prof=t_prof)

        self._game_trees = [
            PublicTree(env_bldr=self._env_bldr,
                       stack_size=stack_size,
                       stop_at_street=None,
                       put_out_new_round_after_limit=True,
                       is_debugging=self._t_prof.DEBUGGING)
            for stack_size in self._t_prof.eval_stack_sizes
        ]

        for gt in self._game_trees:
            gt.build_tree()
            print("Tree with stack size", gt.stack_size, "has", gt.n_nodes, "nodes out of which", gt.n_nonterm,
                  "are non-terminal.")
Example #3
    def _evaluate_avg_strats(self):
        expl_totals = []
        for t_idx in range(len(self._trees)):
            METRIC = self._env_bldrs[t_idx].env_cls.WIN_METRIC
            eval_tree = PublicTree(
                env_bldr=self._env_bldrs[t_idx],
                stack_size=self._env_args[t_idx].starting_stack_sizes_list,
                stop_at_street=None,
                is_debugging=False,
            )
            eval_tree.build_tree()

            def _fill(_node_eval, _node_train):
                if _node_eval.p_id_acting_next != eval_tree.CHANCE_ID and (
                        not _node_eval.is_terminal):
                    _node_eval.strategy = np.copy(
                        _node_train.data["avg_strat"])
                    assert np.allclose(np.sum(_node_eval.strategy, axis=1),
                                       1,
                                       atol=0.0001)

                for c_eval, c_train in zip(_node_eval.children,
                                           _node_train.children):
                    _fill(_node_eval=c_eval, _node_train=c_train)

            # fill_uniform_random() initializes the node strategies (uniformly); they are
            # overwritten with the trained average strategy right below
            eval_tree.fill_uniform_random()

            # fill with strat
            _fill(_node_eval=eval_tree.root,
                  _node_train=self._trees[t_idx].root)
            eval_tree.update_reach_probs()

            # compute EVs
            eval_tree.compute_ev()

            eval_tree.export_to_file(name=self._name + "_Avg_" +
                                     str(self._iter_counter))

            # log
            expl_p = [
                float(eval_tree.root.exploitability[p]) *
                self._env_bldrs[t_idx].env_cls.EV_NORMALIZER
                for p in range(eval_tree.n_seats)
            ]
            expl_total = sum(expl_p) / eval_tree.n_seats
            expl_totals.append(expl_total)

            self._chief_handle.add_scalar(self._exps_avg_total[t_idx],
                                          "Evaluation/" + METRIC,
                                          self._iter_counter, expl_total)

        expl_total_averaged = sum(expl_totals) / float(len(expl_totals))
        self._chief_handle.add_scalar(self._exp_all_averaged_avg_total,
                                      "Evaluation/" + METRIC,
                                      self._iter_counter, expl_total_averaged)
Example #4
    def __init__(
        self,
        name,
        chief_handle,
        game_cls,
        agent_bet_set,
        algo_name,
        starting_stack_sizes=None,
    ):
        """
        Args:
            name (str):                             Under this name all logs, data, and checkpoints will appear.
            chief_handle (ChiefBase):               Reference to chief worker
            game_cls (PokerEnv subclass):           Class (not instance) to be trained in.
            agent_bet_set (iterable):               Choosing a bet-set from bet_sets.py is recommended. If solving a
                                                    Limit poker game, this value will not be considered, but must still
                                                    be passed. Just set this to any list of floats (e.g. [0.0])
            starting_stack_sizes (list of ints):    For each stack size in this list, a CFR strategy will be computed.
                                                    Results are logged individually and averaged (uniform).
                                                    If None, takes the default for the game.
        """

        self._name = name
        self._n_seats = 2

        self._chief_handle = chief_handle

        if starting_stack_sizes is None:
            self._starting_stack_sizes = [game_cls.DEFAULT_STACK_SIZE]
        else:
            self._starting_stack_sizes = copy.deepcopy(starting_stack_sizes)
        self._game_cls_str = game_cls.__name__

        self._env_args = [
            game_cls.ARGS_CLS(
                n_seats=self._n_seats,
                starting_stack_sizes_list=[
                    start_chips for _ in range(self._n_seats)
                ],
                bet_sizes_list_as_frac_of_pot=agent_bet_set,
            ) for start_chips in self._starting_stack_sizes
        ]
        self._env_bldrs = [
            HistoryEnvBuilder(env_cls=get_env_cls_from_str(self._game_cls_str),
                              env_args=self._env_args[s])
            for s in range(len(self._starting_stack_sizes))
        ]

        self._trees = [
            PublicTree(
                env_bldr=self._env_bldrs[idx],
                stack_size=self._env_args[idx].starting_stack_sizes_list,
                stop_at_street=None) for idx in range(len(self._env_bldrs))
        ]

        for tree in self._trees:
            tree.build_tree()
            print("Tree with stack size", tree.stack_size, "has", tree.n_nodes,
                  "nodes out of which", tree.n_nonterm, "are non-terminal.")

        self._algo_name = algo_name

        self._exps_curr_total = [
            self._chief_handle.create_experiment(
                self._name + "_Curr_S" + str(self._starting_stack_sizes[s]) +
                "_total_" + self._algo_name)
            for s in range(len(self._starting_stack_sizes))
        ]

        self._exps_avg_total = [
            self._chief_handle.create_experiment(
                self._name + "_Avg_total_S" +
                str(self._starting_stack_sizes[s]) + "_" + self._algo_name)
            for s in range(len(self._starting_stack_sizes))
        ]

        self._exp_all_averaged_curr_total = self._chief_handle.create_experiment(
            self._name + "_Curr_total_averaged_" + self._algo_name)

        self._exp_all_averaged_avg_total = self._chief_handle.create_experiment(
            self._name + "_Avg_total_averaged_" + self._algo_name)

        self._iter_counter = None
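
A hedged instantiation sketch derived from the constructor docstring above; the trainer subclass name and the `chief` object below are placeholders, not taken from the source.

# Illustrative only -- `SomeCFRTrainer` stands in for a concrete subclass of the
# base class above, and `chief` for a ChiefBase worker created elsewhere:
#
# trainer = SomeCFRTrainer(
#     name="leduc_cfr_run",
#     chief_handle=chief,                      # reference to the chief worker
#     game_cls=DiscretizedNLLeduc,             # PokerEnv subclass, as in Example #1
#     agent_bet_set=[1.0],                     # ignored for Limit games, but still required
#     algo_name="CFR",
#     starting_stack_sizes=None,               # None -> use the game's default stack size
# )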
Example #5
class EvalAgentTree(_EvalAgentBase):
    
    def __init__(self, t_prof, br_agent, mode=None, device=None):
        super().__init__(t_prof=t_prof, mode=mode, device=device)
        
        self.tree = PublicTree(
            env_bldr=rl_util.get_env_builder(t_prof=t_prof),
            stack_size=t_prof.eval_stack_sizes[0],
            stop_at_street=None,
            put_out_new_round_after_limit=True,
            is_debugging=t_prof.DEBUGGING
        )
        self.tree.build_tree()
        self.br_agent = br_agent # agent to play best response against
        self.solve_br()
        
        self.modes = ["EVAL", "BR", "BAYESIAN"]
        if mode:
            self.mode = mode
        else:
            self.mode = "EVAL" # default is eval
            
        if self.mode == "BAYESIAN":
            self._fill_tree_w_prior()
                        
        
    def _fill_tree_w_prior(self, prior=1):
        """ Initializes per-node pseudo-counts used by the BAYESIAN mode. """
        def fill(node):
            # one row per hand (range index), one column per action; the shape is
            # hard-coded for Leduc (6 hands x 3 actions) rather than using
            # len(node.allowed_actions)
            node.data = np.full((6, 3), float(prior))
            for child in node.children:
                fill(child)

        fill(self.tree.root)

        
    def can_compute_mode(self):
        """ All modes are always computable (i.e. not dependent on iteration etc.)"""
        return True
    
    def _find_node_by_env(self, action_history):
        """ Walks the public tree from the root, replaying the recorded action history
            (and the board card held by the internal env at chance nodes) to locate the
            node corresponding to the current game state. """
        node = self.tree.root
        i = 0

        while i < len(action_history):
            if isinstance(node.children[0], PlayerActionNode):  # next node is a player action
                action = action_history[i][0]
                assert node.p_id_acting_next == action_history[i][2]
                node = node.children[node.allowed_actions.index(action)]
                i += 1
            else:  # chance node: descend along the board card that was actually dealt
                assert node.children[0].action == "CHANCE"
                card = self._internal_env_wrapper.env.board
                node = node.children[self._card_to_idx(card)]
                assert self._card_to_idx(node.env_state['board_2d']) == self._card_to_idx(card)

        # if the history ends right before a chance node, descend through it as well
        if not isinstance(node.children[0], PlayerActionNode):
            assert node.children[0].action == "CHANCE"
            card = self._internal_env_wrapper.env.board
            node = node.children[self._card_to_idx(card)]
            assert self._card_to_idx(node.env_state['board_2d']) == self._card_to_idx(card)

        return node
    
    def _card_to_idx(self, card):
        # maps a board card (rank, suit) to a child index: rank * 2 + suit
        return card[0][0] * 2 + card[0][1]
    
    def solve_br(self):
        self.tree.fill_with_agent_policy(agent=self.br_agent)
        self.tree.compute_ev()
                    
    def get_action(self, step_env=True, need_probs=False):
        """ !! BEFORE CALLING, NOTIFY EVALAGENT OF THE PAST ACTIONS / ACTION SEQUENCE !! """
        node = self._find_node_by_env(self._internal_env_wrapper._action_history_list)

        p_id_acting = self._internal_env_wrapper.env.current_player.seat_id
        range_idx = self._internal_env_wrapper.env.get_range_idx(p_id=p_id_acting)
        a_probs_all_hands = None

        if self.mode == "BR":
            # deterministically pick the child with the highest EV for the acting player's hand
            action = None
            best_ev = -1e10
            for idx, potential_action in enumerate(node.allowed_actions):
                if node.children[idx].ev[p_id_acting, range_idx] > best_ev:
                    action = potential_action
                    best_ev = node.children[idx].ev[p_id_acting, range_idx]

        elif self.mode == "EVAL":
            # sample from the strategy stored at this node for the acting player's hand
            a_probs = node.strategy[range_idx, :]
            action = np.random.choice(node.allowed_actions, p=a_probs)

        elif self.mode == "BAYESIAN":
            # sample proportionally to the pseudo-counts stored in node.data
            pseudocounts = node.data[range_idx, node.allowed_actions]
            a_probs = pseudocounts / sum(pseudocounts)
            action = np.random.choice(node.allowed_actions, p=a_probs)

        if step_env:
            self._internal_env_wrapper.step(action=action)

        assert a_probs_all_hands is None

        return action, a_probs_all_hands
    
    def get_a_probs_for_each_hand(self):
        """ !! BEFORE CALLING, NOTIFY EVALAGENT OF THE PAST ACTIONS / ACTION SEQUENCE !! """
        node = self._find_node_by_env(self._internal_env_wrapper._action_history_list)

        if self.mode == "BAYESIAN":
            # copy so the stored pseudo-counts are not zeroed out in place
            x = np.copy(node.data)
            # remove probability mass from actions that are not allowed at this node
            mask = np.ones(x.shape, dtype=bool)
            mask[:, node.allowed_actions] = False
            x[mask] = 0
            return x / x.sum(axis=1)[:, np.newaxis]
        
    def get_mode(self):
        if self.mode == "BR":
            return "BESTRESPONSE"
        elif self.mode == "EVAL":
            return "COPYCAT"
        elif self.mode == "BAYESIAN":
            return "BAYESIAN"