Example #1
0
    def __init__(
        self,
        name,
        chief_handle,
        game_cls,
        agent_bet_set,
        algo_name,
        starting_stack_sizes=None,
    ):
        """
        Build env args, env builders, and public trees for every requested
        stack size, and register the logging experiments with the chief.

        Args:
            name (str):                             All logs, data, and checkpoints appear under this name.
            chief_handle (ChiefBase):               Reference to the chief worker.
            game_cls (PokerEnv subclass):           Class (not an instance) of the game to be trained in.
            agent_bet_set (iterable):               Choosing a bet-set from bet_sets.py is recommended. Ignored
                                                    for Limit games, but must still be passed -- any list of
                                                    floats (e.g. [0.0]) will do.
            algo_name (str):                        Tag appended to every experiment name for this algorithm.
            starting_stack_sizes (list of ints):    A CFR strategy is computed for each stack size in this list.
                                                    Results are logged individually and averaged (uniform).
                                                    If None, the game's default stack size is used.
        """
        self._name = name
        self._chief_handle = chief_handle
        self._algo_name = algo_name
        self._n_seats = 2  # heads-up only

        # Deep-copy caller-supplied sizes so later mutation outside can't affect us.
        self._starting_stack_sizes = (
            [game_cls.DEFAULT_STACK_SIZE]
            if starting_stack_sizes is None
            else copy.deepcopy(starting_stack_sizes)
        )
        self._game_cls_str = game_cls.__name__

        # One env-args instance per stack size; both seats start with the same chips.
        self._env_args = [
            game_cls.ARGS_CLS(
                n_seats=self._n_seats,
                starting_stack_sizes_list=[stack] * self._n_seats,
                bet_sizes_list_as_frac_of_pot=agent_bet_set,
            )
            for stack in self._starting_stack_sizes
        ]

        # One history-based env builder per env-args instance.
        self._env_bldrs = [
            HistoryEnvBuilder(env_cls=get_env_cls_from_str(self._game_cls_str),
                              env_args=args)
            for args in self._env_args
        ]

        # One full public game tree per builder.
        self._trees = [
            PublicTree(env_bldr=bldr,
                       stack_size=args.starting_stack_sizes_list,
                       stop_at_street=None)
            for bldr, args in zip(self._env_bldrs, self._env_args)
        ]

        for tree in self._trees:
            tree.build_tree()
            print("Tree with stack size", tree.stack_size, "has", tree.n_nodes,
                  "nodes out of which", tree.n_nonterm, "are non-terminal.")

        # Per-stack-size experiments for the current and the averaged strategy.
        self._exps_curr_total = [
            self._chief_handle.create_experiment(
                f"{self._name}_Curr_S{stack}_total_{self._algo_name}")
            for stack in self._starting_stack_sizes
        ]

        self._exps_avg_total = [
            self._chief_handle.create_experiment(
                f"{self._name}_Avg_total_S{stack}_{self._algo_name}")
            for stack in self._starting_stack_sizes
        ]

        # Experiments aggregating results across all stack sizes.
        self._exp_all_averaged_curr_total = self._chief_handle.create_experiment(
            f"{self._name}_Curr_total_averaged_{self._algo_name}")

        self._exp_all_averaged_avg_total = self._chief_handle.create_experiment(
            f"{self._name}_Avg_total_averaged_{self._algo_name}")

        # Set by the training loop; None until the first iteration runs.
        self._iter_counter = None
Example #2
0
File: _util.py — Project: zxpower/PokerRL
def get_env_builder_rlbr(t_prof):
    """Construct the env builder used by the RL-BR module of *t_prof*.

    Resolves the builder class and env class from their string names in the
    training profile, and configures the RL-BR env args from the profile's
    base env args.
    """
    builder_cls = get_builder_from_str(t_prof.env_builder_cls_str)
    env_cls = get_env_cls_from_str(t_prof.game_cls_str)
    rlbr_args = t_prof.module_args["rlbr"].get_rlbr_env_args(
        agents_env_args=t_prof.module_args["env"])
    return builder_cls(env_cls=env_cls, env_args=rlbr_args)