def __init__(
        self,
        name,
        chief_handle,
        game_cls,
        agent_bet_set,
        algo_name,
        starting_stack_sizes=None,
):
    """
    Set up environments, public game trees, and logging experiments for a
    two-seat tabular CFR-style run.

    Args:
        name (str): Prefix under which all logs, data, and checkpoints appear.
        chief_handle (ChiefBase): Handle to the chief worker, used here to
            create logging experiments.
        game_cls (PokerEnv subclass): Class (not an instance) of the game to
            be solved.
        agent_bet_set (iterable): Bet sizes as fractions of the pot (see
            bet_sets.py). Ignored when solving a Limit game, but must still be
            passed — any list of floats (e.g. [0.0]) works there.
        algo_name (str): Algorithm name; appended to every experiment name.
        starting_stack_sizes (list of ints): One CFR strategy is computed per
            stack size in this list; results are logged individually and
            averaged uniformly. Defaults to the game's DEFAULT_STACK_SIZE
            when None.
    """
    self._name = name
    self._n_seats = 2  # heads-up only
    self._chief_handle = chief_handle

    if starting_stack_sizes is None:
        self._starting_stack_sizes = [game_cls.DEFAULT_STACK_SIZE]
    else:
        # Deep-copy so later mutation of the caller's list cannot affect us.
        self._starting_stack_sizes = copy.deepcopy(starting_stack_sizes)
    self._game_cls_str = game_cls.__name__

    # One env-args object per starting stack size; both seats start with the
    # same number of chips.
    self._env_args = [
        game_cls.ARGS_CLS(
            n_seats=self._n_seats,
            starting_stack_sizes_list=[chips for _ in range(self._n_seats)],
            bet_sizes_list_as_frac_of_pot=agent_bet_set,
        )
        for chips in self._starting_stack_sizes
    ]

    self._env_bldrs = [
        HistoryEnvBuilder(
            env_cls=get_env_cls_from_str(self._game_cls_str),
            env_args=args,
        )
        for args in self._env_args
    ]

    # Build one full public tree per stack size (no street cutoff).
    self._trees = [
        PublicTree(
            env_bldr=bldr,
            stack_size=args.starting_stack_sizes_list,
            stop_at_street=None,
        )
        for bldr, args in zip(self._env_bldrs, self._env_args)
    ]

    for t in self._trees:
        t.build_tree()
        print("Tree with stack size", t.stack_size,
              "has", t.n_nodes,
              "nodes out of which", t.n_nonterm,
              "are non-terminal.")

    self._algo_name = algo_name

    # Per-stack-size experiments for the current and the average strategy.
    self._exps_curr_total = [
        self._chief_handle.create_experiment(
            self._name + "_Curr_S" + str(stack) + "_total_" + self._algo_name)
        for stack in self._starting_stack_sizes
    ]
    self._exps_avg_total = [
        self._chief_handle.create_experiment(
            self._name + "_Avg_total_S" + str(stack) + "_" + self._algo_name)
        for stack in self._starting_stack_sizes
    ]

    # Experiments aggregating results uniformly over all stack sizes.
    self._exp_all_averaged_curr_total = self._chief_handle.create_experiment(
        self._name + "_Curr_total_averaged_" + self._algo_name)
    self._exp_all_averaged_avg_total = self._chief_handle.create_experiment(
        self._name + "_Avg_total_averaged_" + self._algo_name)

    # No iteration has run yet.
    self._iter_counter = None
def get_env_builder_rlbr(t_prof):
    """
    Construct the environment builder used by the RL-BR module.

    Args:
        t_prof: Training profile carrying the builder class name, the game
            class name, and the per-module args.

    Returns:
        An env-builder instance configured with the RL-BR environment args
        derived from the agents' env args.
    """
    builder_cls = get_builder_from_str(t_prof.env_builder_cls_str)
    rlbr_env_args = t_prof.module_args["rlbr"].get_rlbr_env_args(
        agents_env_args=t_prof.module_args["env"])
    return builder_cls(
        env_cls=get_env_cls_from_str(t_prof.game_cls_str),
        env_args=rlbr_env_args,
    )