def __init__(self, t_prof, chief_handle, eval_agent_cls):
    super().__init__(t_prof=t_prof)

    self._args = t_prof.module_args["rlbr"]
    self._env_bldr = rl_util.get_env_builder(t_prof=t_prof)
    self._chief_handle = chief_handle
    self._eval_agent_cls = eval_agent_cls

    self._eval_env_bldr = _util.get_env_builder_rlbr(t_prof=t_prof)

    # One DDQN slot per seat; populated later.
    self._ddqns = [None for _ in range(self._eval_env_bldr.N_SEATS)]

    # Rollout/evaluation state; assigned when an evaluation run is set up.
    self._rlbr_seat_id = None
    self._agent_seat_id = None
    self._rlbr_env_wrapper = None
    self._opponent = None
    self._buf = None
    self._br_memory_saver = None

    # Pick the replay-buffer and BR-memory-saver implementations that match the
    # network architecture configured in the training profile.
    if t_prof.nn_type == "recurrent":
        from PokerRL.rl.buffers.CircularBufferRNN import CircularBufferRNN
        from PokerRL.rl.buffers.BRMemorySaverRNN import BRMemorySaverRNN

        self.CircularBufferCls = CircularBufferRNN
        self.BRMemorySaverCls = BRMemorySaverRNN

    elif t_prof.nn_type == "feedforward":
        from PokerRL.rl.buffers.CircularBufferFLAT import CircularBufferFLAT
        from PokerRL.rl.buffers.BRMemorySaverFLAT import BRMemorySaverFLAT

        self.CircularBufferCls = CircularBufferFLAT
        self.BRMemorySaverCls = BRMemorySaverFLAT

    else:
        raise ValueError(t_prof.nn_type)
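
# --- Illustrative sketch (not from the original source) ------------------------------
# The constructor above resolves the buffer / memory-saver classes once, based on
# t_prof.nn_type, so later code can instantiate self.CircularBufferCls /
# self.BRMemorySaverCls without re-branching on the config. Below is a minimal,
# self-contained example of the same dispatch pattern; the names are hypothetical
# placeholders, not PokerRL APIs.

class _ExampleRNNBuffer:
    """Placeholder standing in for a recurrent replay buffer."""


class _ExampleFlatBuffer:
    """Placeholder standing in for a feedforward replay buffer."""


def _resolve_buffer_cls(nn_type):
    # Map the config string to a concrete implementation class exactly once.
    if nn_type == "recurrent":
        return _ExampleRNNBuffer
    if nn_type == "feedforward":
        return _ExampleFlatBuffer
    raise ValueError(nn_type)

# e.g. _resolve_buffer_cls("recurrent")() builds an _ExampleRNNBuffer instance.
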

def __init__(self, t_prof, chief_handle, eval_agent_cls):
    super().__init__(
        t_prof=t_prof,
        eval_env_bldr=_util.get_env_builder_rlbr(t_prof=t_prof),
        chief_handle=chief_handle,
        evaluator_name="RL-BR",
        log_conf_interval=True,
    )
    assert self._eval_env_bldr.N_SEATS == 2, "only works for 2 players at the moment"

    self._args = t_prof.module_args["rlbr"]
    self._eval_agent = eval_agent_cls(t_prof=t_prof)

    # Handles to the learner actors (one group per seat) and the parameter server;
    # assigned later.
    self._la_handles_0 = None
    self._la_handles_1 = None
    self._ps_handle = None

    # When more than one BR is trained, log each BR's quality in its own experiment,
    # keyed by eval mode and evaluation stack size.
    if self._t_prof.log_verbose and self._args.n_brs_to_train > 1:
        self._exps_br_quality = {
            eval_mode: [
                [
                    self._ray.get(
                        self._ray.remote(
                            self._chief_handle.create_experiment,
                            self._t_prof.name + " " + eval_mode
                            + "_stack_" + str(stack_size[0]) + ": "
                            + self._evaluator_name + " RLBR Quality"
                            + "_brID" + str(_br_id)
                        )
                    )
                    for _br_id in range(self._args.n_brs_to_train)
                ]
                for stack_size in self._t_prof.eval_stack_sizes
            ]
            for eval_mode in self._t_prof.eval_modes_of_algo
        }
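
# --- Illustrative sketch (not from the original source) ------------------------------
# The triple comprehension above yields a dict keyed by eval mode, holding one list
# per evaluation stack size, which in turn holds one experiment handle per best
# response being trained. A self-contained analogue with plain strings standing in
# for the remote experiment handles (all names below are hypothetical):

def _example_br_quality_layout(eval_modes, stack_sizes, n_brs):
    # Same nesting as _exps_br_quality: eval mode -> stack size -> BR id.
    return {
        mode: [
            ["exp_{}_{}_{}".format(mode, stack[0], br_id) for br_id in range(n_brs)]
            for stack in stack_sizes
        ]
        for mode in eval_modes
    }

# _example_br_quality_layout(["SINGLE"], [[100], [200]], 2)
# -> {"SINGLE": [["exp_SINGLE_100_0", "exp_SINGLE_100_1"],
#                ["exp_SINGLE_200_0", "exp_SINGLE_200_1"]]}
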

def __init__(self, t_prof, chief_handle):
    super().__init__(t_prof=t_prof, chief_handle=chief_handle)

    self._args = t_prof.module_args["rlbr"]
    self._eval_env_bldr = _util.get_env_builder_rlbr(t_prof=t_prof)
    self._env_bldr = self._eval_env_bldr  # Override base class variable

    if self._t_prof.log_verbose:
        self._exp_mem_usage = self._ray.get(
            self._ray.remote(
                self._chief_handle.create_experiment,
                self._t_prof.name + "_RL-BR_PS_Memory_Usage",
            )
        )

    self._nets = [None for _ in range(self._env_bldr.N_SEATS)]
    self._eps = [None for _ in range(self._env_bldr.N_SEATS)]
    self._optims = [None for _ in range(self._env_bldr.N_SEATS)]

def __init__(self, t_prof, chief_handle, eval_agent_cls):
    super().__init__(
        t_prof=t_prof,
        eval_env_bldr=_util.get_env_builder_rlbr(t_prof=t_prof),
        chief_handle=chief_handle,
        eval_type="RL-BR",
        log_conf_interval=True,
    )
    assert self._eval_env_bldr.N_SEATS == 2, "only works for 2 players at the moment"

    self._args = t_prof.module_args["rlbr"]
    self._eval_agent = eval_agent_cls(t_prof=t_prof)

    self._la_handles = None
    self._ps_handle = None