Example #1
    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        super().__init__(t_prof=t_prof)
        self._args = t_prof.module_args["rlbr"]

        self._env_bldr = rl_util.get_env_builder(t_prof=t_prof)

        self._chief_handle = chief_handle
        self._eval_agent_cls = eval_agent_cls
        self._eval_env_bldr = _util.get_env_builder_rlbr(t_prof=t_prof)

        # Per-seat / per-run state; populated lazily once evaluation starts.
        self._ddqns = [None for _ in range(self._eval_env_bldr.N_SEATS)]
        self._rlbr_seat_id = None
        self._agent_seat_id = None
        self._rlbr_env_wrapper = None
        self._opponent = None
        self._buf = None
        self._br_memory_saver = None

        # Pick buffer and memory-saver implementations matching the network type.
        if t_prof.nn_type == "recurrent":
            from PokerRL.rl.buffers.CircularBufferRNN import CircularBufferRNN
            from PokerRL.rl.buffers.BRMemorySaverRNN import BRMemorySaverRNN

            self.CircularBufferCls = CircularBufferRNN
            self.BRMemorySaverCls = BRMemorySaverRNN
        elif t_prof.nn_type == "feedforward":
            from PokerRL.rl.buffers.CircularBufferFLAT import CircularBufferFLAT
            from PokerRL.rl.buffers.BRMemorySaverFLAT import BRMemorySaverFLAT

            self.CircularBufferCls = CircularBufferFLAT
            self.BRMemorySaverCls = BRMemorySaverFLAT
        else:
            raise ValueError(t_prof.nn_type)
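
Example #1 selects the replay-buffer and memory-saver classes at runtime based on `t_prof.nn_type`. Below is a minimal, self-contained sketch of that dispatch pattern; the stand-in classes and the `pick_buffer_cls` helper are hypothetical and not part of PokerRL:

class CircularBufferRNN:
    """Stand-in for a recurrent replay buffer."""

class CircularBufferFLAT:
    """Stand-in for a feedforward replay buffer."""

# Map config values to implementations in one place.
_BUFFER_CLS_BY_NN_TYPE = {
    "recurrent": CircularBufferRNN,
    "feedforward": CircularBufferFLAT,
}

def pick_buffer_cls(nn_type):
    # Fail loudly on unknown config values, as Example #1 does.
    try:
        return _BUFFER_CLS_BY_NN_TYPE[nn_type]
    except KeyError:
        raise ValueError(nn_type)

buf_cls = pick_buffer_cls("recurrent")
assert buf_cls is CircularBufferRNN

Keeping the mapping in a dict rather than an if/elif chain makes adding a new network type a one-line change.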
Example #2
    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        super().__init__(
            t_prof=t_prof,
            eval_env_bldr=_util.get_env_builder_rlbr(t_prof=t_prof),
            chief_handle=chief_handle,
            evaluator_name="RL-BR",
            log_conf_interval=True)

        assert self._eval_env_bldr.N_SEATS == 2, "only works for 2 players at the moment"

        self._args = t_prof.module_args["rlbr"]
        self._eval_agent = eval_agent_cls(t_prof=t_prof)

        self._la_handles_0 = None
        self._la_handles_1 = None
        self._ps_handle = None

        # With verbose logging and multiple BRs, create one experiment per
        # (eval mode, stack size, BR id) so each BR's quality is tracked separately.
        if self._t_prof.log_verbose and self._args.n_brs_to_train > 1:
            self._exps_br_quality = {
                eval_mode: [[
                    self._ray.get(
                        self._ray.remote(
                            self._chief_handle.create_experiment,
                            self._t_prof.name + " " + eval_mode + "_stack_" +
                            str(stack_size[0]) + ": " + self._evaluator_name +
                            " RLBR Quality" + "_brID" + str(_br_id)))
                    for _br_id in range(self._args.n_brs_to_train)
                ] for stack_size in self._t_prof.eval_stack_sizes]
                for eval_mode in self._t_prof.eval_modes_of_algo
            }
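
The nested comprehension in Example #2 builds one experiment handle per (eval mode, stack size, best-response id). Here is a self-contained sketch of the same three-level structure, with a plain function standing in for the remote `create_experiment` call (all names below are illustrative, not PokerRL API):

def create_experiment(name):
    # Stand-in for the remote chief call; just returns the name.
    return name

eval_modes = ["br", "avg"]
eval_stack_sizes = [[100], [200]]
n_brs_to_train = 2

exps_br_quality = {
    eval_mode: [[
        create_experiment(
            "demo " + eval_mode + "_stack_" + str(stack_size[0])
            + ": RL-BR RLBR Quality_brID" + str(br_id))
        for br_id in range(n_brs_to_train)
    ] for stack_size in eval_stack_sizes]
    for eval_mode in eval_modes
}

# exps_br_quality["br"][0][1] is the handle for eval mode "br",
# the first stack size, and BR id 1.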
Example #3
    def __init__(self, t_prof, chief_handle):
        super().__init__(t_prof=t_prof, chief_handle=chief_handle)
        self._args = t_prof.module_args["rlbr"]
        self._eval_env_bldr = _util.get_env_builder_rlbr(t_prof=t_prof)
        self._env_bldr = self._eval_env_bldr  # Override base class variable

        # When verbose, log this process's memory usage to its own experiment.
        if self._t_prof.log_verbose:
            self._exp_mem_usage = self._ray.get(
                self._ray.remote(self._chief_handle.create_experiment,
                                 self._t_prof.name + "_RL-BR_PS_Memory_Usage"))

        # Per-seat networks, exploration epsilons, and optimizers; filled in later.
        self._nets = [None for _ in range(self._env_bldr.N_SEATS)]
        self._eps = [None for _ in range(self._env_bldr.N_SEATS)]
        self._optims = [None for _ in range(self._env_bldr.N_SEATS)]
Example #4
    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        super().__init__(
            t_prof=t_prof,
            eval_env_bldr=_util.get_env_builder_rlbr(t_prof=t_prof),
            chief_handle=chief_handle,
            eval_type="RL-BR",
            log_conf_interval=True)

        assert self._eval_env_bldr.N_SEATS == 2, "only works for 2 players at the moment"

        self._args = t_prof.module_args["rlbr"]
        self._eval_agent = eval_agent_cls(t_prof=t_prof)

        self._la_handles = None
        self._ps_handle = None
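
Examples #2 and #4 receive the evaluation agent as a class (`eval_agent_cls`) and instantiate it inside the constructor. A minimal sketch of that injection pattern, using hypothetical placeholder classes:

class DummyEvalAgent:
    """Stand-in for a concrete eval-agent class accepting a training profile."""
    def __init__(self, t_prof):
        self.t_prof = t_prof

class EvaluatorMaster:
    def __init__(self, t_prof, eval_agent_cls):
        # Instantiating the injected class keeps the master generic over agents.
        self._eval_agent = eval_agent_cls(t_prof=t_prof)

master = EvaluatorMaster(t_prof={"name": "demo"}, eval_agent_cls=DummyEvalAgent)
assert isinstance(master._eval_agent, DummyEvalAgent)

Passing the class rather than an instance lets the same evaluator master work with any agent implementation that accepts `t_prof`.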