Example #1
0
    def add_new_iteration_strategy_model(self, owner, adv_net_state_dict,
                                         cfr_iter):
        """Register a new iteration strategy built from an advantage-net state dict.

        Wraps ``adv_net_state_dict`` in an ``IterationStrategy`` for ``owner``,
        adds it to the per-owner strategy buffer, optionally pickles it to disk,
        and — in verbose mode, for owner 1 only — logs the Chief process's
        memory usage.

        Args:
            owner: seat/player index the strategy belongs to.
            adv_net_state_dict: serialized advantage-network weights (ray format).
            cfr_iter: CFR iteration this strategy corresponds to.
        """
        inference_device = self._t_prof.device_inference
        strategy = IterationStrategy(t_prof=self._t_prof,
                                     env_bldr=self._env_bldr,
                                     owner=owner,
                                     device=inference_device,
                                     cfr_iter=cfr_iter)

        # Convert the incoming state dict to torch tensors on the inference
        # device before loading it into the strategy's net.
        torch_state = self._ray.state_dict_to_torch(adv_net_state_dict,
                                                    device=inference_device)
        strategy.load_net_state_dict(torch_state)
        self._strategy_buffers[strategy.owner].add(iteration_strat=strategy)

        # Optionally persist every exported net to disk.
        if self._t_prof.export_each_net:
            path = ospj(self._t_prof.path_strategy_nets, self._t_prof.name)
            file_util.create_dir_if_not_exist(path)
            pkl_name = str(strategy.cfr_iteration) + "_P" + str(
                strategy.owner) + ".pkl"
            file_util.do_pickle(obj=strategy.state_dict(),
                                path=path,
                                file_name=pkl_name)

        # Verbose debugging: track the Chief process's resident memory (RSS).
        if self._t_prof.log_verbose and owner == 1:
            process = psutil.Process(os.getpid())
            self.add_scalar(self._exp_mem_usage,
                            "Debug/Memory Usage/Chief", cfr_iter,
                            process.memory_info().rss)
Example #2
0
    def __init__(self,
                 t_prof,
                 eval_methods,
                 chief_cls,
                 eval_agent_cls,
                 n_iterations=None,
                 iteration_to_import=None,
                 name_to_import=None):
        """
        Args:
            t_prof (TrainingProfile)
            eval_methods (dict):                dict of {evaluator1_name: frequency, ...} Currently supported evaluators
                                                are "br", "h2h", "rlbr", and "lbr"
            chief_cls (ChiefBase subclass):     class, not instance
            eval_agent_cls (EvalAgentBase subclass):  class, not instance
            n_iterations (int):                 number of iterations to run. If None, runs forever
            iteration_to_import (int):          step/iteration to import
            name_to_import (str):               name of the run to import
        """
        super().__init__(t_prof=t_prof)

        # Bring up ray either against an existing cluster head or locally.
        if self._t_prof.CLUSTER:
            self._ray.init_cluster(redis_address=t_prof.redis_head_adr)
        else:
            self._ray.init_local()

        # Persist the training profile so the run can be reloaded later.
        file_util.do_pickle(obj=t_prof,
                            file_name=t_prof.name,
                            path=t_prof.path_trainingprofiles)
        self.n_iterations = n_iterations

        self._step_to_import = iteration_to_import
        self._name_to_import = name_to_import

        # Evaluator implementations differ between distributed and local
        # execution; import the matching set under common aliases.
        if self._t_prof.DISTRIBUTED:
            from PokerRL.eval.lbr.DistLBRMaster import DistLBRMaster as LBRMaster
            from PokerRL.eval.rl_br.DistRLBRMaster import DistRLBRMaster as RLBRMaster
            from PokerRL.eval.rl_br.workers.ps.Dist_RLBR_ParameterServer import \
                Dist_RLBR_ParameterServer as RLBRParameterServer
            from PokerRL.eval.rl_br.workers.la.Dist_RLBR_LearnerActor import Dist_RLBR_LearnerActor as RLBRLearnerActor
            from PokerRL.eval.lbr.DistLBRWorker import DistLBRWorker as LBRWorker
            from PokerRL.eval.br.DistBRMaster import DistBRMaster as BRMaster
            from PokerRL.eval.head_to_head.DistHead2HeadMaster import DistHead2HeadMaster as Head2HeadMaster

        else:
            from PokerRL.eval.lbr.LocalLBRMaster import LocalLBRMaster as LBRMaster
            from PokerRL.eval.rl_br.LocalRLBRMaster import LocalRLBRMaster as RLBRMaster
            from PokerRL.eval.rl_br.workers.ps.Local_RLBR_ParameterServer import \
                Local_RLBR_ParameterServer as RLBRParameterServer
            from PokerRL.eval.rl_br.workers.la.Local_RLBR_LearnerActor import \
                Local_RLBR_LearnerActor as RLBRLearnerActor
            from PokerRL.eval.lbr.LocalLBRWorker import LocalLBRWorker as LBRWorker
            from PokerRL.eval.br.LocalBRMaster import LocalBRMaster as BRMaster
            from PokerRL.eval.head_to_head.LocalHead2HeadMaster import LocalHead2HeadMaster as Head2HeadMaster

        # safety measure to avoid overwriting logs when reloading
        if name_to_import is not None and iteration_to_import is not None and name_to_import == t_prof.name:
            t_prof.name += "_"

        print("Creating Chief...")
        self.chief_handle = self._ray.create_worker(chief_cls, t_prof)

        # Each entry maps evaluator name -> (master worker handle, eval frequency).
        self.eval_masters = {}
        if "br" in eval_methods:
            print("Creating BR Evaluator...")
            self.eval_masters["br"] = (
                self._ray.create_worker(BRMaster, t_prof, self.chief_handle,
                                        eval_agent_cls),
                eval_methods["br"]  # freq
            )

        if "h2h" in eval_methods:
            print("Creating Head-to-Head Mode Evaluator...")
            self.eval_masters["h2h"] = (
                self._ray.create_worker(Head2HeadMaster, t_prof,
                                        self.chief_handle, eval_agent_cls),
                eval_methods["h2h"]  # freq
            )

        if "lbr" in eval_methods:
            print("Creating LBR Evaluator...")
            self._lbr_workers = [
                self._ray.create_worker(LBRWorker, t_prof, self.chief_handle,
                                        eval_agent_cls)
                for _ in range(self._t_prof.module_args["lbr"].n_workers)
            ]

            self.eval_masters["lbr"] = (
                self._ray.create_worker(LBRMaster, t_prof, self.chief_handle),
                eval_methods["lbr"]  # freq
            )
            # Block until the master knows its workers before continuing.
            self._ray.wait([
                self._ray.remote(
                    self.eval_masters["lbr"][0].set_worker_handles,
                    *self._lbr_workers)
            ])

        if "rlbr" in eval_methods:
            print("Creating RL-BR Evaluator...")
            self._rlbr_ps = self._ray.create_worker(
                RLBRParameterServer,
                t_prof,
                self.chief_handle,
            )
            self._rlbr_las = [
                self._ray.create_worker(RLBRLearnerActor, t_prof,
                                        self.chief_handle, eval_agent_cls)
                for _ in range(self._t_prof.module_args["rlbr"].n_las)
            ]

            self.eval_masters["rlbr"] = (
                self._ray.create_worker(RLBRMaster, t_prof, self.chief_handle,
                                        eval_agent_cls),
                eval_methods["rlbr"]  # freq
            )

            # Wire the RL-BR master up to its learner actors and parameter
            # server before any evaluation can run.
            self._ray.wait([
                self._ray.remote(
                    self.eval_masters["rlbr"][0].set_learner_actors,
                    *self._rlbr_las),
            ])
            self._ray.wait([
                self._ray.remote(self.eval_masters["rlbr"][0].set_param_server,
                                 self._rlbr_ps),
            ])

        # Logging/plotting front-end that pulls logs from the Chief.
        self.crayon = CrayonWrapper(
            name=t_prof.name,
            chief_handle=self.chief_handle,
            path_log_storage=self._t_prof.path_log_storage,
            crayon_server_address=t_prof.local_crayon_server_docker_address,
            runs_distributed=t_prof.DISTRIBUTED,
            runs_cluster=t_prof.CLUSTER,
        )
Example #3
0
 def store_to_disk(self, path, file_name):
     """Pickle this object's state dict to disk at ``path``/``file_name``."""
     snapshot = self.state_dict()
     do_pickle(obj=snapshot, path=path, file_name=file_name)