Ejemplo n.º 1
0
                            ignore_reinit_error=True,
                            logging_level=logging.INFO,
                            log_to_driver=os.getenv("RAY_LOG_TO_DRIVER",
                                                    False))

    def select_policy(agent_id):
        """Map an RLlib agent id to a policy id.

        The best-response player trains "best_response"; every other agent
        plays the frozen "average_policy".
        """
        # NOTE(review): `br_player` is a free variable from the enclosing
        # scope; it must be bound before any rollouts start.
        if agent_id == br_player:
            return "best_response"
        # Fix: dropped the f-string prefix — the literal has no placeholders
        # (ruff F541); runtime value is unchanged.
        return "average_policy"

    # Model config for the average-policy network, pulled from the scenario's
    # average-policy trainer config built against the throwaway env instance.
    avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
        tmp_env)["model"]

    # Frozen player-0 average-policy checkpoint spec.
    # NOTE(review): hard-coded absolute path to one machine/run — breaks
    # anywhere else; should come from a CLI arg or config. TODO confirm.
    player_0_avg_pol_spec = StrategySpec.from_json_file(
        "/home/jblanier/git/grl/grl/data/1000_oshi_zumo_nfsp_larger_dqn_larger_sparse_10.56.11PM_Mar-24-20217lav0isx/avg_policy_checkpoint_specs/average_policy_player_0_iter_53000.json"
    )

    class HyperParamSearchCallbacks(DefaultCallbacks):
        """RLlib callbacks for the hyperparameter-search run.

        Loads the frozen average policy onto each rollout worker the first
        time an episode starts on that worker.
        """
        def on_episode_start(self, *, worker: "RolloutWorker",
                             base_env: BaseEnv, policies: Dict[PolicyID,
                                                               Policy],
                             episode: MultiAgentEpisode, env_index: int,
                             **kwargs):
            # Forward to the base implementation before any custom logic.
            super().on_episode_start(worker=worker,
                                     base_env=base_env,
                                     policies=policies,
                                     episode=episode,
                                     env_index=env_index,
                                     **kwargs)
            # NOTE(review): this snippet is cut off mid-statement by the
            # source extraction; the guard presumably checks a per-worker
            # "already loaded" flag before loading the average policy —
            # recover the remainder from the original file.
            if not hasattr(worker,
        dashboard_port=find_free_port(),
        ignore_reinit_error=True,
        logging_level=logging.INFO,
        log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


    def select_policy(agent_id):
        """Map an RLlib agent id to a policy id.

        The best-response player trains "best_response"; every other agent
        plays the frozen "average_policy".
        """
        # NOTE(review): `br_player` is a free variable from the enclosing
        # scope; it must be bound before any rollouts start.
        if agent_id == br_player:
            return "best_response"
        # Fix: dropped the f-string prefix — the literal has no placeholders
        # (ruff F541); runtime value is unchanged.
        return "average_policy"


    # Model config for the average-policy network, pulled from the scenario's
    # average-policy trainer config built against the throwaway env instance.
    avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(tmp_env)["model"]

    # Frozen player-0 average-policy checkpoint spec.
    # NOTE(review): hard-coded absolute path to one machine/run — breaks
    # anywhere else; should come from a CLI arg or config. TODO confirm.
    player_0_avg_pol_spec = StrategySpec.from_json_file(
        "/home/jblanier/git/grl/grl/data/oshi_zumo_tiny_nfsp_dqn_sparse_01.54.50PM_Apr-08-20218z_hf4wq/avg_policy_checkpoint_specs/average_policy_player_0_iter_214000.json")


    class HyperParamSearchCallbacks(DefaultCallbacks):

        def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
                             episode: MultiAgentEpisode, env_index: int, **kwargs):
            """Lazily load the frozen player-0 average policy onto this rollout worker.

            Effectively runs once per worker: after the first successful load,
            the ``avg_pol_loaded`` flag set on the worker short-circuits the
            load on every later episode.
            """
            super().on_episode_start(worker=worker, base_env=base_env, policies=policies, episode=episode,
                                     env_index=env_index, **kwargs)
            # getattr's default covers both "attribute missing" and
            # "attribute is False" in a single expression.
            if not getattr(worker, "avg_pol_loaded", False):
                target_policy = worker.policy_map["average_policy"]
                load_pure_strat(policy=target_policy, pure_strat_spec=player_0_avg_pol_spec)
                worker.avg_pol_loaded = True

        def on_train_result(self, *, trainer, result: dict, **kwargs):
            # Forward to the base callback first.
            # NOTE(review): the rest of this method is cut off in this
            # extracted snippet — recover the remainder from the original
            # source file.
            super().on_train_result(trainer=trainer, result=result, **kwargs)
                            ignore_reinit_error=True,
                            logging_level=logging.INFO,
                            log_to_driver=os.getenv("RAY_LOG_TO_DRIVER",
                                                    False))

    def select_policy(agent_id):
        """Map an RLlib agent id to a policy id.

        The best-response player trains "best_response"; every other agent
        plays the frozen "average_policy".
        """
        # NOTE(review): `br_player` is a free variable from the enclosing
        # scope; it must be bound before any rollouts start.
        if agent_id == br_player:
            return "best_response"
        # Fix: dropped the f-string prefix — the literal has no placeholders
        # (ruff F541); runtime value is unchanged.
        return "average_policy"

    # Model config for the average-policy network, pulled from the scenario's
    # average-policy trainer config built against the throwaway env instance.
    avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
        tmp_env)["model"]

    # Frozen player-0 average-policy checkpoint spec.
    # NOTE(review): hard-coded absolute path to one machine/run — breaks
    # anywhere else; should come from a CLI arg or config. TODO confirm.
    player_0_avg_pol_spec = StrategySpec.from_json_file(
        "/home/jblanier/git/grl/grl/data/oshi_zumo_medium_nfsp_dqn_sparse_01.55.05PM_Apr-08-2021ta6arraq/avg_policy_checkpoint_specs/average_policy_player_0_iter_221000.json"
    )

    class HyperParamSearchCallbacks(DefaultCallbacks):
        """RLlib callbacks for the hyperparameter-search run.

        Loads the frozen average policy onto each rollout worker the first
        time an episode starts on that worker.
        """
        def on_episode_start(self, *, worker: "RolloutWorker",
                             base_env: BaseEnv, policies: Dict[PolicyID,
                                                               Policy],
                             episode: MultiAgentEpisode, env_index: int,
                             **kwargs):
            # Forward to the base implementation before any custom logic.
            super().on_episode_start(worker=worker,
                                     base_env=base_env,
                                     policies=policies,
                                     episode=episode,
                                     env_index=env_index,
                                     **kwargs)
            # NOTE(review): this snippet is cut off mid-statement by the
            # source extraction; the guard presumably checks a per-worker
            # "already loaded" flag before loading the average policy —
            # recover the remainder from the original file.
            if not hasattr(worker,
Ejemplo n.º 4
0
                            ignore_reinit_error=True,
                            logging_level=logging.INFO,
                            log_to_driver=os.getenv("RAY_LOG_TO_DRIVER",
                                                    False))

    def select_policy(agent_id):
        """Map an RLlib agent id to a policy id.

        The best-response player trains "best_response"; every other agent
        plays the frozen "average_policy".
        """
        # NOTE(review): `br_player` is a free variable from the enclosing
        # scope; it must be bound before any rollouts start.
        if agent_id == br_player:
            return "best_response"
        # Fix: dropped the f-string prefix — the literal has no placeholders
        # (ruff F541); runtime value is unchanged.
        return "average_policy"

    # Model config for the average-policy network, pulled from the scenario's
    # average-policy trainer config built against the throwaway env instance.
    avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
        tmp_env)["model"]

    # Frozen player-0 average-policy checkpoint spec.
    # NOTE(review): hard-coded absolute path to one machine/run — breaks
    # anywhere else; should come from a CLI arg or config. TODO confirm.
    player_0_avg_pol_spec = StrategySpec.from_json_file(
        "/home/jblanier/git/grl/grl/data/leduc_nfsp_dqn_sparse_02.34.06PM_Apr-08-2021bt5ym0l8/avg_policy_checkpoint_specs/average_policy_player_0_iter_263000.json"
    )

    class HyperParamSearchCallbacks(DefaultCallbacks):
        """RLlib callbacks for the hyperparameter-search run.

        Loads the frozen average policy onto each rollout worker the first
        time an episode starts on that worker.
        """
        def on_episode_start(self, *, worker: "RolloutWorker",
                             base_env: BaseEnv, policies: Dict[PolicyID,
                                                               Policy],
                             episode: MultiAgentEpisode, env_index: int,
                             **kwargs):
            # Forward to the base implementation before any custom logic.
            super().on_episode_start(worker=worker,
                                     base_env=base_env,
                                     policies=policies,
                                     episode=episode,
                                     env_index=env_index,
                                     **kwargs)
            # NOTE(review): this snippet is cut off mid-statement by the
            # source extraction; the guard presumably checks a per-worker
            # "already loaded" flag before loading the average policy —
            # recover the remainder from the original file.
            if not hasattr(worker,
        ignore_reinit_error=True,
        logging_level=logging.INFO,
        log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


    def select_policy(agent_id):
        """Map an RLlib agent id to a policy id.

        The best-response player trains "best_response"; every other agent
        plays the frozen "average_policy".
        """
        # NOTE(review): `br_player` is a free variable from the enclosing
        # scope; it must be bound before any rollouts start.
        if agent_id == br_player:
            return "best_response"
        # Fix: dropped the f-string prefix — the literal has no placeholders
        # (ruff F541); runtime value is unchanged.
        return "average_policy"


    # Model config for the average-policy network, pulled from the scenario's
    # average-policy trainer config built against the throwaway env instance.
    avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(tmp_env)["model"]

    # Frozen player-0 average-policy checkpoint spec.
    # NOTE(review): hard-coded absolute path to one machine/run — breaks
    # anywhere else; should come from a CLI arg or config. TODO confirm.
    player_0_avg_pol_spec = StrategySpec.from_json_file(
        "/home/jblanier/git/grl/grl/data/loss_game_nfsp_10_moves_alpha_2.9_sparse_12.07.15AM_May-18-202120bfveou/avg_policy_checkpoint_specs/average_policy_player_0_iter_30000.json"
    )


    class HyperParamSearchCallbacks(DefaultCallbacks):

        def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
                             episode: MultiAgentEpisode, env_index: int, **kwargs):
            """Lazily load the frozen player-0 average policy onto this rollout worker.

            Effectively runs once per worker: after the first successful load,
            the ``avg_pol_loaded`` flag set on the worker short-circuits the
            load on every later episode.
            """
            super().on_episode_start(worker=worker, base_env=base_env, policies=policies, episode=episode,
                                     env_index=env_index, **kwargs)
            # getattr's default covers both "attribute missing" and
            # "attribute is False" in a single expression.
            if not getattr(worker, "avg_pol_loaded", False):
                target_policy = worker.policy_map["average_policy"]
                load_pure_strat(policy=target_policy, pure_strat_spec=player_0_avg_pol_spec)
                worker.avg_pol_loaded = True

        def on_train_result(self, *, trainer, result: dict, **kwargs):