Code Example #1
from ray.rllib.agents.dqn import DQNTrainer

from grl.algos.nfsp_rllib.nfsp import NFSPTrainer, NFSPTorchAveragePolicy
from grl.envs.oshi_zumo_multi_agent_env import OshiZumoMultiAgentEnv, ThousandActionOshiZumoMultiAgentEnv, \
    TinyOshiZumoMultiAgentEnv, MediumOshiZumoMultiAgentEnv
from grl.rl_apps.scenarios.catalog import scenario_catalog
from grl.rl_apps.scenarios.catalog.common import default_if_creating_ray_head
from grl.rl_apps.scenarios.nfsp_scenario import NFSPScenario
from grl.rl_apps.scenarios.stopping_conditions import NoStoppingCondition
from grl.rl_apps.scenarios.trainer_configs.oshi_zumo_configs import *
from grl.rl_apps.scenarios.trainer_configs.poker_nfsp_configs import *
from grl.rllib_tools.modified_policies.simple_q_torch_policy import SimpleQTorchPolicyPatched

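# Register an NFSP scenario: a DQN best-response trainer paired with an NFSP average-policy trainer.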
scenario_catalog.add(
    NFSPScenario(name=f"oshi_zumo_nfsp_dqn",
                 ray_cluster_cpus=default_if_creating_ray_head(default=4),
                 ray_cluster_gpus=default_if_creating_ray_head(default=0),
                 ray_object_store_memory_cap_gigabytes=1,
                 env_class=OshiZumoMultiAgentEnv,
                 env_config={},
                 trainer_class=DQNTrainer,
                 avg_trainer_class=NFSPTrainer,
                 policy_classes={
                     "average_policy": NFSPTorchAveragePolicy,
                     "best_response": SimpleQTorchPolicyPatched,
                 },
                 get_trainer_config=nfsp_leduc_dqn_params,
                 get_avg_trainer_config=nfsp_leduc_avg_policy_params,
                 anticipatory_param=0.1,
                 nfsp_get_stopping_condition=lambda: NoStoppingCondition(),
                 calculate_openspiel_metanash=False,
                 # ... remaining NFSPScenario keyword arguments are truncated in the original snippet
                 ))
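For reference, a registered scenario is later looked up from the catalog by name when a run is launched. Below is a minimal usage sketch; the `get` lookup method is an assumption (the retrieval API is not shown in this snippet), while the attribute names match the constructor arguments above.

# Hypothetical usage sketch; scenario_catalog.get is assumed, not shown above.
scenario = scenario_catalog.get("oshi_zumo_nfsp_dqn")
print(scenario.env_class)           # OshiZumoMultiAgentEnv
print(scenario.anticipatory_param)  # 0.1, the NFSP probability of acting with the best response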
Code Example #2
File: poker_psro_scenarios.py  Project: indylab/nxdo
from ray.rllib.agents.dqn import DQNTrainer
from ray.rllib.agents.ppo import PPOTrainer, PPOTorchPolicy

from grl.envs.poker_multi_agent_env import PokerMultiAgentEnv
from grl.rl_apps.scenarios.catalog import scenario_catalog
from grl.rl_apps.scenarios.catalog.common import default_if_creating_ray_head
from grl.rl_apps.scenarios.psro_scenario import PSROScenario
from grl.rl_apps.scenarios.stopping_conditions import *
from grl.rl_apps.scenarios.trainer_configs.poker_psro_configs import *
from grl.rllib_tools.modified_policies.simple_q_torch_policy import SimpleQTorchPolicyPatched

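# Register a PSRO scenario for Kuhn poker with DQN-based best responses.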
scenario_catalog.add(
    PSROScenario(
        name="kuhn_psro_dqn",
        ray_cluster_cpus=default_if_creating_ray_head(default=8),
        ray_cluster_gpus=default_if_creating_ray_head(default=0),
        ray_object_store_memory_cap_gigabytes=1,
        env_class=PokerMultiAgentEnv,
        env_config={
            "version": "kuhn_poker",
            "fixed_players": True,
        },
        mix_metanash_with_uniform_dist_coeff=0.0,
        allow_stochastic_best_responses=False,
        trainer_class=DQNTrainer,
        policy_classes={
            "metanash": SimpleQTorchPolicyPatched,
            "best_response": SimpleQTorchPolicyPatched,
            "eval": SimpleQTorchPolicyPatched,
        },
        num_eval_workers=8,
        # ... remaining PSROScenario keyword arguments are truncated in the original snippet
    ))
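As with the NFSP example, the PSRO scenario would be fetched by name before training. A minimal sketch under the same assumption that the catalog exposes a `get` method; the environment constructor signature is also an assumption.

# Hypothetical usage sketch; scenario_catalog.get and the env constructor signature are assumptions.
scenario = scenario_catalog.get("kuhn_psro_dqn")
env = scenario.env_class(env_config=scenario.env_config)  # assumed RLlib-style env constructor
print(scenario.trainer_class)  # DQNTrainer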