# Registers the "oshi_zumo_nfsp_dqn" NFSP scenario in `scenario_catalog`.
# NOTE(review): in this view the module arrived flattened onto a single line and
# is cut off inside the `scenario_catalog.add(...)` call; line breaks are
# restored below, but the remaining arguments and closing parentheses are not
# visible and are deliberately not reconstructed.
from ray.rllib.agents.dqn import DQNTrainer
from grl.algos.nfsp_rllib.nfsp import NFSPTrainer, NFSPTorchAveragePolicy
from grl.envs.oshi_zumo_multi_agent_env import OshiZumoMultiAgentEnv, ThousandActionOshiZumoMultiAgentEnv, \
    TinyOshiZumoMultiAgentEnv, MediumOshiZumoMultiAgentEnv
from grl.rl_apps.scenarios.catalog import scenario_catalog
from grl.rl_apps.scenarios.catalog.common import default_if_creating_ray_head
from grl.rl_apps.scenarios.nfsp_scenario import NFSPScenario
from grl.rl_apps.scenarios.stopping_conditions import NoStoppingCondition
# NOTE(review): wildcard imports -- `nfsp_leduc_dqn_params` and
# `nfsp_leduc_avg_policy_params` used below are not imported by name, so they
# presumably come from one of these two modules; confirm which.
from grl.rl_apps.scenarios.trainer_configs.oshi_zumo_configs import *
from grl.rl_apps.scenarios.trainer_configs.poker_nfsp_configs import *
from grl.rllib_tools.modified_policies.simple_q_torch_policy import SimpleQTorchPolicyPatched

scenario_catalog.add(
    # NOTE(review): the f-prefix on the name has no placeholders; it is kept
    # exactly as written here.
    NFSPScenario(name=f"oshi_zumo_nfsp_dqn",
                 # Ray cluster sizing arguments; the semantics of
                 # `default_if_creating_ray_head` are defined elsewhere.
                 ray_cluster_cpus=default_if_creating_ray_head(default=4),
                 ray_cluster_gpus=default_if_creating_ray_head(default=0),
                 ray_object_store_memory_cap_gigabytes=1,
                 # Environment class with an empty config dict.
                 env_class=OshiZumoMultiAgentEnv,
                 env_config={},
                 # One trainer class for the DQN side and a separate one for
                 # the average policy.
                 trainer_class=DQNTrainer,
                 avg_trainer_class=NFSPTrainer,
                 # Policy class used for each named policy role.
                 policy_classes={
                     "average_policy": NFSPTorchAveragePolicy,
                     "best_response": SimpleQTorchPolicyPatched,
                 },
                 # NOTE(review): both config getters carry "leduc" in their
                 # names although the env is Oshi-Zumo -- confirm this reuse is
                 # intentional.
                 get_trainer_config=nfsp_leduc_dqn_params,
                 get_avg_trainer_config=nfsp_leduc_avg_policy_params,
                 anticipatory_param=0.1,
                 # Factory returning a fresh NoStoppingCondition on each call.
                 nfsp_get_stopping_condition=lambda: NoStoppingCondition(),
                 calculate_openspiel_metanash=False,
                 # NOTE(review): source is truncated here; the call is not closed in this view.
# Registers the "kuhn_psro_dqn" PSRO scenario in `scenario_catalog`.
# NOTE(review): in this view the module arrived flattened onto a single line and
# is cut off inside the `scenario_catalog.add(...)` call; line breaks are
# restored below, but the remaining arguments and closing parentheses are not
# visible and are deliberately not reconstructed.
from ray.rllib.agents.dqn import DQNTrainer
# NOTE(review): PPOTrainer / PPOTorchPolicy are not referenced in the visible
# part of this module; they may be used further down.
from ray.rllib.agents.ppo import PPOTrainer, PPOTorchPolicy
from grl.envs.poker_multi_agent_env import PokerMultiAgentEnv
from grl.rl_apps.scenarios.catalog import scenario_catalog
from grl.rl_apps.scenarios.catalog.common import default_if_creating_ray_head
from grl.rl_apps.scenarios.psro_scenario import PSROScenario
# NOTE(review): wildcard imports -- any names used later in this module that
# are not imported explicitly presumably come from these two modules.
from grl.rl_apps.scenarios.stopping_conditions import *
from grl.rl_apps.scenarios.trainer_configs.poker_psro_configs import *
from grl.rllib_tools.modified_policies.simple_q_torch_policy import SimpleQTorchPolicyPatched

scenario_catalog.add(
    PSROScenario(
        name="kuhn_psro_dqn",
        # Ray cluster sizing arguments; the semantics of
        # `default_if_creating_ray_head` are defined elsewhere.
        ray_cluster_cpus=default_if_creating_ray_head(default=8),
        ray_cluster_gpus=default_if_creating_ray_head(default=0),
        ray_object_store_memory_cap_gigabytes=1,
        # Environment class and the config dict supplied alongside it.
        env_class=PokerMultiAgentEnv,
        env_config={
            "version": "kuhn_poker",
            "fixed_players": True,
        },
        mix_metanash_with_uniform_dist_coeff=0.0,
        allow_stochastic_best_responses=False,
        trainer_class=DQNTrainer,
        # All three policy roles use the same patched simple-Q torch policy class.
        policy_classes={
            "metanash": SimpleQTorchPolicyPatched,
            "best_response": SimpleQTorchPolicyPatched,
            "eval": SimpleQTorchPolicyPatched,
        },
        num_eval_workers=8,
        # NOTE(review): source is truncated here; the call is not closed in this view.