# NOTE(review): the line breaks of this fragment were lost, so it sits on one
# physical line. As written, Python runs only
# `new_policy.set_state(main_state)`; everything after the first `#` is read
# as comment text. That covers the `trainer.workers.sync_weights()` call, the
# `else` branch, the `result["league_size"]` update, the `__main__` setup,
# `policy_mapping_fn` and the start of `config`.
# The fragment opens inside a branch whose `if` header is not visible here and
# stops inside the `config` dict literal, so the original layout has to be
# restored from the upstream file rather than guessed.
#
# What the swallowed code says, for whoever restores it:
#   * `result["league_size"]` is set to `self.current_opponent + 2`; the
#     inline comment explains the +2 as "main + random".
#   * `policy_mapping_fn(agent_id, episode, worker, **kwargs)` returns "main"
#     when `episode.episode_id % 2 == agent_id` and "random" otherwise, so
#     that both policies sometimes play agent0 and sometimes agent1.
new_policy.set_state(main_state) # We need to sync the just copied local weights (from main policy) # to all the remote workers as well. trainer.workers.sync_weights() else: print("not good enough; will keep learning ...") # +2 = main + random result["league_size"] = self.current_opponent + 2 if __name__ == "__main__": ray.init(num_cpus=args.num_cpus or None, include_dashboard=False) register_env("open_spiel_env", lambda _: OpenSpielEnv(pyspiel.load_game(args.env))) def policy_mapping_fn(agent_id, episode, worker, **kwargs): # agent_id = [0|1] -> policy depends on episode ID # This way, we make sure that both policies sometimes play agent0 # (start player) and sometimes agent1 (player to move 2nd). return "main" if episode.episode_id % 2 == agent_id else "random" config = { "env": "open_spiel_env", "callbacks": SelfPlayCallback, "model": { "fcnet_hiddens": [512, 512], }, "num_sgd_iter": 20, "num_envs_per_worker": 5,
import pyspiel import unittest import ray import ray.rllib.algorithms.alpha_star as alpha_star from ray.rllib.env.wrappers.open_spiel import OpenSpielEnv from ray.rllib.utils.test_utils import ( check_compute_single_action, check_train_results, framework_iterator, ) from ray.tune import register_env # Connect-4 OpenSpiel env. register_env("connect_four", lambda _: OpenSpielEnv(pyspiel.load_game("connect_four"))) class TestAlphaStar(unittest.TestCase): @classmethod def setUpClass(cls): ray.init(num_cpus=20) @classmethod def tearDownClass(cls): ray.shutdown() def test_alpha_star_compilation(self): """Test whether AlphaStar can be built with all frameworks.""" config = (alpha_star.AlphaStarConfig().environment( env="connect_four").training(
# NOTE(review): the line breaks of this fragment were lost, so it sits on one
# physical line. The statements up to `def policy_mapping_fn(...):` are run
# together without separators, which is not valid Python. Everything after the
# first `#` (the function's `return` and the start of the `config` dict) is
# read as comment text.
# The fragment appears to open inside a loop body (`p` and `self` are bound
# outside the visible text; confirm against the upstream file) and stops
# inside the nested `"policies"` dict literal, so the original layout has to
# be restored from the upstream file rather than guessed.
#
# What the collapsed code says, for whoever restores it:
#   * Each printed win rate is `self.win_rates[p]`, or 0.0 when `p` has no
#     entry in `self.win_rates`.
#   * `policy_mapping_fn(agent_id, episode, worker, **kwargs)` returns "main"
#     when `episode.episode_id % 2 == agent_id` and "main_exploiter_0"
#     otherwise.
wr = self.win_rates[p] if p in self.win_rates else 0.0 print(f"\t{p}: {wr}") print("Frozen policies:") for p in sorted(self.non_trainable_policies): wr = self.win_rates[p] if p in self.win_rates else 0.0 print(f"\t{p}: {wr}") print() if __name__ == "__main__": ray.init( num_cpus=args.num_cpus or None, include_dashboard=False, ) register_env("open_spiel_env", lambda _: OpenSpielEnv(pyspiel.load_game(args.env))) def policy_mapping_fn(agent_id, episode, worker, **kwargs): # At first, only have main play against the random main exploiter. return "main" if episode.episode_id % 2 == agent_id else "main_exploiter_0" config = { "env": "open_spiel_env", "callbacks": LeagueBasedSelfPlayCallback, "num_sgd_iter": 20, "num_envs_per_worker": 5, "multiagent": { # Initial policy map: All PPO. This will be expanded # to more policy snapshots. This is done in the # custom callback defined above (`LeagueBasedSelfPlayCallback`). "policies": {