Example 1
            new_policy.set_state(main_state)
            # We need to sync the just copied local weights (from main policy)
            # to all the remote workers as well.
            trainer.workers.sync_weights()
        else:
            print("not good enough; will keep learning ...")

        # +2 = main + random
        result["league_size"] = self.current_opponent + 2


if __name__ == "__main__":
    ray.init(num_cpus=args.num_cpus or None, include_dashboard=False)

    register_env("open_spiel_env",
                 lambda _: OpenSpielEnv(pyspiel.load_game(args.env)))

    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        # agent_id = [0|1] -> policy depends on episode ID
        # This way, we make sure that both policies sometimes play agent0
        # (start player) and sometimes agent1 (player to move 2nd).
        return "main" if episode.episode_id % 2 == agent_id else "random"

    config = {
        "env": "open_spiel_env",
        "callbacks": SelfPlayCallback,
        "model": {
            "fcnet_hiddens": [512, 512],
        },
        "num_sgd_iter": 20,
        "num_envs_per_worker": 5,
Example 2
import pyspiel
import unittest

import ray
import ray.rllib.algorithms.alpha_star as alpha_star
from ray.rllib.env.wrappers.open_spiel import OpenSpielEnv
from ray.rllib.utils.test_utils import (
    check_compute_single_action,
    check_train_results,
    framework_iterator,
)
from ray.tune import register_env

# Connect-4 OpenSpiel env.
register_env("connect_four",
             lambda _: OpenSpielEnv(pyspiel.load_game("connect_four")))


class TestAlphaStar(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        ray.init(num_cpus=20)

    @classmethod
    def tearDownClass(cls):
        ray.shutdown()

    def test_alpha_star_compilation(self):
        """Test whether AlphaStar can be built with all frameworks."""
        config = (alpha_star.AlphaStarConfig().environment(
            env="connect_four").training(
Example 3
            wr = self.win_rates.get(p, 0.0)
            print(f"\t{p}: {wr}")
        print("Frozen policies:")
        for p in sorted(self.non_trainable_policies):
            wr = self.win_rates.get(p, 0.0)
            print(f"\t{p}: {wr}")
        print()


if __name__ == "__main__":
    ray.init(
        num_cpus=args.num_cpus or None,
        include_dashboard=False,
    )

    register_env("open_spiel_env", lambda _: OpenSpielEnv(pyspiel.load_game(args.env)))

    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        # At first, only have main play against the random main exploiter.
        return "main" if episode.episode_id % 2 == agent_id else "main_exploiter_0"

    config = {
        "env": "open_spiel_env",
        "callbacks": LeagueBasedSelfPlayCallback,
        "num_sgd_iter": 20,
        "num_envs_per_worker": 5,
        "multiagent": {
            # Initial policy map: All PPO. This will be expanded
            # to more policy snapshots. This is done in the
            # custom callback defined above (`LeagueBasedSelfPlayCallback`).
            "policies": {