Example #1
def _validate_base(
        extra_config,
        test_mode,
        env_name,
        trainer,
        stop=50000,
        name="DELETEME_TEST",
        num_gpus=0
):
    initialize_ray(test_mode=test_mode, local_mode=True, num_gpus=num_gpus)
    num_agents = 3
    # policy_names = ["agent{}".format(i) for i in range(num_agents)]
    env_config = {"env_name": env_name, "num_agents": num_agents}
    # env = MultiAgentEnvWrapper(env_config)
    config = {
        "seed": 0,
        "env": MultiAgentEnvWrapper,
        "env_config": env_config,
        # "multiagent": {
        # "policies": {
        #     i: (None, env.observation_space, env.action_space, {})
        #     for i in policy_names
        # },
        # "policy_mapping_fn": lambda x: x,
        # },
    }
    if extra_config:
        config.update(extra_config)
    return tune.run(
        trainer,
        name=name,
        stop={"info/num_steps_sampled": stop},
        config=config
    )
Example #2
def regression_test(local_mode=False):
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "gamma": 0.95,
            "target_network_update_freq": 32,
            "tau": 1.0,
            "train_batch_size": 200,
            "rollout_fragment_length": 50,
            "optimization": {
                "actor_learning_rate": 0.005,
                "critic_learning_rate": 0.005,
                "entropy_learning_rate": 0.0001
            },
            **get_marl_env_config("CartPole-v0",
                                  num_agents,
                                  normalize_actions=False)
        },
        {"episode_reward_mean": 150 * num_agents},
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True)
    shutil.rmtree(local_dir, ignore_errors=True)
Example #3
def profile():
    """This function is use to profile the efficiency of agent restoring."""

    initialize_ray(num_gpus=4, test_mode=True, local_mode=True)
    ckpt = {
        'path': "~/ray_results/0810-20seeds/"
                "PPO_BipedalWalker-v2_0_seed=20_2019"
                "-08-10_16-54-37xaa2muqm/checkpoint_469/checkpoint-469",
        'run_name': "PPO",
        'env_name': "BipedalWalker-v2"
    }
    num_agents = 20
    master_agents = OrderedDict()
    for i in range(num_agents):
        ckpt.update(name=i)
        agent = MaskSymbolicAgent(ckpt)
        master_agents[i] = copy.deepcopy(agent)

    for i, (name, agent) in enumerate(master_agents.items()):
        print("[{}/{}] RESTORE AGENTS: NAME {}".format(i, num_agents, name))
        a = agent.get()
        print(a)
Example #4
def test_reference_consistency():
    initialize_ray(test_mode=True, local_mode=False)
    algos = ["PPO", "ES", "A2C", "A3C", "IMPALA", "ARS"]
    rws = {}
    for i, algo in enumerate(algos):
        trainer = get_dynamic_trainer(
            algo, 10000, "BipedalWalker-v2")(config={
                "env": "BipedalWalker-v2",
                "seed": i * 1000 + 789
            })
        rw = {
            k: v
            for k, v in
            trainer._reference_agent_weights['default_policy'].items()
            if "value" not in k
        }
        rws[algo] = rw
    ks = list(rws)
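    # The reference weights (value-function weights excluded) should be
    # identical across all algorithms, so compare each against the first.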
    first_weight_dict = next(iter(rws.values()))
    for weight_name in first_weight_dict.keys():
        print("Current weight name: ", weight_name)
        for weight_dict_name in ks[1:]:
            weight_dict = rws[weight_dict_name]
            assert_equal(first_weight_dict[weight_name],
                         weight_dict[weight_name])
Example #5
    def __init__(self, ckpt, existing_agent=None):
        if not ray.is_initialized():
            initialize_ray(num_gpus=0)

        with open(osp.join(osp.dirname(osp.dirname(ckpt)), "params.json"),
                  "rb") as f:
            config = json.load(f)

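        # Restore in a driver-only configuration with no rollout workers.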
        config["num_workers"] = 0
        config["num_cpus_per_worker"] = 1
        config["num_cpus_for_driver"] = 1

        self.config = config
        self.config_env = gym.make(self.config["env"])
        self.action_dim = self.config_env.action_space.shape[0]
        self.k = config["model"]["custom_options"]["num_components"]

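        # A Gaussian mixture outputs k mixture weights plus, per component, a
        # mean and a scale parameter for every action dimension; a
        # deterministic mixture only needs the k weights and k means.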
        self.is_deterministic = (
            config["model"]["custom_action_dist"] == DeterministicMixture.name
        )
        self.expect_logit_length = (
            self.k * (1 + self.action_dim) if self.is_deterministic
            else self.k * (1 + 2 * self.action_dim)
        )

        assert osp.exists(ckpt)
        if existing_agent is not None:
            assert isinstance(existing_agent, MOGESAgent)
            existing_agent = existing_agent.agent
        agent = restore_agent(GaussianESTrainer,
                              ckpt,
                              "BipedalWalker-v2",
                              config,
                              existing_agent=existing_agent)
        self.agent = agent

        self._log_std = None
Example #6
def _test_dice(extra_config=None,
               local_mode=False,
               num_agents=3,
               env_name="BipedalWalker-v2",
               t=2000):
    num_gpus = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=num_gpus)

    # default config
    env_config = {"env_name": env_name, "num_agents": num_agents}
    config = {
        "env": MultiAgentEnvWrapper,
        "env_config": env_config,
        "num_gpus": num_gpus,
        "log_level": "DEBUG",
        "rollout_fragment_length": 20,
        "train_batch_size": 100,
        "sgd_minibatch_size": 60,
        "num_sgd_iter": 3,
        "num_workers": 1
    }

    if extra_config:
        config.update(extra_config)
    stop = {"timesteps_total": t} if not isinstance(t, dict) else t
    dir_path = tempfile.mkdtemp()
    ret = tune.run(DiCETrainer,
                   local_dir=dir_path,
                   name="DELETEME_TEST_extra_loss_ppo_trainer",
                   stop=stop,
                   config=config,
                   verbose=2,
                   max_failures=0)
    shutil.rmtree(dir_path, ignore_errors=True)
    return ret
Example #7
def test_restore():
    from toolbox.evaluate import restore_agent
    initialize_ray()
    ckpt = "~/ray_results/1114-tnb_4in1" \
           "/TNBPPO_MultiAgentEnvWrapper_2_novelty_mode=min," \
           "use_joint_dataset=False_2019-11-14_10-30-29l456mu0o" \
           "/checkpoint_60/checkpoint-60"
    marl_agent = restore_agent(TNBPPOTrainer, ckpt, MultiAgentEnvWrapper)
Example #8
def ray_init():
    ray.shutdown()
    initialize_ray(
        test_mode=args.test_mode,
        local_mode=False,
        num_gpus=args.num_gpus if not args.address else None,
        redis_address=args.address if args.address else None
    )
Example #9
def ep_trainer(request):
    initialize_ray(test_mode=True, local_mode=True)
    return EPTrainer(env="BipedalWalker-v2",
                     config=dict(num_sgd_iter=2,
                                 train_batch_size=400,
                                 evolution=dict(episodes_per_batch=20,
                                                train_batch_size=400,
                                                noise_size=20000000),
                                 fuse_mode=request.param))
Example #10
def dice_sac_trainer():
    initialize_ray(test_mode=True, local_mode=True)
    env_name = "BipedalWalker-v2"
    num_agents = 3
    env = gym.make(env_name)
    trainer = DiCESACTrainer(get_marl_env_config(env_name,
                                                 num_agents,
                                                 normalize_actions=False),
                             env=MultiAgentEnvWrapper)
    return env, trainer
Example #11
def test_train_ipd(local_mode=False):
    initialize_ray(test_mode=True, local_mode=local_mode)
    env_name = "CartPole-v0"
    # env_name = "BipedalWalker-v2"
    config = {"num_sgd_iter": 2, "env": env_name, "novelty_threshold": 0.5}
    tune.run(TNBTrainer,
             name="DELETEME_TEST",
             verbose=2,
             checkpoint_freq=10,
             checkpoint_at_end=True,
             stop={"timesteps_total": 50000},
             config=config)
Example #12
def test_multiple_num_agents(local_mode=False):
    num_gpus = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=num_gpus)
    config = _get_default_test_config(
        tune.grid_search([2, 3, 4]), "BipedalWalker-v2", num_gpus
    )
    return tune.run(
        CEPPOTrainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop={"timesteps_total": 5000},
        config=config
    )
Example #13
def test_maddpg_custom_metrics():
    extra_config = {
        "env": IPDEnv,
        "env_config": {
            "env_name": "BipedalWalker-v2",
            "novelty_threshold": 0.0,
            "yaml_path": os.path.abspath("../data/yaml/test-2-agents.yaml")
        },
        "callbacks": {
            "on_episode_end": on_episode_end
        },
    }
    initialize_ray(test_mode=True, local_mode=False)
    tune.run("PPO", stop={"training_iteration": 10}, config=extra_config)
Example #14
def _test_blackbox(algo):
    initialize_ray(test_mode=True)
    config = {"env": "BipedalWalker-v2"}
    if algo == "ES":
        config['num_workers'] = 2
    dir_path = tempfile.mkdtemp()
    trainer = get_dynamic_trainer(algo, 10000, "BipedalWalker-v2")
    ret = tune.run(trainer,
                   local_dir=dir_path,
                   stop={"training_iteration": 10},
                   config=config,
                   verbose=2,
                   max_failures=0)
    shutil.rmtree(dir_path, ignore_errors=True)
    return ret
Example #15
def gaussian_mixture_trainer(request):
    initialize_ray(test_mode=True, local_mode=False)
    trainer = PGTrainer(env="BipedalWalker-v2",
                        config={
                            "model": {
                                "custom_action_dist": GaussianMixture.name,
                                "custom_options": {
                                    "num_components": request.param
                                }
                            },
                            "num_workers": 0,
                            "train_batch_size": 300,
                            "sample_batch_size": 100
                        })
    return trainer
Example #16
def deterministic_mixture_trainer(request):
    initialize_ray(test_mode=True, local_mode=True)
    trainer = GaussianESTrainer(
        env="BipedalWalker-v2",
        config={
            "model": {
                "custom_action_dist": DeterministicMixture.name,
                "custom_options": {
                    "num_components": request.param
                }
            },
            "num_workers": 1,
            "episodes_per_batch": 1,
            "train_batch_size": 300,
            "sample_batch_size": 100
        })
    return trainer
Example #17
def test_cetd3(local_mode=False):
    num_gpus = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=num_gpus)
    config = _get_default_test_config(
        num_agents=3, env_name="BipedalWalker-v2", num_gpus=num_gpus
    )
    if "num_sgd_iter" in config:
        config.pop("num_sgd_iter")
    config.pop("sgd_minibatch_size")
    config['timesteps_per_iteration'] = 80
    config['pure_exploration_steps'] = 80
    config['learning_starts'] = 180
    tune.run(
        CETD3Trainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop={"timesteps_total": 2000},
        config=config
    )
Example #18
def regression_test2(local_mode=False):
    from ray import tune
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "soft_horizon": True,
            "clip_actions": False,
            "normalize_actions": False,  # <<== Handle in MARL env
            "metrics_smoothing_episodes": 5,
            "no_done_at_end": True,
            "train_batch_size": 1000,
            "rollout_fragment_length": 50,
            constants.DELAY_UPDATE: tune.grid_search([True, False]),
            # constants.NOR: tune.grid_search([True, False]),

            # "optimization": {
            #     "actor_learning_rate": 0.005,
            #     "critic_learning_rate": 0.005,
            #     "entropy_learning_rate": 0.0001
            # },
            **get_marl_env_config("Pendulum-v0",
                                  num_agents,
                                  normalize_actions=True)
        },
        {
            "episode_reward_mean": -300 * num_agents,
            "timesteps_total": 13000 * num_agents
        },
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True)
    shutil.rmtree(local_dir, ignore_errors=True)
Example #19
def train(algo,
          init_seed,
          extra_config,
          env_name,
          stop,
          exp_name,
          num_seeds,
          num_gpus,
          test_mode=False,
          **kwargs):
    initialize_ray(test_mode=test_mode, local_mode=False, num_gpus=num_gpus)
    config = {
        "seed": tune.grid_search([i * 100 for i in range(num_seeds)]),
        "env": env_name,
        "log_level": "DEBUG" if test_mode else "INFO"
    }
    if extra_config:
        config.update(extra_config)

    trainer = get_dynamic_trainer(algo, init_seed, env_name)
    analysis = tune.run(
        trainer,
        name=exp_name,
        checkpoint_freq=10 if not test_mode else None,
        keep_checkpoints_num=5 if not test_mode else None,
        checkpoint_score_attr="episode_reward_mean" if not test_mode else None,
        checkpoint_at_end=True if not test_mode else None,
        stop={"timesteps_total": stop} if isinstance(stop, int) else stop,
        config=config,
        max_failures=5,
        **kwargs)

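    # Persist the per-trial progress dataframes returned by Tune to a pickle file.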
    path = "{}-{}-{}ts-{}.pkl".format(exp_name, env_name, stop, algo)
    with open(path, "wb") as f:
        data = analysis.fetch_trial_dataframes()
        pickle.dump(data, f)
        print("Result is saved at: <{}>".format(path))

    return analysis
Example #20
def test_agent_with_mask():
    initialize_ray(test_mode=True, local_mode=False)

    ckpt = "~/ray_results/0810-20seeds/PPO_BipedalWalker-v2_0_seed=20_2019" \
           "-08-10_16-54-37xaa2muqm/checkpoint_469/checkpoint-469"

    # ckpt = None

    ret_list = []

    agent = restore_agent_with_mask("PPO", ckpt, "BipedalWalker-v2")

    # agent.compute_action(np.ones(24))

    for i in range(10):
        test_reward = agent.train()
        print(pretty_print(test_reward))
        ret_list.append(test_reward)

    print("Test end")

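    # All-ones masks keep every unit of the two 256-unit hidden layers active.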
    agent.get_policy().set_default({
        'fc_1_mask': np.ones([256]),
        'fc_2_mask': np.ones([256])
    })

    for i in range(10):
        test_reward2 = agent.train()
        print(pretty_print(test_reward2))
        ret_list.append(test_reward2)

    print("Test2 end")
    return test_reward, test_reward2, ret_list
Example #21
def _base(
        trainer,
        local_mode=False,
        extra_config=None,
        t=500,
        env_name="BipedalWalker-v2",
        num_agents=3
):
    # num_agents = 3
    num_gpus = 0

    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=num_gpus)

    config = _get_default_test_config(num_agents, env_name, num_gpus)
    if extra_config:
        config.update(extra_config)
    stop = {"timesteps_total": t} if not isinstance(t, dict) else t
    return tune.run(
        trainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop=stop,
        config=config
    )
Example #22
def _test_basic(algo):
    initialize_ray(test_mode=True)
    trainer = get_dynamic_trainer(algo, 10000,
                                  "BipedalWalker-v2")(config={
                                      "env": "BipedalWalker-v2"
                                  })

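    # ES/ARS store weights on the policy's variable container; the
    # policy-gradient style trainers expose them via get_weights().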
    if algo in ["ES", "ARS"]:
        tw = {
            k: v
            for k, v in trainer.policy.variables.get_weights().items()
            if "value" not in k
        }
    elif algo in ["PPO", "A2C", "A3C", "IMPALA"]:
        tw = {
            k: v
            for k, v in trainer.get_weights()['default_policy'].items()
            if "value" not in k
        }

    rw = {
        k: v
        for k, v in trainer._reference_agent_weights['default_policy'].items()
        if "value" not in k
    }

    assert len(tw) == len(rw)

    twk = list(tw.keys())
    rwk = list(rw.keys())

    for i in range(len(tw)):
        arr1 = tw[twk[i]]
        arr2 = rw[rwk[i]]

        assert_equal(arr1, arr2)
Example #23
def init_ray():
    initialize_ray(num_gpus=4,
                   test_mode=THIS_SCRIPT_IS_IN_TEST_MODE,
                   object_store_memory=40 * int(1e9))
Example #24
def ray_init():
    ray.shutdown()
    initialize_ray(test_mode=args.test,
                   num_gpus=num_gpus,
                   local_mode=False)
Example #25
        "kl_coeff": 1.0,
        "num_sgd_iter": 20,
        "lr": 0.0002,
        'sample_batch_size': 200,
        'sgd_minibatch_size': 1000 if not is_humanoid else 4000,
        'train_batch_size': 10000 if not is_humanoid else 60000,
        "num_gpus": 1,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 1,
        "num_envs_per_worker": 8,
        'num_workers': 8 if not is_humanoid else 24,
    }

    initialize_ray(
        test_mode=False,
        local_mode=False,
        num_gpus=num_gpus if not args.address else None,
        address=args.address if args.address else None
    )

    if "Bullet" in env_name:
        from ray.tune.registry import register_env


        def make_pybullet(_=None):
            import pybullet_envs
            import gym
            print("Successfully import pybullet and found: ",
                  pybullet_envs.getList())
            return gym.make(env_name)

Example #26

def wrap_stats_fn(policy, train_batch):
    ret = kl_and_loss_stats_modified(policy, train_batch)
    ret.update(novelty_loss_param=policy.novelty_loss_param,
               novelty_target=policy.novelty_target_tensor)
    return ret


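# Rebuild the policy and trainer classes with the adaptive novelty-loss mixins
# and the modified stats function.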
AdaptiveExtraLossPPOTFPolicy = ExtraLossPPOTFPolicy.with_updates(
    name="AdaptiveExtraLossPPOTFPolicy",
    get_default_config=lambda: adaptive_extra_loss_ppo_default_config,
    before_loss_init=setup_mixins_modified,
    stats_fn=wrap_stats_fn,
    mixins=mixin_list + [AddLossMixin, NoveltyParamMixin])

AdaptiveExtraLossPPOTrainer = ExtraLossPPOTrainer.with_updates(
    name="AdaptiveExtraLossPPO",
    after_optimizer_step=wrap_after_train_result,
    validate_config=validate_config_basic,
    default_config=adaptive_extra_loss_ppo_default_config,
    default_policy=AdaptiveExtraLossPPOTFPolicy,
)

if __name__ == '__main__':
    from toolbox import initialize_ray

    print("Prepare to create AELPPO")
    initialize_ray(test_mode=True, num_gpus=0)
    AdaptiveExtraLossPPOTrainer(env="BipedalWalker-v2", config=None)
Example #27
def register_mixture_action_distribution():
    ModelCatalog.register_custom_action_dist(GaussianMixture.name,
                                             GaussianMixture)
    ModelCatalog.register_custom_action_dist(DeterministicMixture.name,
                                             DeterministicMixture)
    print("Successfully register GaussianMixture and DeterministicMixture "
          "action distribution.")


register_mixture_action_distribution()

if __name__ == '__main__':
    from ray import tune
    from toolbox import initialize_ray

    initialize_ray(test_mode=True, local_mode=True)

    tune.run(
        "TD3",
        # PPOTrainerWithoutKL,
        local_dir="/tmp/ray",
        name="DELETE_ME_TEST",
        config={
            "env": "BipedalWalker-v2",
            "log_level": "DEBUG",
            "model": {
                "custom_action_dist": GaussianMixture.name,
                "custom_options": {
                    "num_components": 7
                }
            }
Example #28
    parser.add_argument("--soft", action="store_true")  # default hard
    parser.add_argument("--ppo", action="store_true")
    parser.add_argument("--es", action="store_true")
    parser.add_argument("--es-optimizer", type=str, default="adam")
    parser.add_argument("--local-mode", "-lm", action="store_true")
    args = parser.parse_args()

    print(args)

    local_mode = args.local_mode
    env_name = "CartPole-v0"
    dir_path = tempfile.mkdtemp()
    now = time.time()
    num_gpus = 0

    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=1)

    config = {
        "env": env_name,
        "num_sgd_iter": 10,
        "num_gpus": num_gpus,
        "train_batch_size": 4000,
        "sample_batch_size": 200,
        "fuse_mode": SOFT_FUSE if args.soft else HARD_FUSE,
        "lr": 0.005,
        "evolution": {
            "train_batch_size": 4000,  # The same as PPO
            "num_workers": 10,  # default is 10,
            "optimizer_type": args.es_optimizer
        }
    }
Example #29
def train(
        extra_config,
        trainer,
        env_name,
        stop,
        exp_name,
        num_agents,
        num_seeds,
        num_gpus,
        num_cpus=None,
        test_mode=False,
        address=None,
        redis_password=None,
        clip_memory=False,
        init_memory=None,
        init_object_store_memory=None,
        init_redis_max_memory=None,
        **kwargs
):
    # assert isinstance(stop, int)
    if address is not None:
        num_gpus = None

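    # Optionally cap Ray's heap, object store, and Redis memory pools.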
    if clip_memory:
        init_memory = int(300 * GB)
        init_object_store_memory = int(100 * GB)
        init_redis_max_memory = int(50 * GB)

    initialize_ray(
        test_mode=test_mode,
        local_mode=False,
        num_gpus=num_gpus,
        address=address,
        redis_password=redis_password,
        memory=init_memory,
        object_store_memory=init_object_store_memory,
        redis_max_memory=init_redis_max_memory,
        num_cpus=num_cpus
    )
    env_config = {"env_name": env_name, "num_agents": num_agents}
    config = {
        "seed": tune.grid_search([i * 100 for i in range(num_seeds)]),
        "env": MultiAgentEnvWrapper,
        "env_config": env_config,
        "log_level": "DEBUG" if test_mode else "INFO"
    }
    if extra_config:
        config.update(extra_config)

    analysis = tune.run(
        trainer,
        local_dir=get_local_dir(),
        name=exp_name,
        checkpoint_freq=10,
        keep_checkpoints_num=10,
        checkpoint_score_attr="episode_reward_mean",
        checkpoint_at_end=True,
        stop={"info/num_steps_sampled": stop}
        if isinstance(stop, int) else stop,
        config=config,
        max_failures=20,
        reuse_actors=False,
        **kwargs
    )

    path = "{}-{}-{}ts-{}agents.pkl".format(
        exp_name, env_name, stop, num_agents
    )
    with open(path, "wb") as f:
        data = analysis.fetch_trial_dataframes()
        pickle.dump(data, f)
        print("Result is saved at: <{}>".format(path))

    return analysis
Example #30
    "seed": tune.grid_search([i * 100 for i in range(3)]),
    "env": "DeceptiveMaze-v0",

    "num_sgd_iter": 10,
    "lr": 0.001,
    'sample_batch_size': 16,
    'sgd_minibatch_size': 32,
    'train_batch_size': 512,
    "num_gpus": 0.2,
    "num_envs_per_worker": 4,
    'num_workers': 1,
    "model": {"fcnet_hiddens": [64, 64]}
}

initialize_ray(
    test_mode=False,
    local_mode=False,
    num_gpus=4)

num_agents = 4

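# Layer the multi-agent wrapper and the swept novelty / alpha / tau settings
# on top of the PPO defaults.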
dece_config = copy.deepcopy(ppo_default_config)
dece_config.update(
    {
        "env": MultiAgentEnvWrapper,
        "constrain_novelty": tune.grid_search(['hard', 'soft', None]),
        "env_config": {
            "env_name": ppo_default_config['env'],
            "num_agents": num_agents
        },
        "alpha_coefficient": tune.grid_search([0.05, 0.1, 0.01]),
        "tau": tune.grid_search([0.05, 0.1, 0.01]),