def test_multiple_num_agents(local_mode=False):
    """Smoke-test CEPPOTrainer over a grid search of agent counts (2, 3, 4).

    Runs a short (5000-timestep) training on BipedalWalker-v2 with no GPUs
    and returns the ``tune.run`` analysis object.
    """
    gpu_count = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=gpu_count)

    # Grid-search the number of agents; other settings come from the shared
    # test-config helper.
    agent_grid = tune.grid_search([2, 3, 4])
    config = _get_default_test_config(agent_grid, "BipedalWalker-v2", gpu_count)

    stop_criteria = {"timesteps_total": 5000}
    return tune.run(
        CEPPOTrainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop=stop_criteria,
        config=config,
    )
def test_cetd3(local_mode=False):
    """Smoke-test CETD3Trainer with 3 agents on BipedalWalker-v2.

    Starts from the shared PPO-oriented test config, strips the SGD options
    that TD3 does not accept, shortens the exploration/warm-up phases, and
    runs for 2000 timesteps.
    """
    num_gpus = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=num_gpus)
    config = _get_default_test_config(
        num_agents=3, env_name="BipedalWalker-v2", num_gpus=num_gpus
    )

    # The default test config targets PPO; these SGD keys are meaningless for
    # TD3 and must be removed before tune.run. pop(..., None) tolerates either
    # key being absent (the original unconditional pop could raise KeyError
    # when only one of the two keys was present).
    config.pop("num_sgd_iter", None)
    config.pop("sgd_minibatch_size", None)

    # Keep the run short: tiny iterations and a brief exploration/warm-up.
    config['timesteps_per_iteration'] = 80
    config['pure_exploration_steps'] = 80
    config['learning_starts'] = 180

    tune.run(
        CETD3Trainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop={"timesteps_total": 2000},
        config=config
    )
def _base(
        trainer,
        local_mode=False,
        extra_config=None,
        t=500,
        env_name="BipedalWalker-v2",
        num_agents=3
):
    """Shared launcher for short trainer smoke tests.

    Builds the default test config for ``num_agents`` agents on ``env_name``
    (optionally overlaid with ``extra_config``), then runs ``trainer`` until
    ``t`` total timesteps — or until the stop-dict ``t`` if a dict is passed.
    Returns the ``tune.run`` analysis object.
    """
    gpu_count = 0
    initialize_ray(test_mode=True, local_mode=local_mode, num_gpus=gpu_count)

    config = _get_default_test_config(num_agents, env_name, gpu_count)
    if extra_config:
        config.update(extra_config)

    # ``t`` may already be a fully-formed stop dict; otherwise treat it as a
    # timestep budget.
    stop = t if isinstance(t, dict) else {"timesteps_total": t}

    return tune.run(
        trainer,
        local_dir=get_local_dir(),
        name="DELETEME_TEST_extra_loss_ppo_trainer",
        stop=stop,
        config=config,
    )
# NOTE(review): this fragment reads ``env_name``, ``exp_name``, ``stop`` and
# ``walker_config`` from an enclosing scope that is not visible here — it
# appears to be the interior of a larger training function. Confirm against
# the full file before moving or refactoring it.
from ray.tune.registry import register_env

def make_pybullet(_=None):
    """Env factory registered with Tune: imports pybullet envs lazily so the
    import happens inside each Ray worker, then builds ``env_name``."""
    import pybullet_envs
    import gym
    # Side effect of the import registers the pybullet envs with gym; the
    # print is a worker-side sanity check that the import succeeded.
    print("Successfully import pybullet and found: ", pybullet_envs.getList())
    return gym.make(env_name)

# Make the factory available to workers under the env's own name.
register_env(env_name, make_pybullet)

analysis = tune.run(
    "PPO",
    local_dir=get_local_dir(),
    name=exp_name,
    checkpoint_freq=10,
    keep_checkpoints_num=10,
    checkpoint_score_attr="episode_reward_mean",
    checkpoint_at_end=True,
    # ``stop`` may be a raw sample budget (int) or a pre-built stop dict.
    stop={"info/num_steps_sampled": stop} if isinstance(stop, int) else stop,
    config=walker_config,
    max_failures=20,
    reuse_actors=False
)
# Result-dataframe pickle path, e.g. "<exp>-<env>-<stop>ts.pkl".
path = "{}-{}-{}ts.pkl".format(
    exp_name, env_name, stop
)
def train(
        extra_config,
        trainer,
        env_name,
        stop,
        exp_name,
        num_agents,
        num_seeds,
        num_gpus,
        num_cpus=None,
        test_mode=False,
        address=None,
        redis_password=None,
        clip_memory=False,
        init_memory=None,
        init_object_store_memory=None,
        init_redis_max_memory=None,
        **kwargs
):
    """Launch a multi-seed, multi-agent training run and persist its results.

    Initializes Ray (optionally attaching to an existing cluster via
    ``address``), grid-searches ``num_seeds`` seeds of ``trainer`` on the
    multi-agent wrapper of ``env_name``, checkpoints periodically, pickles
    the trial dataframes to a file named after the experiment, and returns
    the ``tune.run`` analysis object.

    ``stop`` may be an int (sampled-timestep budget) or a full stop dict.
    Extra ``**kwargs`` are forwarded to ``tune.run``.
    """
    # When attaching to a running cluster, the GPU count is managed by the
    # cluster itself.
    if address is not None:
        num_gpus = None

    # Optionally cap Ray's memory pools at fixed large values.
    if clip_memory:
        init_memory = int(300 * GB)
        init_object_store_memory = int(100 * GB)
        init_redis_max_memory = int(50 * GB)

    initialize_ray(
        test_mode=test_mode,
        local_mode=False,
        num_gpus=num_gpus,
        address=address,
        redis_password=redis_password,
        memory=init_memory,
        object_store_memory=init_object_store_memory,
        redis_max_memory=init_redis_max_memory,
        num_cpus=num_cpus
    )

    # Base config: one trial per seed (0, 100, 200, ...) on the multi-agent
    # environment wrapper; verbose logs only in test mode.
    run_config = {
        "seed": tune.grid_search([i * 100 for i in range(num_seeds)]),
        "env": MultiAgentEnvWrapper,
        "env_config": {"env_name": env_name, "num_agents": num_agents},
        "log_level": "DEBUG" if test_mode else "INFO"
    }
    if extra_config:
        run_config.update(extra_config)

    # An int ``stop`` means "this many sampled timesteps"; a dict is passed
    # through untouched.
    stop_spec = (
        {"info/num_steps_sampled": stop} if isinstance(stop, int) else stop
    )

    analysis = tune.run(
        trainer,
        local_dir=get_local_dir(),
        name=exp_name,
        checkpoint_freq=10,
        keep_checkpoints_num=10,
        checkpoint_score_attr="episode_reward_mean",
        checkpoint_at_end=True,
        stop=stop_spec,
        config=run_config,
        max_failures=20,
        reuse_actors=False,
        **kwargs
    )

    # Persist the trial dataframes next to the working directory.
    result_path = "{}-{}-{}ts-{}agents.pkl".format(
        exp_name, env_name, stop, num_agents
    )
    with open(result_path, "wb") as result_file:
        pickle.dump(analysis.fetch_trial_dataframes(), result_file)
    print("Result is saved at: <{}>".format(result_path))

    return analysis