def regression_test(local_mode=False):
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "gamma": 0.95,
            "target_network_update_freq": 32,
            "tau": 1.0,
            "train_batch_size": 200,
            "rollout_fragment_length": 50,
            "optimization": {
                "actor_learning_rate": 0.005,
                "critic_learning_rate": 0.005,
                "entropy_learning_rate": 0.0001
            },
            **get_marl_env_config(
                "CartPole-v0", num_agents, normalize_actions=False)
        },
        {"episode_reward_mean": 150 * num_agents},
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True
    )
    shutil.rmtree(local_dir, ignore_errors=True)
def dice_sac_trainer():
    initialize_ray(test_mode=True, local_mode=True)
    env_name = "BipedalWalker-v2"
    num_agents = 3
    env = gym.make(env_name)
    trainer = DiCESACTrainer(
        get_marl_env_config(env_name, num_agents, normalize_actions=False),
        env=MultiAgentEnvWrapper
    )
    return env, trainer
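# A minimal usage sketch for the dice_sac_trainer() fixture above. It assumes
# RLlib's standard Trainer API (train(), compute_action(), stop()) and guesses
# "agent0" as the policy id produced by the MARL wrapper; the helper name, the
# policy id, and feeding the raw gym observation to the trainer are all
# illustrative assumptions, not guarantees from this repo.
def _smoke_test_dice_sac_trainer():
    env, trainer = dice_sac_trainer()
    result = trainer.train()  # run a single training iteration
    print("episode_reward_mean:", result.get("episode_reward_mean"))
    obs = env.reset()
    # "agent0" is a hypothetical policy id; adjust to the wrapper's naming.
    action = trainer.compute_action(obs, policy_id="agent0")
    print("sampled action:", action)
    trainer.stop()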
def regression_test2(local_mode=False):
    from ray import tune
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "soft_horizon": True,
            "clip_actions": False,
            "normalize_actions": False,  # <<== Handle in MARL env
            "metrics_smoothing_episodes": 5,
            "no_done_at_end": True,
            "train_batch_size": 1000,
            "rollout_fragment_length": 50,
            constants.DELAY_UPDATE: tune.grid_search([True, False]),
            # constants.NOR: tune.grid_search([True, False]),
            # "optimization": {
            #     "actor_learning_rate": 0.005,
            #     "critic_learning_rate": 0.005,
            #     "entropy_learning_rate": 0.0001
            # },
            **get_marl_env_config(
                "Pendulum-v0", num_agents, normalize_actions=True)
        },
        {
            "episode_reward_mean": -300 * num_agents,
            "timesteps_total": 13000 * num_agents
        },
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True
    )
    shutil.rmtree(local_dir, ignore_errors=True)
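# Optional local entry point (not part of the original module): run both
# regression tests above in local mode so failures surface in-process.
if __name__ == "__main__":
    regression_test(local_mode=True)
    regression_test2(local_mode=True)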
stop = int(5e7)
config = {
    "num_sgd_iter": 10,
    "num_envs_per_worker": 1,
    "entropy_coeff": 0.001,
    "lambda": 0.95,
    "lr": 2.5e-4,
    # 'sample_batch_size': 200 if large else 50,
    # 'sgd_minibatch_size': 100 if large else 64,
    # 'train_batch_size': 10000 if large else 2048,
    "num_gpus": 1,
    "num_cpus_per_worker": 1,
    "num_cpus_for_driver": 2,
    "num_workers": 16
}
config.update(
    get_marl_env_config(env_name, tune.grid_search([args.num_agents])))
train(
    DiCETrainer,
    config=config,
    stop=stop,
    exp_name=exp_name,
    num_seeds=args.num_seeds,
    num_gpus=args.num_gpus,
    test_mode=args.test,
)
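# The training fragment above relies on `args`, `env_name`, and `exp_name`
# that were defined earlier in the original script. A hypothetical argparse
# preamble along these lines (flag names and defaults are assumptions for
# illustration, not the original code) would make the fragment self-contained:
#
#     import argparse
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--exp-name", type=str, default="dice_marl")
#     parser.add_argument("--env-name", type=str, default="BipedalWalker-v2")
#     parser.add_argument("--num-agents", type=int, default=5)
#     parser.add_argument("--num-seeds", type=int, default=3)
#     parser.add_argument("--num-gpus", type=int, default=1)
#     parser.add_argument("--test", action="store_true")
#     args = parser.parse_args()
#
#     env_name = args.env_name
#     exp_name = "{}_{}".format(args.exp_name, env_name)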
1, "timesteps_per_iteration": 1000, "learning_starts": 10000, "clip_actions": True, # "normalize_actions": True, <<== This is handled by MARL env "evaluation_interval": 1, "metrics_smoothing_episodes": 5, } config.update( get_marl_env_config(config["env"], args.num_agents, normalize_actions=True)) config["evaluation_config"] = dict( # env_config=config["env_config"], explore=False) train(DiCESACTrainer, config=config, stop=stop, exp_name=exp_name, num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, keep_checkpoints_num=5)
        dice_utils.TWO_SIDE_CLIP_LOSS: False,
        dice_utils.ONLY_TNB: True,
        dice_utils.NORMALIZE_ADVANTAGE: True,  # May need to be set to False
    }
)

DiESTrainer = DiCETrainer.with_updates(
    name="DiES",
    default_config=dies_default_config,
    after_train_result=run_evolution_strategies
)

if __name__ == '__main__':
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
        "num_sgd_iter": 2,
        "train_batch_size": 400,
        "update_steps": 1000,
        **get_marl_env_config(env_name, num_agents)
    }
    initialize_ray(test_mode=True, local_mode=True)
    train(
        DiESTrainer,
        config,
        exp_name="DELETE_ME_TEST",
        stop={"timesteps_total": 10000},
        test_mode=True
    )