def regression_test(local_mode=False):
    # Regression test: DiCE-SAC on a 3-agent CartPole MARL env.
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "gamma": 0.95,
            "target_network_update_freq": 32,
            "tau": 1.0,
            "train_batch_size": 200,
            "rollout_fragment_length": 50,
            "optimization": {
                "actor_learning_rate": 0.005,
                "critic_learning_rate": 0.005,
                "entropy_learning_rate": 0.0001
            },
            **get_marl_env_config(
                "CartPole-v0", num_agents, normalize_actions=False)
        },
        {"episode_reward_mean": 150 * num_agents},
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True
    )
    shutil.rmtree(local_dir, ignore_errors=True)
def regression_test2(local_mode=False):
    # Regression test: DiCE-SAC on a 3-agent Pendulum MARL env, with the
    # delayed-update option swept via grid search.
    from ray import tune

    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "soft_horizon": True,
            "clip_actions": False,
            "normalize_actions": False,  # <<== Handled in the MARL env
            "metrics_smoothing_episodes": 5,
            "no_done_at_end": True,
            "train_batch_size": 1000,
            "rollout_fragment_length": 50,
            constants.DELAY_UPDATE: tune.grid_search([True, False]),
            # constants.NOR: tune.grid_search([True, False]),
            # "optimization": {
            #     "actor_learning_rate": 0.005,
            #     "critic_learning_rate": 0.005,
            #     "entropy_learning_rate": 0.0001
            # },
            **get_marl_env_config(
                "Pendulum-v0", num_agents, normalize_actions=True)
        },
        {
            "episode_reward_mean": -300 * num_agents,
            "timesteps_total": 13000 * num_agents
        },
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True
    )
    shutil.rmtree(local_dir, ignore_errors=True)
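# Both regression tests splat ``get_marl_env_config(...)`` into the trainer
# config. The helper is defined elsewhere in the repo; the sketch below is
# only an assumption of the kind of dict it returns (an "env"/"env_config"
# fragment merged into the config by the ``**`` expansion), with hypothetical
# key names -- it is not the repo's actual implementation.
def get_marl_env_config_sketch(env_name, num_agents, normalize_actions=False):
    return {
        "env": "MultiAgentEnvWrapper",  # assumed registered env name
        "env_config": {
            "env_name": env_name,  # e.g. "CartPole-v0"
            "num_agents": num_agents,
            "normalize_actions": normalize_actions
        }
    }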
elif difficulty == "easy": stop = int(1e6) config["env"] = tune.grid_search([ "BipedalWalker-v3", # 'ReacherBulletEnv-v0', # 'PusherBulletEnv-v0', # 'ThrowerBulletEnv-v0', # 'StrikerBulletEnv-v0', 'Walker2DBulletEnv-v0', 'HalfCheetahBulletEnv-v0', 'AntBulletEnv-v0', 'HopperBulletEnv-v0', ]) else: raise ValueError("args.set must in [hard, easy].") train( algo, config=config, stop=stop, exp_name=exp_name, num_seeds=args.num_seeds, test_mode=args.test, start_seed=args.start_seed, verbose=1, keep_checkpoints_num=10, # We will mainly run on CUHK cluster so we need to specify the local # directory to store things local_dir=args.local_dir)
        'AntBulletEnv-v0',
        'HopperBulletEnv-v0',
    ]),
    "train_batch_size": 4000,
    "num_workers": 10,
    "optimizer_type": tune.grid_search(["sgd", "adam"]),
    "num_gpus": 0,
    "lr": 2.5e-4,
    "episodes_per_batch": 1,
    "num_cpus_per_worker": 0.5
}
run = GaussianESTrainer
stop = int(1e8)
train(
    run,
    stop=int(args.stop),
    verbose=1,
    config=config,
    exp_name=args.exp_name,
    num_seeds=args.num_seeds,
    num_gpus=args.num_gpus
)
print("Test finished! Cost time: ", time.time() - now)
        dice_utils.TWO_SIDE_CLIP_LOSS: False,
        dice_utils.ONLY_TNB: True,
        dice_utils.NORMALIZE_ADVANTAGE: True,  # May need to be set to False
    }
)

DiESTrainer = DiCETrainer.with_updates(
    name="DiES",
    default_config=dies_default_config,
    after_train_result=run_evolution_strategies
)

if __name__ == '__main__':
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
        "num_sgd_iter": 2,
        "train_batch_size": 400,
        "update_steps": 1000,
        **get_marl_env_config(env_name, num_agents)
    }
    initialize_ray(test_mode=True, local_mode=True)
    train(
        DiESTrainer,
        config,
        exp_name="DELETE_ME_TEST",
        stop={"timesteps_total": 10000},
        test_mode=True
    )
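# ``run_evolution_strategies`` is imported from elsewhere in the repo. The
# skeleton below is only an assumption about the hook shape that
# ``with_updates(after_train_result=...)`` expects: a callable invoked with
# the trainer and its latest result dict at the end of each training
# iteration. It is a hypothetical illustration, not the actual ES update.
def after_train_result_sketch(trainer, result):
    # A real implementation would read fitness signals from ``result`` and
    # update/perturb the policies held by ``trainer`` accordingly.
    pass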