# NOTE(review): fragment -- the opening of this config dict lies before the
# visible source. Keys below are RLlib resource/rollout settings sized by a
# `large` flag defined elsewhere -- TODO confirm against the full file.
# For FetchPush-v1 the script raises the stop criterion to 3e6 timesteps and
# widens rollout collection (8 workers x 10 envs) before launching PPO.
'sgd_minibatch_size': 100 if large else 64, 'train_batch_size': 10000 if large else 2048, "num_gpus": 0.25, "num_cpus_per_worker": args.num_cpus_per_worker, "num_cpus_for_driver": args.num_cpus_for_driver, "num_envs_per_worker": 8 if large else 10, 'num_workers': 8 if large else 1, "callbacks": {"on_train_result": None} } if env_name == "FetchPush-v1": stop = int(3e6) config.update( num_workers=8, num_envs_per_worker=10, gamma=0.95, lr=5e-4, delay_update=not args.no_delay_update ) train( "PPO", config=config, stop=stop, exp_name=exp_name, num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, keep_checkpoints_num=10 )
# Launch a DiCE training run on the multi-agent env sweep: fixed 5e7-step
# budget, single-GPU driver, 16 rollout workers.
# NOTE(review): line breaks reconstructed -- the original source had these
# statements collapsed onto a single line.
stop = int(5e7)

config = {
    "num_sgd_iter": 10,
    "num_envs_per_worker": 1,
    "entropy_coeff": 0.001,
    "lambda": 0.95,
    "lr": 2.5e-4,
    # 'sample_batch_size': 200 if large else 50,
    # 'sgd_minibatch_size': 100 if large else 64,
    # 'train_batch_size': 10000 if large else 2048,
    "num_gpus": 1,
    "num_cpus_per_worker": 1,
    "num_cpus_for_driver": 2,
    "num_workers": 16,
}

# Fold in the multi-agent env spec; the agent count comes from the CLI and is
# wrapped in a grid search so each value becomes its own trial.
config.update(
    get_marl_env_config(env_name, tune.grid_search([args.num_agents])))

train(
    DiCETrainer,
    config=config,
    stop=stop,
    exp_name=exp_name,
    num_seeds=args.num_seeds,
    num_gpus=args.num_gpus,
    test_mode=args.test,
)
# NOTE(review): fragment -- begins with an orphaned value `1000,` whose key
# (and the dict opening) lie before the visible source; presumably part of a
# SAC config (`sac_config`) -- confirm against the full file.
# The run trains SAC with per-step target-network sync
# (target_network_update_freq=1), a 10k-step warmup before learning, and
# evaluation every iteration.
1000, "sample_batch_size": 1, "train_batch_size": 256, "target_network_update_freq": 1, "timesteps_per_iteration": 1000, "learning_starts": 10000, "clip_actions": False, "normalize_actions": True, "evaluation_interval": 1, "metrics_smoothing_episodes": 5, "num_cpus_for_driver": 2, } train("SAC", exp_name=exp_name, keep_checkpoints_num=5, stop=stop, config=sac_config, num_gpus=args.num_gpus, num_seeds=args.num_seeds)
# NOTE(review): fragment -- the opening of this config dict lies before the
# visible source. This looks like a DiCE ablation sweep: USE_BISECTOR is
# pinned False while ONLY_TNB is grid-searched over {True, False}, and two
# learning rates are compared -- TODO confirm against the full file.
"num_sgd_iter": 10, "lr": tune.grid_search([0.0001, 5e-5]), 'rollout_fragment_length': 50, 'sgd_minibatch_size': 64, 'train_batch_size': 2048, "num_gpus": 0.2, "num_envs_per_worker": 5, 'num_workers': 7, "num_cpus_per_worker": 0.5, "num_cpus_for_driver": 0.5, constants.USE_BISECTOR: tune.grid_search([False]), constants.ONLY_TNB: tune.grid_search([True, False]), # "vf_ratio_clip_param": tune.grid_search([0.05]), } config.update( get_marl_env_config(env_name, tune.grid_search([args.num_agents])) ) train( DiCETrainer, config=config, stop=stop, exp_name=exp_name, num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, keep_checkpoints_num=5 )
# Final launch stage for the dynamic-trainer script: strip the std_mode
# option, apply CPU-only resource overrides for the evolutionary algorithms,
# optionally attach to an existing Ray cluster, then start training.
# NOTE(review): line breaks reconstructed -- the original source had these
# statements collapsed onto a single line.
config["model"]["custom_options"].pop("std_mode")

# ES/ARS are CPU-bound population methods: drop the GPU and fan out over
# 15 half-CPU workers instead.
if algo in ["ES", "ARS"]:
    config["num_gpus"] = 0
    config["num_cpus_per_worker"] = 0.5
    config["num_workers"] = 15
    config["num_cpus_for_driver"] = 0.5

# # test
# config["model"]["custom_options"]["num_components"] = 2
# initialize_ray(test_mode=True, local_mode=True)
# trainer = GaussianESTrainer(config=config, env="BipedalWalker-v2")

# When a redis password is supplied we are on a cluster: connect to the head
# node advertised via the ip_head environment variable.
if args.redis_password:
    from toolbox import initialize_ray
    import os
    initialize_ray(
        address=os.environ["ip_head"],
        test_mode=args.test,
        redis_password=args.redis_password,
    )

# NOTE(review): `test_mode=test` uses a bare `test` name -- presumably a
# module-level alias of `args.test`; verify it is defined in the full file.
train(
    get_dynamic_trainer(algo),
    config=config,
    stop=stop,
    exp_name=exp_name,
    num_seeds=args.num_seeds,
    num_gpus=args.num_gpus,
    test_mode=test,
    keep_checkpoints_num=5,
    start_seed=args.start_seed,
)
# NOTE(review): fragment -- the opening of this config dict lies before the
# visible source. The script switches between plain PPO (single env via
# config["env"]) and DiCE (multi-agent env config merged in) based on the
# --ppo flag; rollout sizing again keys off a `large` flag defined elsewhere
# -- TODO confirm against the full file.
"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 0.0001, 'rollout_fragment_length': 200 if large else 50, 'sgd_minibatch_size': 100 if large else 64, 'train_batch_size': 10000 if large else 2048, "num_gpus": 0.25, "num_cpus_per_worker": args.num_cpus_per_worker, "num_cpus_for_driver": args.num_cpus_for_driver, "num_envs_per_worker": 8 if large else 10, 'num_workers': 8 if large else 1, "callbacks": { "on_train_result": None } } if args.ppo: config["env"] = env_name else: config.update( get_marl_env_config(env_name, tune.grid_search([args.num_agents]))) train("PPO" if args.ppo else DiCETrainer, config=config, stop=stop, exp_name=exp_name, num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, keep_checkpoints_num=3)
# NOTE(review): fragment -- begins with an orphaned value `8,` whose key (and
# the dict opening) lie before the visible source. The nested "evolution"
# sub-config sizes the evolutionary half of EPTrainer (PPO-sized 4000-sample
# batches, 10 half-CPU workers) and picks its optimizer from the
# --adam-optimizer flag -- TODO confirm against the full file.
8, "entropy_coeff": 0.001, "lambda": 0.95, "lr": 2.5e-4, "num_gpus": 1, # Force to run 2 concurrently "evolution": { "train_batch_size": 4000, # The same as PPO "num_workers": 10, # default is 10, "num_cpus_per_worker": 0.5, "optimizer_type": "adam" if args.adam_optimizer else "sgd" }, # locomotion config "kl_coeff": 1.0, } train(EPTrainer, config=config, stop=1e7, exp_name=exp_name, num_seeds=args.num_seeds, test_mode=args.test, checkpoint_freq=args.checkpoint_freq, start_seed=args.start_seed, verbose=1)