"""Launcher script for a multi-agent DiCE experiment via Ray Tune.

Parses the shared training CLI (plus a ``--num-agents`` option), then builds
the experiment name, stop criterion, and RLlib-style trainer config.

NOTE(review): ``tune``, ``DiCETrainer``, ``get_marl_env_config`` and ``train``
are imported but not used in the visible portion of this file — presumably the
``train(...)`` call follows below; do not remove them without checking.
"""
from ray import tune
from toolbox.dice.dice import DiCETrainer
from toolbox.marl import get_marl_env_config
from toolbox.train import train, get_train_parser

if __name__ == '__main__':
    parser = get_train_parser()
    parser.add_argument("--num-agents", type=int, default=10)
    args = parser.parse_args()

    env_name = args.env_name
    # Experiment name is "<exp_name>-<env_name>" so runs are grouped per env.
    exp_name = "{}-{}".format(args.exp_name, env_name)

    # Stop criterion: 50M timesteps.
    stop = int(5e7)

    config = {
        # PPO-style optimization hyperparameters.
        "num_sgd_iter": 10,
        "num_envs_per_worker": 1,
        "entropy_coeff": 0.001,
        "lambda": 0.95,  # GAE lambda
        "lr": 2.5e-4,
        # NOTE(review): the entries below reference an undefined name
        # ``large`` and were disabled; confirm intent before re-enabling.
        # 'sample_batch_size': 200 if large else 50,
        # 'sgd_minibatch_size': 100 if large else 64,
        # 'train_batch_size': 10000 if large else 2048,
        # Resource allocation for the Ray cluster.
        "num_gpus": 1,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 2,
        "num_workers": 16
    }
from ray import tune from toolbox.train import train, get_train_parser if __name__ == '__main__': args = get_train_parser().parse_args() exp_name = args.exp_name # It's "12230-ppo..." previously.... env_name = args.env_name stop = int(1e6) sac_config = { "env": tune.grid_search([ "HalfCheetah-v3", # "Walker2d-v3", "Ant-v3", # "Hopper-v3", "Humanoid-v3" ]), "horizon": 1000, "sample_batch_size": 1, "train_batch_size": 256, "target_network_update_freq": 1, "timesteps_per_iteration": 1000, "learning_starts":