NORMALIZE_ADVANTAGE: tune.grid_search([False]), "env": MultiAgentEnvWrapper, "env_config": { "env_name": args.env_name, "num_agents": tune.grid_search([1, 3, 7, 10]) }, # should be fixed "kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 0.0002, 'sample_batch_size': 50, 'sgd_minibatch_size': 64, 'train_batch_size': 2048, "num_gpus": 0.4, "num_cpus_per_worker": 1, "num_cpus_for_driver": 1, "num_envs_per_worker": 5, 'num_workers': 1, } train(extra_config=walker_config, trainer=DECETrainer, env_name=walker_config['env_config']['env_name'], stop={"timesteps_total": int(5e6) if not test else 2000}, exp_name=exp_name, num_agents=walker_config['env_config']['num_agents'], num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, verbose=1)
"num_sgd_iter": 20, "lr": 0.0001, "horizon": 5000, 'sample_batch_size': 200, 'sgd_minibatch_size': 10000, 'train_batch_size': 100000, "num_gpus": 1, "num_cpus_per_worker": 1, "num_cpus_for_driver": 1, "num_envs_per_worker": 8, 'num_workers': 24, "object_store_memory": int(5 * GB), "memory": int(25 * GB) } train( extra_config=walker_config, trainer=DECETrainer, env_name=walker_config['env_config']['env_name'], stop={"timesteps_total": int(5e8)}, exp_name=exp_name, num_agents=walker_config['env_config']['num_agents'], num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, verbose=1, init_memory=int(70 * GB), init_object_store_memory=int(15 * GB), init_redis_max_memory=int(6 * GB), )
"env_config": { "env_name": "Walker2d-v3", "num_agents": args.num_agents }, # should be fixed "kl_coeff": 1.0, "num_sgd_iter": 20, "lr": 0.0001, 'sample_batch_size': 256, 'sgd_minibatch_size': 4096, 'train_batch_size': 65536, "num_gpus": 0.5, "num_cpus_per_worker": 0.5, "num_envs_per_worker": 16, 'num_workers': 8, } config = humanoid_config if not args.walker else walker_config train( extra_config=config, trainer=PPOESTrainer, env_name=config['env_config']['env_name'], stop={"timesteps_total": int(2e8) if not args.walker else int(1e7)}, exp_name="DELETEME-TEST" if args.test else args.exp_name, num_agents=args.num_agents if not args.test else 3, num_seeds=args.num_seeds, num_gpus=args.num_gpus, test_mode=args.test, # address=args.address if args.address else None )
"env": MultiAgentEnvWrapper, "env_config": { "env_name": "Walker2d-v3", "num_agents": tune.grid_search([3, 5, 10]) }, # should be fixed "kl_coeff": 1.0, "num_sgd_iter": 20, "lr": 0.0001, 'sample_batch_size': 256, 'sgd_minibatch_size': 4096, 'train_batch_size': 65536, "num_gpus": 0.45, "num_cpus_per_worker": 0.5, "num_cpus_for_driver": 0.5, "num_envs_per_worker": 16, 'num_workers': 8, } train( extra_config=walker_config, trainer=PPOESTrainer, env_name=walker_config['env_config']['env_name'], stop={"timesteps_total": int(5e7)}, exp_name=exp_name, num_agents=walker_config['env_config']['num_agents'], num_seeds=3, num_gpus=4, test_mode=False )