Esempio n. 1
0
        NORMALIZE_ADVANTAGE: tune.grid_search([False]),
        "env": MultiAgentEnvWrapper,
        "env_config": {
            "env_name": args.env_name,
            "num_agents": tune.grid_search([1, 3, 7, 10])
        },

        # Hyperparameters below are held fixed (constant values, not grid-searched).
        "kl_coeff": 1.0,
        "num_sgd_iter": 10,
        "lr": 0.0002,
        'sample_batch_size': 50,
        'sgd_minibatch_size': 64,
        'train_batch_size': 2048,
        "num_gpus": 0.4,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 1,
        "num_envs_per_worker": 5,
        'num_workers': 1,
    }
    # Launch the experiment: run DECETrainer with walker_config as the extra
    # config. The environment name and agent count are read back from
    # walker_config['env_config'] so they stay in sync with the config dict.
    # NOTE(review): the stop criterion below uses the bare name `test`, while
    # `test_mode` uses `args.test` — confirm both refer to the same flag.
    train(extra_config=walker_config,
          trainer=DECETrainer,
          env_name=walker_config['env_config']['env_name'],
          # Stop after 5e6 total timesteps, or after 2000 when `test` is truthy.
          stop={"timesteps_total": int(5e6) if not test else 2000},
          exp_name=exp_name,
          num_agents=walker_config['env_config']['num_agents'],
          num_seeds=args.num_seeds,
          num_gpus=args.num_gpus,
          test_mode=args.test,
          verbose=1)
Esempio n. 2
0
        "num_sgd_iter": 20,
        "lr": 0.0001,
        "horizon": 5000,
        'sample_batch_size': 200,
        'sgd_minibatch_size': 10000,
        'train_batch_size': 100000,
        "num_gpus": 1,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 1,
        "num_envs_per_worker": 8,
        'num_workers': 24,
        "object_store_memory": int(5 * GB),
        "memory": int(25 * GB)
    }

    # Launch the experiment: run DECETrainer with walker_config as the extra
    # config. The environment name and agent count are read back from
    # walker_config['env_config'].
    train(
        extra_config=walker_config,
        trainer=DECETrainer,
        env_name=walker_config['env_config']['env_name'],
        # Stop once 5e8 total timesteps have been collected.
        stop={"timesteps_total": int(5e8)},
        exp_name=exp_name,
        num_agents=walker_config['env_config']['num_agents'],
        num_seeds=args.num_seeds,
        num_gpus=args.num_gpus,
        test_mode=args.test,
        verbose=1,
        # Memory budgets expressed as multiples of GB (defined outside this
        # excerpt). NOTE(review): presumably forwarded to Ray initialization by
        # train() — confirm against its implementation.
        init_memory=int(70 * GB),
        init_object_store_memory=int(15 * GB),
        init_redis_max_memory=int(6 * GB),
    )
Esempio n. 3
0
        "env_config": {
            "env_name": "Walker2d-v3",
            "num_agents": args.num_agents
        },

        # Hyperparameters below are held fixed (constant values).
        "kl_coeff": 1.0,
        "num_sgd_iter": 20,
        "lr": 0.0001,
        'sample_batch_size': 256,
        'sgd_minibatch_size': 4096,
        'train_batch_size': 65536,
        "num_gpus": 0.5,
        "num_cpus_per_worker": 0.5,
        "num_envs_per_worker": 16,
        'num_workers': 8,
    }
    # Pick the config dict: walker_config when --walker is set, otherwise
    # humanoid_config (defined outside this excerpt).
    config = humanoid_config if not args.walker else walker_config
    # Launch the experiment with PPOESTrainer using the selected config.
    train(
        extra_config=config,
        trainer=PPOESTrainer,
        env_name=config['env_config']['env_name'],
        # Stop after 1e7 total timesteps for the walker run, 2e8 otherwise.
        stop={"timesteps_total": int(2e8) if not args.walker else int(1e7)},
        # Test runs are written under a throwaway experiment name.
        exp_name="DELETEME-TEST" if args.test else args.exp_name,
        # Test runs force 3 agents regardless of the command-line value.
        # NOTE(review): the visible env_config sets "num_agents" from
        # args.num_agents, so in test mode the two values may differ —
        # confirm how train() reconciles them.
        num_agents=args.num_agents if not args.test else 3,
        num_seeds=args.num_seeds,
        num_gpus=args.num_gpus,
        test_mode=args.test,
        # address=args.address if args.address else None
    )
Esempio n. 4
0
        "env": MultiAgentEnvWrapper,
        "env_config": {
            "env_name": "Walker2d-v3",
            "num_agents": tune.grid_search([3, 5, 10])
        },

        # Hyperparameters below are held fixed (constant values, not grid-searched).
        "kl_coeff": 1.0,
        "num_sgd_iter": 20,
        "lr": 0.0001,
        'sample_batch_size': 256,
        'sgd_minibatch_size': 4096,
        'train_batch_size': 65536,
        "num_gpus": 0.45,
        "num_cpus_per_worker": 0.5,
        "num_cpus_for_driver": 0.5,
        "num_envs_per_worker": 16,
        'num_workers': 8,
    }
    # Launch the experiment: run PPOESTrainer with walker_config as the extra
    # config. The environment name and agent count are read back from
    # walker_config['env_config'].
    train(
        extra_config=walker_config,
        trainer=PPOESTrainer,
        env_name=walker_config['env_config']['env_name'],
        # Stop once 5e7 total timesteps have been collected.
        stop={"timesteps_total": int(5e7)},
        exp_name=exp_name,
        num_agents=walker_config['env_config']['num_agents'],
        # Seed count, GPU count and test mode are hard-coded literals here.
        num_seeds=3,
        num_gpus=4,
        test_mode=False
    )