Example 1
    def test_gpus_in_non_local_mode(self):
        # Non-local mode.
        ray.init()

        actual_gpus = torch.cuda.device_count()
        print(f"Actual GPUs found (by torch): {actual_gpus}")

        config = A2C_DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["env"] = "CartPole-v0"

        # Expect errors when we run a config w/ num_gpus>0 w/o a GPU
        # and _fake_gpus=False.
        for num_gpus in [0, 0.1, 1, actual_gpus + 4]:
            # Only allow possible num_gpus_per_worker (so test would not
            # block infinitely due to a down worker).
            per_worker = [0] if actual_gpus == 0 or actual_gpus < num_gpus \
                else [0, 0.5, 1]
            for num_gpus_per_worker in per_worker:
                for fake_gpus in [False] + ([] if num_gpus == 0 else [True]):
                    config["num_gpus"] = num_gpus
                    config["num_gpus_per_worker"] = num_gpus_per_worker
                    config["_fake_gpus"] = fake_gpus

                    print(f"\n------------\nnum_gpus={num_gpus} "
                          f"num_gpus_per_worker={num_gpus_per_worker} "
                          f"_fake_gpus={fake_gpus}")

                    frameworks = ("tf", "torch") if num_gpus > 1 else \
                        ("tf2", "tf", "torch")
                    for _ in framework_iterator(config, frameworks=frameworks):
                        # Expect that trainer creation causes a num_gpu error.
                        if actual_gpus < num_gpus + 2 * num_gpus_per_worker \
                                and not fake_gpus:
                            # "Direct" RLlib (create Trainer on the driver).
                            # Cannot run through ray.tune.run() as it would
                            # simply wait infinitely for the resources to
                            # become available.
                            print("direct RLlib")
                            self.assertRaisesRegex(
                                RuntimeError,
                                "Found 0 GPUs on your machine",
                                lambda: A2CTrainer(config, env="CartPole-v0"),
                            )
                        # If actual_gpus >= num_gpus or faked,
                        # expect no error.
                        else:
                            print("direct RLlib")
                            trainer = A2CTrainer(config, env="CartPole-v0")
                            trainer.stop()
                            # Cannot run through ray.tune.run() w/ fake GPUs
                            # as it would simply wait infinitely for the
                            # resources to become available (even though, we
                            # wouldn't really need them).
                            if num_gpus == 0:
                                print("via ray.tune.run()")
                                tune.run("A2C",
                                         config=config,
                                         stop={"training_iteration": 0})
        ray.shutdown()
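Examples 1 and 3 are excerpts of methods from a single RLlib test class and rely on names imported elsewhere in that file. A likely set of imports, sketched here for convenience; the exact module paths are taken from older Ray releases that shipped A2C_DEFAULT_CONFIG and should be treated as assumptions:

# Likely imports for Examples 1 and 3 (paths from older Ray/RLlib
# releases; verify against the Ray version you are using).
import ray
import torch
from ray import tune
from ray.rllib.agents.a3c.a2c import A2C_DEFAULT_CONFIG, A2CTrainer
from ray.rllib.utils.test_utils import framework_iterator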
Example 2
def a2cTraining(env='MiniGrid-TrapMazeS9N5-v0',
                config=config,
                iterations=10000):
    trainer = A2CTrainer(env=env, config=config)
    for it in range(iterations):
        print("it {}".format(it))
        print(trainer.train())
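Example 2 takes its default `config` from the enclosing module, which the snippet does not show. A minimal hypothetical sketch of what it might contain (values are illustrative only; `gym_minigrid` must be importable so that the MiniGrid env is registered with gym):

# Hypothetical config for Example 2; values are illustrative only.
import gym_minigrid  # noqa: F401  (registers the MiniGrid-* envs)
from ray.rllib.agents.a3c.a2c import A2C_DEFAULT_CONFIG, A2CTrainer

config = A2C_DEFAULT_CONFIG.copy()
config["num_workers"] = 2  # hypothetical value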
Example 3
    def test_gpus_in_local_mode(self):
        # Local mode.
        ray.init(local_mode=True)

        actual_gpus_available = torch.cuda.device_count()

        config = A2C_DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["env"] = "CartPole-v0"

        # Expect no errors in local mode.
        for num_gpus in [0, 0.1, 1, actual_gpus_available + 4]:
            print(f"num_gpus={num_gpus}")
            for fake_gpus in [False, True]:
                print(f"_fake_gpus={fake_gpus}")
                config["num_gpus"] = num_gpus
                config["_fake_gpus"] = fake_gpus
                frameworks = ("tf", "torch") if num_gpus > 1 else \
                    ("tf2", "tf", "torch")
                for _ in framework_iterator(config, frameworks=frameworks):
                    print("direct RLlib")
                    trainer = A2CTrainer(config, env="CartPole-v0")
                    trainer.stop()
                    print("via ray.tune.run()")
                    tune.run("A2C",
                             config=config,
                             stop={"training_iteration": 0})

        ray.shutdown()
Example 4
  # A2C
  print("Training algorithm: A2C")
  
  trainer = A2CTrainer(
              env=env_title,
              config={
                "num_workers": num_workers,
                "num_cpus_per_worker": num_cpus_per_worker,
                "num_gpus": num_gpus,
                "num_gpus_per_worker": num_gpus_per_worker,
                "model": nw_model,
                "lr": lr,
                "gamma": gamma,
                "lambda": lambda_trainer,
                "multiagent": {
                  "policy_graphs": policy_graphs,
                  "policy_mapping_fn": policy_mapping_fn,
                  "policies_to_train": ["agent_policy{}".format(i) for i in range(n_agents)],
                },
                "callbacks": {
                  "on_episode_start": tune.function(on_episode_start),
                  "on_episode_step": tune.function(on_episode_step),
                  "on_episode_end": tune.function(on_episode_end),
                },
                "log_level": "ERROR",
              })

elif train_algo == "DDPG":

  # Deep Deterministic Policy Gradient (DDPG)
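Example 4 references `policy_graphs`, `policy_mapping_fn`, and several variables (`n_agents`, `nw_model`, ...) defined earlier in its file. Under the old multiagent API this snippet targets (policy specs as 4-tuples), the two policy objects might be built roughly as below; `obs_space`, `act_space`, and `n_agents` are assumed names and the whole sketch is hypothetical:

# Hypothetical sketch -- not the original code. Assumes obs_space,
# act_space, and n_agents are defined earlier in the file.
policy_graphs = {
    "agent_policy{}".format(i): (None, obs_space, act_space, {})
    for i in range(n_agents)
}

def policy_mapping_fn(agent_id):
    # e.g. map agent id 3 to "agent_policy3"
    return "agent_policy{}".format(agent_id)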
Example 5
def training_pipeline(workers, config):
    rollouts = ParallelRollouts(workers, mode="bulk_sync")

    if config["microbatch_size"]:
        num_microbatches = math.ceil(config["train_batch_size"] /
                                     config["microbatch_size"])
        # In microbatch mode, we want to compute gradients on experience
        # microbatches, average a number of these microbatches, and then apply
        # the averaged gradient in one SGD step. This conserves GPU memory,
        # allowing for extremely large experience batches to be used.
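        # E.g., with train_batch_size=1000 and microbatch_size=100
        # (hypothetical values), num_microbatches = ceil(1000 / 100) = 10:
        # ten microbatch gradients are averaged before a single apply step.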
        train_op = (
            rollouts.combine(
                ConcatBatches(
                    min_batch_size=config["microbatch_size"])).for_each(
                        ComputeGradients(workers))  # (grads, info)
            .batch(num_microbatches)  # List[(grads, info)]
            .for_each(AverageGradients())  # (avg_grads, info)
            .for_each(ApplyGradients(workers)))
    else:
        # In normal mode, we execute one SGD step per each train batch.
        train_op = rollouts \
            .combine(ConcatBatches(
                min_batch_size=config["train_batch_size"])) \
            .for_each(TrainOneStep(workers))

    return StandardMetricsReporting(train_op, workers, config)


A2CPipeline = A2CTrainer.with_updates(training_pipeline=training_pipeline)
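A rough usage sketch for the resulting pipeline trainer. The env name and config values are illustrative, and the execution ops used by training_pipeline (ParallelRollouts, ConcatBatches, etc.) lived in version-specific RLlib modules, so their import paths are omitted here:

# Hypothetical usage of the pipeline-based trainer defined above.
trainer = A2CPipeline(env="CartPole-v0",
                      config={"num_workers": 2, "microbatch_size": 64})
print(trainer.train()["episode_reward_mean"])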
Example 6

def choose_policy_optimizer_modified(workers, config):
    if config["microbatch_size"]:
        raise ValueError("microbatch_size is not supported by this optimizer")
    else:
        return SyncSamplesOptimizer(
            workers,
            train_batch_size=config["train_batch_size"],
            standardize_fields=["advantages"]  # <<== Here!
        )


ANA2CTrainer = A2CTrainer.with_updates(
    name="ANA2C",
    make_policy_optimizer=choose_policy_optimizer_modified,
    default_policy=ANA3CTFPolicy,
    get_policy_class=get_policy_class_modified)

if __name__ == '__main__':
    import yaml
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", type=str, default="")
    parser.add_argument("--num-samples", type=int, default=5)
    args = parser.parse_args()

    if args.file:
        with open(args.file, "r") as f:
            config = yaml.safe_load(f)
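The snippet is truncated after the YAML load; presumably `config` and `args.num_samples` then feed a `tune.run()` call. A hedged guess at the missing tail, not the original code:

    # Hypothetical continuation -- the original snippet ends above.
    from ray import tune
    tune.run(ANA2CTrainer,
             config=config,
             num_samples=args.num_samples)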