Example #1
def test_gym_env_dataset_creator():
    # Discrete action environment
    config = generate_env_config("CartPole-v0", {})
    config = generate_algorithm_config("DDPG", config)
    config["frame_config"]["models"] = [
        "DDPGActorCont",
        "DDPGActorCont",
        "DDPGCritic",
        "DDPGCritic",
    ]
    config["frame_config"]["model_kwargs"] = [{
        "state_dim": 3,
        "action_dim": 1,
        "action_range": 2
    }] * 2 + [{
        "state_dim": 3,
        "action_dim": 1
    }] * 2
    ddpg = init_algorithm_from_config(config)

    assert isinstance(
        gym_env_dataset_creator(ddpg, config["train_env_config"]),
        RLGymDiscActDataset)
    assert isinstance(gym_env_dataset_creator(ddpg, config["test_env_config"]),
                      RLGymDiscActDataset)

    # Continuous action environment
    config = generate_env_config("Pendulum-v0", {})
    assert isinstance(
        gym_env_dataset_creator(ddpg, config["train_env_config"]),
        RLGymContActDataset)
    assert isinstance(gym_env_dataset_creator(ddpg, config["test_env_config"]),
                      RLGymContActDataset)

    # Unsupported environments,
    # like algorithmic ones, which use a tuple action space,
    # or robotics ones, which use a goal-based space
    config = generate_env_config("Copy-v0", {})
    with pytest.raises(ValueError, match="not supported"):
        gym_env_dataset_creator(ddpg, config["train_env_config"])

    with pytest.raises(ValueError, match="not supported"):
        gym_env_dataset_creator(ddpg, config["test_env_config"])
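The dispatch asserted above depends only on the configured environment's action space, not on the algorithm object. A minimal sketch of that selection logic, assuming gym is installed; the helper name _classify_action_space is hypothetical and simply mirrors the behaviour the assertions check:

import gym
from gym import spaces


def _classify_action_space(env_name: str) -> str:
    # Hypothetical helper mirroring the behaviour asserted above: Discrete
    # action spaces get the discrete dataset, Box spaces the continuous one,
    # and anything else (Tuple, goal-based, ...) raises "not supported".
    action_space = gym.make(env_name).action_space
    if isinstance(action_space, spaces.Discrete):
        return "discrete"
    if isinstance(action_space, spaces.Box):
        return "continuous"
    raise ValueError(f"Action space {type(action_space)} is not supported")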
Example #2
    def test_dqn_apex_cpu_spawn_full_train(self, tmpdir):
        # by default, pytorch lightning will use ddp-spawn mode instead of ddp
        # if only CPUs are available
        os.environ["WORLD_SIZE"] = "3"
        config = generate_env_config("CartPole-v0", {})
        config = generate_training_config(root_dir=tmpdir.make_numbered_dir(),
                                          config=config)
        config = generate_algorithm_config("DQNApex", config)
        # use ddp_cpu
        config["gpus"] = None
        config["num_processes"] = 3
        # this testing process corresponds to this node
        config["num_nodes"] = 1
        config["early_stopping_patience"] = 100
        # Use classes instead of string names since the algorithm is distributed.
        config["frame_config"]["models"] = [QNet, QNet]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4,
                "action_num": 2
            },
        ]

        # for spawn we use a special callback, because we cannot access
        # max_total_reward from sub-processes
        queue = SimpleQueue(ctx=mp.get_context("spawn"))
        # cb = [SpawnInspectCallback(queue), LoggerDebugCallback()]
        cb = [SpawnInspectCallback(queue)]
        t = Thread(target=launch, args=(config, ), kwargs={"pl_callbacks": cb})
        t.start()

        default_logger.info("Start tracking")
        subproc_max_total_reward = [0, 0, 0]
        while True:
            try:
                result = queue.quick_get(timeout=60)
                default_logger.info(
                    f"Result from process [{result[0]}]: {result[1]}")
                subproc_max_total_reward[result[0]] = result[1]
            except TimeoutError:
                # no more results
                default_logger.info("No more results.")
                break
        t.join()
        assert (
            sum(subproc_max_total_reward) / 3 >= 150
        ), f"Max total reward {sum(subproc_max_total_reward) / 3} below threshold 150."
Example #3
    def test_dqn_full_train(self, tmpdir):
        config = generate_env_config("CartPole-v0", {})
        config = generate_training_config(
            root_dir=str(tmpdir.make_numbered_dir()), config=config)
        config = generate_algorithm_config("DQN", config)
        config["early_stopping_patience"] = 100
        config["frame_config"]["models"] = ["QNet", "QNet"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4,
                "action_num": 2
            },
        ]
        cb = InspectCallback()
        launch(config, pl_callbacks=[cb])
        assert (
            cb.max_total_reward >= 150
        ), f"Max total reward {cb.max_total_reward} below threshold 150."
Example #4
        should_stop = t.tensor(
            int(self.max_total_reward >= 150), device=trainer.lightning_module.device
        )
        # SUM-reduce the per-rank 0/1 flags; stop only once every rank has
        # reached the reward threshold.
        should_stop = t_plugin.reduce(should_stop, reduce_op=ReduceOp.SUM)
        should_stop = bool(should_stop == trainer.world_size)
        return should_stop

    def reduce_max_total_reward(self, trainer, t_plugin):
        avg = t.tensor(self.max_total_reward, device=trainer.lightning_module.device)
        avg = t_plugin.reduce(avg, reduce_op=ReduceOp.SUM)
        return float(avg)


if __name__ == "__main__":
    os.environ["WORLD_SIZE"] = "3"
    print(os.environ["TEST_SAVE_PATH"])
    config = generate_env_config("CartPole-v0", {})
    config = generate_training_config(root_dir=os.environ["ROOT_DIR"], config=config)
    config = generate_algorithm_config("DQNApex", config)

    # use ddp gpu
    config["gpus"] = [0, 0, 0]
    config["num_processes"] = 3
    # this testing process corresponds to this node
    config["num_nodes"] = 1
    config["early_stopping_patience"] = 100
    # Use classes instead of string names since the algorithm is distributed.
    config["frame_config"]["models"] = [QNet, QNet]
    config["frame_config"]["model_kwargs"] = [
        {"state_dim": 4, "action_num": 2},
        {"state_dim": 4, "action_num": 2},
    ]
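The snippet above stops after assembling the config. Under the same conventions as Example #3, the run would then be started with launch() plus an inspection callback inside the __main__ block; a hedged sketch, where DDPInspectCallback is a hypothetical name standing in for the callback whose methods appear at the top of this example:

    cb = DDPInspectCallback()  # hypothetical callback name
    launch(config, pl_callbacks=[cb])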
Example #5
        should_stop = t.tensor(int(self.max_total_reward >= 150),
                               device=trainer.lightning_module.device)
        should_stop = t_plugin.reduce(should_stop, reduce_op=ReduceOp.SUM)
        should_stop = bool(should_stop == trainer.world_size)
        return should_stop

    def reduce_max_total_reward(self, trainer, t_plugin):
        avg = t.tensor(self.max_total_reward,
                       device=trainer.lightning_module.device)
        avg = t_plugin.reduce(avg, reduce_op=ReduceOp.SUM)
        return float(avg)


if __name__ == "__main__":
    os.environ["WORLD_SIZE"] = "3"
    print(os.environ["TEST_SAVE_PATH"])
    config = generate_env_config({})
    config["train_env_config"]["env_name"] = "CartPole-v0"
    config["test_env_config"]["env_name"] = "CartPole-v0"

    config = generate_training_config(root_dir=os.environ["ROOT_DIR"],
                                      config=config)
    config = generate_algorithm_config("DQNApex", config)

    # use ddp gpu, processes will be created automatically
    config["gpus"] = [0, 0, 0]
    config["num_processes"] = 3
    # this testing process corresponds to this node
    config["num_nodes"] = 1
    config["early_stopping_patience"] = 100
    # Use classes instead of string names since the algorithm is distributed.
    config["frame_config"]["models"] = [QNet, QNet]
Example #6
def generate_gym_config_for_env(env: str, config: dict):
    """Helper function for testing openai gym environments."""
    config = generate_env_config(config)
    config["train_env_config"]["env_name"] = env
    config["test_env_config"]["env_name"] = env
    return config
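For context, a typical use of this helper would follow the same pattern as the earlier examples: build the env config for a specific gym id, then layer the algorithm config on top. A small sketch reusing the names from Example #1 (purely illustrative):

config = generate_gym_config_for_env("Pendulum-v0", {})
config = generate_algorithm_config("DDPG", config)
config["frame_config"]["models"] = [
    "DDPGActorCont",
    "DDPGActorCont",
    "DDPGCritic",
    "DDPGCritic",
]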