def test_train_client(tmpdir, start_ray_client_server_2_cpus, seed, num_slots):
    """Run the shared training check while connected through Ray client.

    First asserts that the Ray client connection is up, then builds a
    ``BoringModel`` and a trainer backed by a CPU-only ``HorovodRayPlugin``
    with ``num_slots`` slots, and hands both to ``train_test``.
    """
    assert ray.util.client.ray.is_connected()
    boring_model = BoringModel()
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    train_test(get_trainer(tmpdir, plugins=[horovod_plugin]), boring_model)
# Example no. 2
# 0
def test_predict(tmpdir, ray_start_2_cpus, seed, num_slots):
    """Run the shared prediction check on a briefly trained MNIST classifier.

    The trainer is limited to one epoch of 20 training batches and uses a
    CPU-only ``HorovodRayPlugin`` with ``num_slots`` slots. The trainer,
    model and data module are then passed to ``predict_test``.
    """
    batch_size = 32
    config = {
        "layer_1": 32,
        "layer_2": 32,
        "lr": 1e-2,
        "batch_size": batch_size,
    }
    classifier = LightningMNISTClassifier(config, tmpdir)
    datamodule = MNISTDataModule(
        data_dir=tmpdir, num_workers=1, batch_size=batch_size)
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    short_run_trainer = get_trainer(
        tmpdir,
        plugins=[horovod_plugin],
        max_epochs=1,
        limit_train_batches=20,
    )
    predict_test(short_run_trainer, classifier, datamodule)
def test_predict_client(tmpdir, start_ray_client_server_2_cpus, seed,
                        num_slots):
    """Run the shared prediction check while connected through Ray client.

    Asserts that the Ray client connection is up, then builds an MNIST
    classifier and data module and a trainer limited to one epoch of 20
    training batches. The trainer uses a CPU-only ``HorovodRayPlugin`` with
    ``num_slots`` slots; everything is handed to ``predict_test``.
    """
    assert ray.util.client.ray.is_connected()
    batch_size = 32
    config = {
        "layer_1": 32,
        "layer_2": 32,
        "lr": 1e-2,
        "batch_size": batch_size,
    }
    classifier = LightningMNISTClassifier(config, tmpdir)
    datamodule = MNISTDataModule(
        data_dir=tmpdir, num_workers=1, batch_size=batch_size)
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    short_run_trainer = get_trainer(
        tmpdir,
        plugins=[horovod_plugin],
        max_epochs=1,
        limit_train_batches=20,
    )
    predict_test(short_run_trainer, classifier, datamodule)
def train_mnist(config,
                data_dir=None,
                num_epochs=10,
                num_hosts=1,
                num_slots=4,
                use_gpu=False,
                callbacks=None):
    """Fit an ``MNISTClassifier`` using a trainer with a ``HorovodRayPlugin``.

    Args:
        config: Passed to ``MNISTClassifier`` as its first argument.
        data_dir: Passed to ``MNISTClassifier`` as its second argument.
        num_epochs: Used as the trainer's ``max_epochs``.
        num_hosts: Forwarded to ``HorovodRayPlugin``.
        num_slots: Forwarded to ``HorovodRayPlugin``.
        use_gpu: Forwarded to ``HorovodRayPlugin``.
        callbacks: Callbacks given to the trainer; any falsy value
            (including ``None``) is replaced by an empty list.
    """
    classifier = MNISTClassifier(config, data_dir)

    if not callbacks:
        callbacks = []

    horovod_plugin = HorovodRayPlugin(
        num_hosts=num_hosts, num_slots=num_slots, use_gpu=use_gpu)
    pl.Trainer(
        max_epochs=num_epochs,
        callbacks=callbacks,
        plugins=[horovod_plugin],
    ).fit(classifier)
def test_checkpoint_horovod_gpu(tmpdir, ray_start_4_cpus):
    """Run the shared Tune checkpointing check with a ``HorovodRayPlugin``.

    The plugin is configured with one host and two slots.

    NOTE(review): despite the ``_gpu`` suffix in the test name, the plugin is
    created with ``use_gpu=False`` and the fixture is ``ray_start_4_cpus`` --
    confirm whether the name or the configuration is the intended one.
    """
    checkpoint_test(
        tmpdir,
        HorovodRayPlugin(use_gpu=False, num_slots=2, num_hosts=1),
    )
def test_tune_iteration_horovod(tmpdir, ray_start_4_cpus):
    """Run the shared Tune iteration-count check with a ``HorovodRayPlugin``.

    The plugin is CPU-only and configured with one host and two slots; the
    actual assertions live in ``tune_test``.
    """
    tune_test(
        tmpdir,
        HorovodRayPlugin(use_gpu=False, num_slots=2, num_hosts=1),
    )
# Example no. 7
# 0
def test_train_gpu(tmpdir, ray_start_2_gpus, seed, num_slots):
    """Run the shared training check with GPU usage enabled.

    Builds a ``BoringModel`` and a trainer whose ``HorovodRayPlugin`` has
    ``use_gpu=True`` and ``num_slots`` slots, then hands both to
    ``train_test``.
    """
    boring_model = BoringModel()
    gpu_plugin = HorovodRayPlugin(use_gpu=True, num_slots=num_slots)
    gpu_trainer = get_trainer(tmpdir, use_gpu=True, plugins=[gpu_plugin])
    train_test(gpu_trainer, boring_model)
# Example no. 8
# 0
def test_load(tmpdir, ray_start_2_cpus, seed, num_slots):
    """Run the shared checkpoint-loading check with a ``HorovodRayPlugin``.

    Builds a ``BoringModel`` and a trainer backed by a CPU-only plugin with
    ``num_slots`` slots, then hands both to ``load_test``.
    """
    boring_model = BoringModel()
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    load_test(get_trainer(tmpdir, plugins=[horovod_plugin]), boring_model)