def test_train_client(tmpdir, start_ray_client_server_2_cpus, seed, num_slots):
    """Check that training changes the model weights over Ray client.

    The test first requires an active Ray client connection, then hands a
    ``BoringModel`` and a CPU-only ``HorovodRayPlugin`` trainer to
    ``train_test``.
    """
    # Fail fast if the client-server fixture did not leave us connected.
    assert ray.util.client.ray.is_connected()

    boring_model = BoringModel()
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    train_test(get_trainer(tmpdir, plugins=[horovod_plugin]), boring_model)
def test_predict(tmpdir, ray_start_2_cpus, seed, num_slots):
    """Check that a trained MNIST classifier is accurate on the test set.

    Builds the classifier and its data module from one shared hyperparameter
    dict, trains for a single epoch limited to 20 batches with a CPU-only
    ``HorovodRayPlugin``, and delegates the verification to ``predict_test``.
    """
    hparams = {"layer_1": 32, "layer_2": 32, "lr": 1e-2, "batch_size": 32}

    classifier = LightningMNISTClassifier(hparams, tmpdir)
    datamodule = MNISTDataModule(
        data_dir=tmpdir,
        num_workers=1,
        batch_size=hparams["batch_size"],
    )

    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    short_trainer = get_trainer(
        tmpdir,
        limit_train_batches=20,
        max_epochs=1,
        plugins=[horovod_plugin],
    )
    predict_test(short_trainer, classifier, datamodule)
def test_predict_client(tmpdir, start_ray_client_server_2_cpus, seed,
                        num_slots):
    """Run ``predict_test`` on an MNIST classifier over Ray client.

    The test first requires an active Ray client connection, then trains for
    a single epoch limited to 20 batches with a CPU-only
    ``HorovodRayPlugin`` and passes trainer, model and data module to
    ``predict_test``.
    """
    # Fail fast if the client-server fixture did not leave us connected.
    assert ray.util.client.ray.is_connected()

    hparams = {"layer_1": 32, "layer_2": 32, "lr": 1e-2, "batch_size": 32}

    classifier = LightningMNISTClassifier(hparams, tmpdir)
    datamodule = MNISTDataModule(
        data_dir=tmpdir,
        num_workers=1,
        batch_size=hparams["batch_size"],
    )

    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    short_trainer = get_trainer(
        tmpdir,
        limit_train_batches=20,
        max_epochs=1,
        plugins=[horovod_plugin],
    )
    predict_test(short_trainer, classifier, datamodule)
def train_mnist(config,
                data_dir=None,
                num_epochs=10,
                num_hosts=1,
                num_slots=4,
                use_gpu=False,
                callbacks=None):
    """Fit an ``MNISTClassifier`` using a ``HorovodRayPlugin`` trainer.

    Args:
        config: Passed as the first argument to ``MNISTClassifier``.
        data_dir: Passed as the second argument to ``MNISTClassifier``.
        num_epochs: Value for the trainer's ``max_epochs``.
        num_hosts: Forwarded to ``HorovodRayPlugin``.
        num_slots: Forwarded to ``HorovodRayPlugin``.
        use_gpu: Forwarded to ``HorovodRayPlugin``.
        callbacks: Trainer callbacks; any falsy value (including ``None``)
            is replaced by a new empty list.
    """
    classifier = MNISTClassifier(config, data_dir)

    # A falsy ``callbacks`` is swapped for a fresh list on every call.
    trainer_callbacks = callbacks if callbacks else []

    horovod_plugin = HorovodRayPlugin(
        num_hosts=num_hosts,
        num_slots=num_slots,
        use_gpu=use_gpu,
    )
    pl.Trainer(
        max_epochs=num_epochs,
        callbacks=trainer_callbacks,
        plugins=[horovod_plugin],
    ).fit(classifier)
def test_checkpoint_horovod_gpu(tmpdir, ray_start_4_cpus):
    """Check that Tune checkpointing works with ``HorovodRayPlugin``.

    Uses a one-host, two-slot plugin and delegates to ``checkpoint_test``.

    NOTE(review): despite the ``_gpu`` suffix, the plugin is created with
    ``use_gpu=False`` and the fixture is ``ray_start_4_cpus`` -- confirm
    whether a GPU variant was intended here.
    """
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=2, num_hosts=1)
    checkpoint_test(tmpdir, horovod_plugin)
def test_tune_iteration_horovod(tmpdir, ray_start_4_cpus):
    """Check that every HorovodRay trial runs the expected iteration count.

    Uses a one-host, two-slot, CPU-only plugin and delegates to ``tune_test``.
    """
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=2, num_hosts=1)
    tune_test(tmpdir, horovod_plugin)
def test_train_gpu(tmpdir, ray_start_2_gpus, seed, num_slots):
    """Check that training changes the model weights with GPUs enabled.

    Both the ``HorovodRayPlugin`` and the trainer helper are asked for GPU
    use; the verification itself is delegated to ``train_test``.
    """
    boring_model = BoringModel()
    gpu_plugin = HorovodRayPlugin(use_gpu=True, num_slots=num_slots)
    gpu_trainer = get_trainer(tmpdir, use_gpu=True, plugins=[gpu_plugin])
    train_test(gpu_trainer, boring_model)
def test_load(tmpdir, ray_start_2_cpus, seed, num_slots):
    """Check that a model checkpoint can be loaded.

    Builds a ``BoringModel`` and a CPU-only ``HorovodRayPlugin`` trainer and
    delegates the verification to ``load_test``.
    """
    boring_model = BoringModel()
    horovod_plugin = HorovodRayPlugin(use_gpu=False, num_slots=num_slots)
    load_test(get_trainer(tmpdir, plugins=[horovod_plugin]), boring_model)