Example #1
        # make sure the result can be loaded with torch.load
        filepath = str(tmpdir / 'result')
        torch.save(result, filepath)
        torch.load(filepath)

        # assert metric state reset to default values
        result.reset()
        assert metric_a.x == metric_a._defaults['x']
        assert metric_b.x == metric_b._defaults['x']
        assert metric_c.x == metric_c._defaults['x']

        batch_idx = None


@pytest.mark.parametrize('device',
                         ('cpu', pytest.param('cuda', marks=RunIf(min_gpus=1)))
                         )
def test_lightning_module_logging_result_collection(tmpdir, device):
    class LoggingModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.metric = DummyMetric()

        def validation_step(self, batch, batch_idx):
            v = self.metric(batch_idx)
            self.log_dict({"v": v, "m": self.metric})
            return super().validation_step(batch, batch_idx)

        def on_save_checkpoint(self, checkpoint) -> None:
            results = self.trainer._results
            state_dict = results.state_dict()
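
For context, here is a minimal sketch of what the `DummyMetric` used throughout these examples could look like, assuming it is built on `torchmetrics.Metric`; the `add_state` call is what populates the `_defaults["x"]` entry that the reset assertions above check. The exact definition is not part of this excerpt.

import torch
from torchmetrics import Metric


class DummyMetric(Metric):
    def __init__(self):
        super().__init__()
        # add_state registers `x` and records its default in `self._defaults["x"]`,
        # which is what `reset()` restores
        self.add_state("x", default=torch.tensor(0), dist_reduce_fx="sum")

    def update(self, x):
        self.x += x

    def compute(self):
        return self.x
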
Example #2
                 on_step=False,
                 on_epoch=True,
                 sync_dist=True,
                 reduce_fx="mean")
        self.log("bar_3",
                 batch_idx + self.rank,
                 on_step=False,
                 on_epoch=True,
                 sync_dist=True,
                 reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize(
    "devices",
    [1, pytest.param(2, marks=RunIf(min_gpus=2, skip_windows=True))])
def test_logging_sync_dist_true(tmpdir, devices):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)

    use_multiple_devices = devices > 1
    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        enable_model_summary=False,
        strategy="ddp_spawn" if use_multiple_devices else None,
        accelerator="auto",
        devices=devices,
Example #3

def test_v_1_8_0_deprecated_device_stats_monitor_prefix_metric_keys():
    from pytorch_lightning.callbacks.device_stats_monitor import prefix_metric_keys

    with pytest.deprecated_call(match="in v1.6 and will be removed in v1.8"):
        prefix_metric_keys({"foo": 1.0}, "bar")


@pytest.mark.parametrize(
    "cls",
    [
        DDPPlugin,
        DDP2Plugin,
        DDPSpawnPlugin,
        pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
        DataParallelPlugin,
        DDPFullyShardedPlugin,
        pytest.param(IPUPlugin, marks=RunIf(ipu=True)),
        DDPShardedPlugin,
        DDPSpawnShardedPlugin,
        TPUSpawnPlugin,
    ],
)
def test_v1_8_0_deprecated_training_type_plugin_classes(cls):
    old_name = cls.__name__
    new_name = old_name.replace("Plugin", "Strategy")
    with pytest.deprecated_call(
        match=f"{old_name}` is deprecated in v1.6 and will be removed in v1.8. Use .*{new_name}` instead."
    ):
        cls()
        "SLURM_PROCID": "0",
        "SLURM_LOCALID": "0",
    },
)
@mock.patch("torch.cuda.device_count", return_value=2)
@pytest.mark.parametrize("strategy,gpus", [("ddp", 2), ("ddp2", 2),
                                           ("ddp_spawn", 2)])
@pytest.mark.parametrize(
    "amp,custom_plugin,plugin_cls",
    [
        ("native", False, NativeMixedPrecisionPlugin),
        ("native", True, MyNativeAMP),
        pytest.param("apex",
                     False,
                     ApexMixedPrecisionPlugin,
                     marks=RunIf(amp_apex=True)),
        pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)),
    ],
)
def test_amp_apex_ddp(mocked_device_count, strategy, gpus, amp, custom_plugin,
                      plugin_cls):
    plugin = None
    if custom_plugin:
        plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls()
    trainer = Trainer(
        fast_dev_run=True,
        precision=16,
        amp_backend=amp,
        gpus=gpus,
        strategy=strategy,
        plugins=plugin,
        "GROUP_RANK": "1",
        "RANK": "3",
        "WORLD_SIZE": "4",
        "LOCAL_WORLD_SIZE": "2",
    }
    environment = TorchElasticEnvironment()
    yield environment, variables, expected


@pytest.mark.parametrize(
    "plugin_cls",
    [
        DDPPlugin,
        DDPShardedPlugin,
        DDP2Plugin,
        pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
        pytest.param(RPCSequentialPlugin, marks=RunIf(fairscale_pipe=True)),
    ],
)
def test_ranks_available_manual_plugin_selection(plugin_cls):
    """ Test that the rank information is readily available after Trainer initialization. """
    num_nodes = 2
    for cluster, variables, expected in environment_combinations():

        if plugin_cls == DDP2Plugin:
            expected.update(global_rank=expected["node_rank"], world_size=num_nodes)

        with mock.patch.dict(os.environ, variables):
            plugin = plugin_cls(
                parallel_devices=[torch.device("cuda", 1), torch.device("cuda", 2)],
                cluster_environment=cluster,
Example #6
                      DDPShardedPlugin)

    trainer = Trainer(plugins=plugin, num_processes=2)
    assert isinstance(trainer.accelerator.training_type_plugin,
                      DDPShardedPlugin)


@pytest.mark.parametrize(
    ["accelerator", "plugin"],
    [
        ("ddp", DDPPlugin),
        ("ddp_spawn", DDPSpawnPlugin),
        ("ddp_sharded", DDPShardedPlugin),
        ("ddp_sharded_spawn", DDPSpawnShardedPlugin),
        pytest.param("deepspeed", DeepSpeedPlugin,
                     marks=RunIf(deepspeed=True)),
    ],
)
@mock.patch("torch.cuda.is_available", return_value=True)
@mock.patch("torch.cuda.device_count", return_value=2)
@pytest.mark.parametrize("gpus", [1, 2])
def test_accelerator_choice_multi_node_gpu(mock_is_available,
                                           mock_device_count, tmpdir,
                                           accelerator: str,
                                           plugin: ParallelPlugin, gpus: int):
    trainer = Trainer(accelerator=accelerator,
                      default_root_dir=tmpdir,
                      num_nodes=2,
                      gpus=gpus)
    assert isinstance(trainer.training_type_plugin, plugin)
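
Many of the parametrizations in these examples use the `RunIf` helper to skip cases whose requirements (GPUs, Windows, optional packages) are not met. Below is a hedged sketch of how such a marker factory can be written; the real `tests.helpers.runif.RunIf` supports many more conditions than the two shown here.

import sys

import pytest
import torch


class RunIf:
    """Simplified conditional skip, used as e.g. pytest.param(..., marks=RunIf(min_gpus=2))."""

    def __new__(cls, min_gpus: int = 0, skip_windows: bool = False):
        conditions, reasons = [], []
        if min_gpus:
            conditions.append(torch.cuda.device_count() < min_gpus)
            reasons.append(f"requires {min_gpus}+ GPUs")
        if skip_windows:
            conditions.append(sys.platform == "win32")
            reasons.append("does not run on Windows")
        # returning a pytest mark lets RunIf(...) be used directly as a parametrize mark
        return pytest.mark.skipif(any(conditions), reason=", ".join(reasons))
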
Example #7
        if not self.early_stop_on_train:
            return
        self._epoch_end()

    def validation_epoch_end(self, outputs):
        if self.early_stop_on_train:
            return
        self._epoch_end()

    def on_train_end(self) -> None:
        assert self.trainer.current_epoch - 1 == self.expected_end_epoch, "Early Stopping Failed"
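
The `_epoch_end` helper is not shown in this excerpt; presumably it logs the monitored keys ("abc" and "cba") so the EarlyStopping callbacks parametrized below have quantities to track. A hypothetical illustration of that logging follows; the class name and the logged values are assumptions.

import torch


class EarlyStoppingModelSketch(BoringModel):
    def _epoch_end(self):
        # log values that improve and then worsen so "abc" triggers early stopping
        # around the expected epoch, while "cba" stays flat
        losses = [8, 4, 2, 3, 4, 5, 8, 10]
        self.log("abc", torch.tensor(losses[self.current_epoch]))
        self.log("cba", torch.tensor(0.0))
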


_ES_CHECK = dict(check_on_train_epoch_end=True)
_ES_CHECK_P3 = dict(patience=3, check_on_train_epoch_end=True)
_SPAWN_MARK = dict(marks=RunIf(skip_windows=True, skip_49370=True))


@pytest.mark.parametrize(
    "callbacks, expected_stop_epoch, check_on_train_epoch_end, strategy, devices",
    [
        ([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, None, 1),
        ([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, None, 1),
        pytest.param([EarlyStopping("abc"), EarlyStopping("cba", patience=3)], 3, False, "ddp_spawn", 2, **_SPAWN_MARK),
        pytest.param([EarlyStopping("cba", patience=3), EarlyStopping("abc")], 3, False, "ddp_spawn", 2, **_SPAWN_MARK),
        ([EarlyStopping("abc", **_ES_CHECK), EarlyStopping("cba", **_ES_CHECK_P3)], 3, True, None, 1),
        ([EarlyStopping("cba", **_ES_CHECK_P3), EarlyStopping("abc", **_ES_CHECK)], 3, True, None, 1),
        pytest.param(
            [EarlyStopping("abc", **_ES_CHECK), EarlyStopping("cba", **_ES_CHECK_P3)],
            3,
            True,
    def on_train_end(self) -> None:
        assert self.trainer.current_epoch == self.expected_end_epoch, 'Early Stopping Failed'


@pytest.mark.parametrize(
    "callbacks, expected_stop_epoch, accelerator, num_processes",
    [
        ([EarlyStopping(monitor='abc'), EarlyStopping(monitor='cba', patience=3)], 3, None, 1),
        ([EarlyStopping(monitor='cba', patience=3),
          EarlyStopping(monitor='abc')], 3, None, 1),
        pytest.param([EarlyStopping(monitor='abc'),
                      EarlyStopping(monitor='cba', patience=3)],
                     3,
                     'ddp_cpu',
                     2,
                     marks=RunIf(skip_windows=True)),
        pytest.param([EarlyStopping(monitor='cba', patience=3),
                      EarlyStopping(monitor='abc')],
                     3,
                     'ddp_cpu',
                     2,
                     marks=RunIf(skip_windows=True)),
    ],
)
def test_multiple_early_stopping_callbacks(
    tmpdir, callbacks: List[EarlyStopping], expected_stop_epoch: int, accelerator: Optional[str], num_processes: int
):
    """Ensure when using multiple early stopping callbacks we stop if any signals we should stop."""

    model = EarlyStoppingModel(expected_stop_epoch)
    os.environ, {
        "CUDA_VISIBLE_DEVICES": "0,1",
        "SLURM_NTASKS": "2",
        "SLURM_JOB_NAME": "SOME_NAME",
        "SLURM_NODEID": "0",
        "LOCAL_RANK": "0",
        "SLURM_LOCALID": "0",
    })
@mock.patch('torch.cuda.device_count', return_value=2)
@pytest.mark.parametrize('ddp_backend,gpus', [('ddp', 2), ('ddp2', 2),
                                              ('ddp_spawn', 2)])
@pytest.mark.parametrize('amp,custom_plugin,plugin_cls', [
    pytest.param('native',
                 False,
                 NativeMixedPrecisionPlugin,
                 marks=RunIf(amp_native=True)),
    pytest.param('native', True, MyNativeAMP, marks=RunIf(amp_native=True)),
    pytest.param(
        'apex', False, ApexMixedPrecisionPlugin, marks=RunIf(amp_apex=True)),
    pytest.param('apex', True, MyApexPlugin, marks=RunIf(amp_apex=True))
])
def test_amp_apex_ddp(mocked_device_count, ddp_backend: str, gpus: int,
                      amp: str, custom_plugin: bool,
                      plugin_cls: MixedPrecisionPlugin):

    trainer = Trainer(
        fast_dev_run=True,
        precision=16,
        amp_backend=amp,
        gpus=gpus,
        accelerator=ddp_backend,
Example #10
    model = BoringModel()

    trainer = Trainer(strategy="ddp_sharded_spawn",
                      accelerator="cpu",
                      devices=2,
                      fast_dev_run=True)

    trainer.fit(model, ckpt_path=checkpoint_path)


@RunIf(skip_windows=True, standalone=True, fairscale=True)
@pytest.mark.parametrize(
    "trainer_kwargs",
    (dict(accelerator="cpu", devices=2),
     pytest.param(dict(accelerator="gpu", devices=2),
                  marks=RunIf(min_gpus=2))),
)
def test_ddp_sharded_strategy_test_multigpu(tmpdir, trainer_kwargs):
    """Test to ensure we can use validate and test without fit."""
    model = BoringModel()
    trainer = Trainer(
        strategy="ddp_sharded_spawn",
        fast_dev_run=True,
        enable_progress_bar=False,
        enable_model_summary=False,
        **trainer_kwargs,
    )

    trainer.validate(model)
    trainer.test(model)
        # make sure the result can be loaded with torch.load
        filepath = str(tmpdir / "result")
        torch.save(result, filepath)
        torch.load(filepath)

        # assert metric state reset to default values
        result.reset()
        assert metric_a.x == metric_a._defaults["x"]
        assert metric_b.x == metric_b._defaults["x"]
        assert metric_c.x == metric_c._defaults["x"]

        batch_idx = None


@pytest.mark.parametrize("device",
                         ("cpu", pytest.param("cuda", marks=RunIf(min_gpus=1)))
                         )
def test_lightning_module_logging_result_collection(tmpdir, device):
    class LoggingModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.metric = DummyMetric()

        def validation_step(self, batch, batch_idx):
            v = self.metric(batch_idx)
            self.log_dict({"v": v, "m": self.metric})
            return super().validation_step(batch, batch_idx)

        def on_save_checkpoint(self, checkpoint) -> None:
            results = self.trainer._results
            # simplify logic
Example #12
                 on_step=False,
                 on_epoch=True,
                 sync_dist=True,
                 reduce_fx="mean")
        self.log("bar_3",
                 batch_idx + self.rank,
                 on_step=False,
                 on_epoch=True,
                 sync_dist=True,
                 reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize("gpus", [
    None,
    pytest.param(1, marks=RunIf(min_gpus=1)),
    pytest.param(2, marks=RunIf(min_gpus=2))
])
def test_logging_sync_dist_true(tmpdir, gpus):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)
    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        weights_summary=None,
        gpus=gpus,
    )
    trainer.fit(model)
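
`LoggingSyncDistModel` itself is not part of these excerpts. Below is a hedged sketch of what such a model might look like: it logs the constructor value with `sync_dist=True` and different `reduce_fx` settings, which on a single device should simply return the original value. The method bodies are assumptions.

class LoggingSyncDistModel(BoringModel):
    def __init__(self, fake_result):
        super().__init__()
        self.fake_result = fake_result

    def training_step(self, batch, batch_idx):
        # on a single device, sync_dist is a no-op and the logged value is unchanged
        self.log("foo", self.fake_result, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="sum")
        return super().training_step(batch, batch_idx)

    def validation_step(self, batch, batch_idx):
        self.log("bar", self.fake_result, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        return super().validation_step(batch, batch_idx)
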
    EmptyLite.seed_everything(3)

    lite = EmptyLite()
    lite_dataloader = lite.setup_dataloaders(DataLoader(Mock()))

    assert lite_dataloader.worker_init_fn.func is pl_worker_init_function
    assert os.environ == {"PL_GLOBAL_SEED": "3", "PL_SEED_WORKERS": "1"}


@pytest.mark.parametrize(
    "strategy",
    [
        _StrategyType.DP,
        _StrategyType.DDP,
        _StrategyType.DDP_SPAWN,
        pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)),
        pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)),
        pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)),
    ],
)
def test_setup_dataloaders_replace_custom_sampler(strategy):
    """Test that asking to replace a custom sampler results in an error when a distributed sampler would be
    needed."""
    custom_sampler = Mock(spec=Sampler)
    dataloader = DataLoader(Mock(), sampler=custom_sampler)

    # explicitly asking to replace when a custom sampler is already configured raises an exception
    lite = EmptyLite(accelerator="cpu", strategy=strategy, devices=2)
    if lite._accelerator_connector.is_distributed:
        with pytest.raises(MisconfigurationException, match="You seem to have configured a sampler in your DataLoader"):
            lite.setup_dataloaders(dataloader, replace_sampler=True)
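
The test above only needs `Mock(spec=Sampler)`, but for illustration, here is a minimal custom sampler of the kind that would trigger the same `MisconfigurationException` when `replace_sampler=True` is requested with a distributed strategy. The class `EveryOtherSampler` is hypothetical and not part of the original test.

from torch.utils.data import DataLoader, Sampler


class EveryOtherSampler(Sampler):
    """Yields every second index of the dataset."""

    def __init__(self, data_source):
        self.data_source = data_source

    def __iter__(self):
        return iter(range(0, len(self.data_source), 2))

    def __len__(self):
        return (len(self.data_source) + 1) // 2


# dataset = RandomDataset(32, 64)
# dataloader = DataLoader(dataset, sampler=EveryOtherSampler(dataset))
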
Example #14
        optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1)
        return optimizer, optimizer_2


@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        pytest.param(
            {
                "accelerator": "gpu",
                "devices": 1,
                "precision": 16,
                "amp_backend": "native"
            },
            marks=RunIf(min_gpus=1)),
        pytest.param(
            {
                "accelerator": "gpu",
                "devices": 1,
                "precision": 16,
                "amp_backend": "apex",
                "amp_level": "O2"
            },
            marks=RunIf(min_gpus=1, amp_apex=True),
        ),
    ],
)
def test_multiple_optimizers_manual_no_return(tmpdir, kwargs):
    apex_optimizer_patches = []
    apex_optimizer_steps = []
Example #15
@pytest.mark.parametrize("test_option,do_train,gpus", [
    pytest.param(0, True, 0, id='full_loop'),
    pytest.param(0, False, 0, id='test_only'),
    pytest.param(1,
                 False,
                 0,
                 id='test_only_mismatching_tensor',
                 marks=pytest.mark.xfail(raises=ValueError, match="Mism.*")),
    pytest.param(2, False, 0, id='mix_of_tensor_dims'),
    pytest.param(3, False, 0, id='string_list_predictions'),
    pytest.param(4, False, 0, id='int_list_predictions'),
    pytest.param(5, False, 0, id='nested_list_predictions'),
    pytest.param(6, False, 0, id='dict_list_predictions'),
    pytest.param(7, True, 0, id='write_dict_predictions'),
    pytest.param(
        0, True, 1, id='full_loop_single_gpu', marks=RunIf(min_gpus=1))
])
def test_result_obj_predictions(tmpdir, test_option: int, do_train: bool,
                                gpus: int):
    class CustomBoringModel(BoringModel):
        def test_step(self, batch, batch_idx, optimizer_idx=None):
            output = self(batch)
            test_loss = self.loss(batch, output)
            self.log('test_loss', test_loss)

            batch_size = batch.size(0)
            lst_of_str = [
                random.choice(['dog', 'cat']) for i in range(batch_size)
            ]
            lst_of_int = [random.randint(500, 1000) for i in range(batch_size)]
            lst_of_lst = [[x] for x in lst_of_int]
        "SLURM_NTASKS": "2",
        "SLURM_JOB_NAME": "SOME_NAME",
        "SLURM_NODEID": "0",
        "LOCAL_RANK": "0",
        "SLURM_PROCID": "0",
        "SLURM_LOCALID": "0",
    },
)
@mock.patch("torch.cuda.device_count", return_value=2)
@pytest.mark.parametrize("strategy,gpus", [("ddp", 2), ("ddp2", 2), ("ddp_spawn", 2)])
@pytest.mark.parametrize(
    "amp,custom_plugin,plugin_cls",
    [
        ("native", False, NativeMixedPrecisionPlugin),
        ("native", True, MyNativeAMP),
        pytest.param("apex", False, ApexMixedPrecisionPlugin, marks=RunIf(amp_apex=True)),
        pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)),
    ],
)
def test_amp_apex_ddp(mocked_device_count, strategy, gpus, amp, custom_plugin, plugin_cls):
    plugin = None
    if custom_plugin:
        plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls()
    trainer = Trainer(
        fast_dev_run=True,
        precision=16,
        amp_backend=amp,
        gpus=gpus,
        strategy=strategy,
        plugins=plugin,
    )
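
`MyNativeAMP` and `MyApexPlugin` are not defined in these excerpts; presumably they are thin subclasses of the built-in precision plugins, which is all that the `plugin_cls(16, "cpu")` construction and the plugin-type checks of such tests require. A hedged sketch:

from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin


class MyNativeAMP(NativeMixedPrecisionPlugin):
    """Custom native AMP plugin; behaviour inherited unchanged."""


class MyApexPlugin(ApexMixedPrecisionPlugin):
    """Custom apex AMP plugin; behaviour inherited unchanged."""
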
    checkpoint_plugin.reset_mock()
    ck = ModelCheckpoint(dirpath=tmpdir, save_last=True)

    model = BoringModel()
    device = torch.device("cpu")
    trainer = Trainer(
        default_root_dir=tmpdir,
        strategy=SingleDevicePlugin(device),
        plugins=[checkpoint_plugin],
        callbacks=ck,
        max_epochs=2,
    )
    trainer.fit(model)

    assert checkpoint_plugin.save_checkpoint.call_count == 5
    assert checkpoint_plugin.remove_checkpoint.call_count == 1

    trainer.test(model, ckpt_path=ck.last_model_path)
    checkpoint_plugin.load_checkpoint.assert_called_once()
    checkpoint_plugin.load_checkpoint.assert_called_with(tmpdir / "last.ckpt")


@pytest.mark.parametrize("plugin_cls", [
    pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)), TPUSpawnPlugin
])
def test_no_checkpoint_io_plugin_support(plugin_cls):
    with pytest.raises(
            MisconfigurationException,
            match="currently does not support custom checkpoint plugins"):
        plugin_cls().checkpoint_io = CustomCheckpointIO()
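
`CustomCheckpointIO` is referenced but not defined here. Below is a hedged sketch assuming it implements the three-method `CheckpointIO` interface from `pytorch_lightning.plugins.io`; the method signatures follow the 1.5-era API and may differ in other versions.

import os

import torch
from pytorch_lightning.plugins.io import CheckpointIO


class CustomCheckpointIO(CheckpointIO):
    def save_checkpoint(self, checkpoint, path, storage_options=None):
        # plain torch.save, standing in for e.g. a cloud-storage backend
        torch.save(checkpoint, path)

    def load_checkpoint(self, path, storage_options=None):
        return torch.load(path)

    def remove_checkpoint(self, path):
        os.remove(path)
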
                 on_step=False,
                 on_epoch=True,
                 sync_dist=True,
                 reduce_fx="mean")
        self.log("bar_3",
                 batch_idx + self.rank,
                 on_step=False,
                 on_epoch=True,
                 sync_dist=True,
                 reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize(
    "devices",
    [1, pytest.param(2, marks=RunIf(skip_windows=True, skip_49370=True))])
def test_logging_sync_dist_true(tmpdir, devices):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)

    use_multiple_devices = devices > 1
    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        enable_model_summary=False,
        strategy="ddp_spawn" if use_multiple_devices else None,
        accelerator="auto",
        devices=devices,
            trainer = Trainer(accelerator=accelerator, plugins=plugin, num_processes=2)
    assert isinstance(trainer.strategy, DDPShardedStrategy)

    with pytest.deprecated_call(match="Passing .* `strategy` to the `plugins`"):
        trainer = Trainer(plugins=plugin, num_processes=2)
    assert isinstance(trainer.strategy, DDPShardedStrategy)


@pytest.mark.parametrize(
    ["accelerator", "plugin"],
    [
        ("ddp", DDPStrategy),
        ("ddp_spawn", DDPSpawnStrategy),
        ("ddp_sharded", DDPShardedStrategy),
        ("ddp_sharded_spawn", DDPSpawnShardedStrategy),
        pytest.param("deepspeed", DeepSpeedStrategy, marks=RunIf(deepspeed=True)),
    ],
)
@mock.patch("torch.cuda.is_available", return_value=True)
@mock.patch("torch.cuda.device_count", return_value=2)
@pytest.mark.parametrize("gpus", [1, 2])
def test_accelerator_choice_multi_node_gpu(
    mock_is_available, mock_device_count, tmpdir, accelerator: str, plugin: ParallelStrategy, gpus: int
):
    with pytest.deprecated_call(match=r"accelerator=.*\)` has been deprecated"):
        trainer = Trainer(accelerator=accelerator, default_root_dir=tmpdir, num_nodes=2, gpus=gpus)
    assert isinstance(trainer.strategy, plugin)


@pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't require GPU")
def test_accelerator_cpu():
        if not self.early_stop_on_train:
            return
        self._epoch_end()

    def validation_epoch_end(self, outputs):
        if self.early_stop_on_train:
            return
        self._epoch_end()

    def on_train_end(self) -> None:
        assert self.trainer.current_epoch == self.expected_end_epoch, "Early Stopping Failed"


_ES_CHECK = dict(check_on_train_epoch_end=True)
_ES_CHECK_P3 = dict(patience=3, check_on_train_epoch_end=True)
_NO_WIN = dict(marks=RunIf(skip_windows=True))


@pytest.mark.parametrize(
    "callbacks, expected_stop_epoch, check_on_train_epoch_end, accelerator, num_processes",
    [
        ([EarlyStopping("abc"),
          EarlyStopping("cba", patience=3)], 3, False, None, 1),
        ([EarlyStopping("cba", patience=3),
          EarlyStopping("abc")], 3, False, None, 1),
        pytest.param([EarlyStopping("abc"),
                      EarlyStopping("cba", patience=3)], 3, False, "ddp_cpu",
                     2, **_NO_WIN),
        pytest.param([EarlyStopping("cba", patience=3),
                      EarlyStopping("abc")], 3, False, "ddp_cpu", 2, **
                     _NO_WIN),
Example #21
import pytest
import torch

import tests.helpers.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import SingleDevicePlugin
from tests.accelerators.test_dp import CustomClassificationModelDP
from tests.helpers.boring_model import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.runif import RunIf


@pytest.mark.parametrize(
    "trainer_kwargs",
    (
        pytest.param(dict(gpus=1), marks=RunIf(min_gpus=1)),
        pytest.param(dict(accelerator="dp", gpus=2), marks=RunIf(min_gpus=2)),
        pytest.param(dict(accelerator="ddp_spawn", gpus=2),
                     marks=RunIf(min_gpus=2)),
    ),
)
def test_evaluate(tmpdir, trainer_kwargs):
    tutils.set_random_master_port()

    dm = ClassifDataModule()
    model = CustomClassificationModelDP()
    trainer = Trainer(default_root_dir=tmpdir,
                      max_epochs=2,
                      limit_train_batches=10,
                      limit_val_batches=10,
                      deterministic=True,
Example #22
    Ensure that when both a plugin and an accelerator are passed in, the plugin takes precedence.
    """
    trainer = Trainer(accelerator=accelerator, plugins=plugin, num_processes=2)
    assert isinstance(trainer.accelerator.training_type_plugin,
                      DDPShardedPlugin)

    trainer = Trainer(plugins=plugin, num_processes=2)
    assert isinstance(trainer.accelerator.training_type_plugin,
                      DDPShardedPlugin)


@pytest.mark.parametrize(["accelerator", "plugin"], [
    ('ddp', DDPPlugin),
    ('ddp_spawn', DDPSpawnPlugin),
    ('ddp_sharded', DDPShardedPlugin),
    ('ddp_sharded_spawn', DDPSpawnShardedPlugin),
    pytest.param('deepspeed', DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
])
@mock.patch('torch.cuda.is_available', return_value=True)
@mock.patch('torch.cuda.device_count', return_value=2)
def test_accelerator_choice_multi_node_gpu(mock_is_available,
                                           mock_device_count, accelerator,
                                           plugin, tmpdir):
    trainer = Trainer(
        accelerator=accelerator,
        default_root_dir=tmpdir,
        num_nodes=2,
        gpus=2,
    )
    assert isinstance(trainer.training_type_plugin, plugin)
Example #23
    if precision == 32:
        yield
        return
    if accelerator == "gpu":
        with torch.cuda.amp.autocast():
            yield
    elif accelerator == "cpu":
        with torch.cpu.amp.autocast():
            yield


@pytest.mark.parametrize(
    "precision, strategy, devices, accelerator",
    [
        pytest.param(32, None, 1, "cpu"),
        pytest.param(32, None, 1, "gpu", marks=RunIf(min_gpus=1)),
        pytest.param(16, None, 1, "gpu", marks=RunIf(min_gpus=1)),
        pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_torch="1.10", min_gpus=1)),
    ],
)
def test_boring_lite_model_single_device(precision, strategy, devices, accelerator, tmpdir):
    LightningLite.seed_everything(42)
    train_dataloader = DataLoader(RandomDataset(32, 8))
    model = BoringModel()
    num_epochs = 1
    state_dict = deepcopy(model.state_dict())

    lite = LiteRunner(precision=precision, strategy=strategy, devices=devices, accelerator=accelerator)
    lite.run(model, train_dataloader, num_epochs=num_epochs)
    lite_state_dict = model.state_dict()
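
`LiteRunner` is not included in this excerpt. A hedged sketch of what it might look like on top of the `LightningLite` API (`setup`, `setup_dataloaders`, `backward`), just enough to produce a state dict comparable with a Trainer run; the training-loop details are assumptions.

import torch
from pytorch_lightning.lite import LightningLite


class LiteRunner(LightningLite):
    def run(self, model, train_dataloader, num_epochs=1):
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        # wrap the model, optimizer and dataloader so device placement and precision apply
        model, optimizer = self.setup(model, optimizer)
        train_dataloader = self.setup_dataloaders(train_dataloader)
        model.train()
        for _ in range(num_epochs):
            for batch in train_dataloader:
                optimizer.zero_grad()
                loss = model(batch).sum()
                self.backward(loss)  # used instead of loss.backward() so AMP scaling is handled
                optimizer.step()
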
    trainer = Trainer(
        fast_dev_run=True,
        default_root_dir=tmpdir,
        plugins='deepspeed',
    )

    plugin = trainer.accelerator.training_type_plugin
    assert isinstance(plugin, DeepSpeedPlugin)
    assert plugin.parallel_devices == [torch.device('cpu')]
    assert plugin.config == deepspeed_config


@RunIf(amp_native=True, deepspeed=True)
@pytest.mark.parametrize("amp_backend", [
    pytest.param("native", marks=RunIf(amp_native=True)),
    pytest.param("apex", marks=RunIf(amp_apex=True)),
])
def test_deepspeed_precision_choice(amp_backend, tmpdir):
    """
    Test to ensure precision plugin is also correctly chosen.
    DeepSpeed handles precision via Custom DeepSpeedPrecisionPlugin
    """

    trainer = Trainer(
        fast_dev_run=True,
        default_root_dir=tmpdir,
        plugins='deepspeed',
        amp_backend=amp_backend,
        precision=16,
    )
Example #25
    assert cli.model.num_classes == 5


class EarlyExitTestModel(BoringModel):
    def on_fit_start(self):
        raise Exception("Error on fit start")


@pytest.mark.parametrize("logger", (False, True))
@pytest.mark.parametrize(
    "trainer_kwargs",
    (
        dict(accelerator="ddp_cpu"),
        dict(accelerator="ddp_cpu",
             plugins="ddp_find_unused_parameters_false"),
        pytest.param({"tpu_cores": 1}, marks=RunIf(tpu=True)),
    ),
)
def test_cli_ddp_spawn_save_config_callback(tmpdir, logger, trainer_kwargs):
    with mock.patch("sys.argv", ["any.py", "fit"]), pytest.raises(
            Exception, match=r"Error on fit start"):
        LightningCLI(
            EarlyExitTestModel,
            trainer_defaults={
                "default_root_dir": str(tmpdir),
                "logger": logger,
                "max_steps": 1,
                "max_epochs": 1,
                **trainer_kwargs,
            },
        )
Example #26
        strategy=strategy,
        precision=precision,
    )

    model = AMPTestModel()
    trainer.fit(model)
    trainer.test(model)
    trainer.predict(model, DataLoader(RandomDataset(32, 64)))

    assert trainer.state.finished, f"Training failed with {trainer.state}"


@RunIf(min_gpus=2, min_torch="1.10")
@pytest.mark.parametrize("strategy", [None, "dp", "ddp_spawn"])
@pytest.mark.parametrize(
    "precision", [16, pytest.param("bf16", marks=RunIf(bf16_cuda=True))])
@pytest.mark.parametrize("devices", [1, 2])
def test_amp_gpus(tmpdir, strategy, precision, devices):
    """Make sure combinations of AMP and strategies work if supported."""
    tutils.reset_seed()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        accelerator="gpu",
        devices=devices,
        strategy=strategy,
        precision=precision,
    )

    model = AMPTestModel()
Example #27
else:

    class DictConfSubClassBoringModel:
        ...


@pytest.mark.parametrize(
    "cls",
    [
        CustomBoringModel,
        SubClassBoringModel,
        NonSavingSubClassBoringModel,
        SubSubClassBoringModel,
        AggSubClassBoringModel,
        UnconventionalArgsBoringModel,
        pytest.param(DictConfSubClassBoringModel, marks=RunIf(omegaconf=True)),
    ],
)
def test_collect_init_arguments(tmpdir, cls):
    """Test that the model automatically saves the arguments passed into the constructor."""
    extra_args = {}
    if cls is AggSubClassBoringModel:
        extra_args.update(my_loss=torch.nn.CosineEmbeddingLoss())
    elif cls is DictConfSubClassBoringModel:
        extra_args.update(dict_conf=OmegaConf.create(dict(
            my_param="anything")))

    model = cls(**extra_args)
    assert model.hparams.batch_size == 64
    model = cls(batch_size=179, **extra_args)
    assert model.hparams.batch_size == 179
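
The assertions on `model.hparams.batch_size` rely on constructor arguments being captured via `save_hyperparameters()`. A minimal sketch of the pattern the `*BoringModel` variants above presumably follow; their definitions are not included in this excerpt and the class name here is illustrative.

class HParamsBoringModel(BoringModel):
    def __init__(self, batch_size: int = 64):
        super().__init__()
        # records `batch_size` so it is available as self.hparams.batch_size
        # and is restored automatically when loading from a checkpoint
        self.save_hyperparameters()
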
    trainer.predict(model, dataloaders=dataloader, return_predictions=False)
    assert cb.write_on_batch_end.call_count == 4
    assert cb.write_on_epoch_end.call_count == 0

    DummyPredictionWriter.write_on_batch_end.reset_mock()
    DummyPredictionWriter.write_on_epoch_end.reset_mock()

    cb = DummyPredictionWriter("epoch")
    trainer = Trainer(limit_predict_batches=4, callbacks=cb)
    trainer.predict(model, dataloaders=dataloader, return_predictions=False)
    assert cb.write_on_batch_end.call_count == 0
    assert cb.write_on_epoch_end.call_count == 1
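
`DummyPredictionWriter` is not defined in this excerpt; below is a hedged sketch assuming it subclasses `BasePredictionWriter` and only forwards the write interval, which is all the mocked `write_on_*` assertions need.

from pytorch_lightning.callbacks import BasePredictionWriter


class DummyPredictionWriter(BasePredictionWriter):
    def write_on_batch_end(self, trainer, pl_module, prediction, batch_indices, batch, batch_idx, dataloader_idx):
        pass  # a real writer would persist `prediction` here

    def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
        pass  # a real writer would persist the accumulated epoch predictions here
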


@pytest.mark.parametrize("num_workers",
                         [0, pytest.param(2, marks=RunIf(slow=True))])
def test_prediction_writer_batch_indices(tmpdir, num_workers):
    DummyPredictionWriter.write_on_batch_end = Mock()
    DummyPredictionWriter.write_on_epoch_end = Mock()

    dataloader = DataLoader(RandomDataset(32, 64),
                            batch_size=4,
                            num_workers=num_workers)
    model = BoringModel()
    writer = DummyPredictionWriter("batch_and_epoch")
    trainer = Trainer(limit_predict_batches=4, callbacks=writer)
    trainer.predict(model, dataloaders=dataloader)

    writer.write_on_batch_end.assert_has_calls([
        call(trainer, model, ANY, [0, 1, 2, 3], ANY, 0, 0),
        call(trainer, model, ANY, [4, 5, 6, 7], ANY, 1, 0),
    model = BoringModel()

    trainer = Trainer(
        accelerator='ddp_sharded_spawn',
        num_processes=2,
        fast_dev_run=True,
        resume_from_checkpoint=checkpoint_path,
    )

    trainer.fit(model)


@RunIf(skip_windows=True, special=True, fairscale=True)
@pytest.mark.parametrize("trainer_kwargs", (
    dict(num_processes=2),
    pytest.param(dict(gpus=2), marks=RunIf(min_gpus=2)),
))
def test_ddp_sharded_plugin_test_multigpu(tmpdir, trainer_kwargs):
    """
        Test to ensure we can use validate and test without fit
    """
    model = BoringModel()
    trainer = Trainer(
        accelerator='ddp_sharded_spawn',
        fast_dev_run=True,
        **trainer_kwargs,
    )

    trainer.validate(model)
    trainer.test(model)
Example #30
                dict(name="predict_step", args=(ANY, i)),
                # TODO: `predict_step_end`
                dict(name="Callback.on_predict_batch_end",
                     args=(trainer, model, ANY, ANY, i, 0)),
                dict(name="on_predict_batch_end", args=(ANY, ANY, i, 0)),
            ])
        return out


@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        # these precision plugins modify the optimization flow, so testing them explicitly
        pytest.param(dict(gpus=1, precision=16, plugins="deepspeed"),
                     marks=RunIf(deepspeed=True, min_gpus=1)),
        pytest.param(dict(gpus=1, precision=16, amp_backend="native"),
                     marks=RunIf(amp_native=True, min_gpus=1)),
        pytest.param(dict(gpus=1, precision=16, amp_backend="apex"),
                     marks=RunIf(amp_apex=True, min_gpus=1)),
    ],
)
@pytest.mark.parametrize("automatic_optimization", (True, False))
def test_trainer_model_hook_system_fit(tmpdir, kwargs, automatic_optimization):
    called = []

    class TestModel(HookedModel):
        def __init__(self, *args):
            super().__init__(*args)
            self.automatic_optimization = automatic_optimization