Example #1
def test_manual_poptorch_opts_custom(tmpdir):
    """Ensure if the user passes manual poptorch Options with custom parameters set, we respect them in our
    poptorch options and the dataloaders."""

    model = IPUModel()
    training_opts = poptorch.Options()
    training_opts.deviceIterations(8)
    training_opts.replicationFactor(2)
    training_opts.Training.gradientAccumulation(2)

    inference_opts = poptorch.Options()
    inference_opts.deviceIterations(16)
    inference_opts.replicationFactor(1)
    inference_opts.Training.gradientAccumulation(1)

    class TestCallback(Callback):
        def on_fit_end(self, trainer: Trainer,
                       pl_module: LightningModule) -> None:
            # ensure dataloaders were correctly set up during training.
            plugin = trainer.strategy
            assert isinstance(plugin, IPUStrategy)
            assert plugin.training_opts.replication_factor == 2
            assert plugin.inference_opts.replication_factor == 1

            val_dataloader = trainer.val_dataloaders[0]
            train_dataloader = trainer.train_dataloader
            assert isinstance(train_dataloader, CombinedLoader)
            train_dataloader = train_dataloader.loaders
            assert isinstance(val_dataloader, poptorch.DataLoader)
            assert isinstance(train_dataloader, poptorch.DataLoader)
            assert train_dataloader.options.replication_factor == 2
            assert val_dataloader.options.replication_factor == 1

    plugin = IPUStrategy(inference_opts=inference_opts,
                         training_opts=training_opts)
    # ensure we default to the training options replication factor
    assert plugin.replication_factor == 2
    trainer = Trainer(default_root_dir=tmpdir,
                      fast_dev_run=True,
                      strategy=plugin,
                      callbacks=TestCallback())
    trainer.fit(model)

    plugin = trainer.strategy
    assert isinstance(plugin, IPUStrategy)

    training_opts = plugin.training_opts
    assert training_opts.device_iterations == 8
    assert training_opts.replication_factor == 2
    assert training_opts.Training.gradient_accumulation == 2

    inference_opts = plugin.inference_opts
    assert inference_opts.device_iterations == 16
    assert inference_opts.replication_factor == 1
    assert inference_opts.Training.gradient_accumulation == 1
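
The same pattern works outside the test suite: build separate poptorch.Options for training and inference and hand both to IPUStrategy. A minimal sketch, assuming an IPU-enabled PopTorch/Lightning install; IPUModel is the helper LightningModule these tests define elsewhere, and the IPUStrategy import path varies across Lightning releases:

import poptorch
from pytorch_lightning import Trainer
from pytorch_lightning.strategies import IPUStrategy  # older releases: pytorch_lightning.plugins.IPUPlugin

training_opts = poptorch.Options()
training_opts.deviceIterations(8)
training_opts.replicationFactor(2)
training_opts.Training.gradientAccumulation(2)

inference_opts = poptorch.Options()
inference_opts.deviceIterations(16)

model = IPUModel()  # any LightningModule works here
trainer = Trainer(strategy=IPUStrategy(training_opts=training_opts,
                                       inference_opts=inference_opts))
trainer.fit(model)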
Example #2
def test_replication_factor(tmpdir):
    """Ensure if the user passes manual poptorch Options with custom parameters set, we set them correctly in the
    dataloaders."""

    plugin = IPUStrategy()
    trainer = Trainer(ipus=2,
                      default_root_dir=tmpdir,
                      fast_dev_run=True,
                      strategy=plugin)
    assert trainer.ipus == 2
    assert trainer.strategy.replication_factor == 2

    model = BoringModel()
    training_opts = poptorch.Options()
    inference_opts = poptorch.Options()
    training_opts.replicationFactor(8)
    inference_opts.replicationFactor(7)
    plugin = IPUStrategy(inference_opts=inference_opts,
                         training_opts=training_opts)

    trainer = Trainer(default_root_dir=tmpdir, ipus=1, strategy=plugin)
    trainer.optimizers = model.configure_optimizers()[0]
    plugin.model = model
    model.trainer = trainer
    trainer.state.fn = TrainerFn.FITTING
    trainer.strategy.setup(trainer)

    trainer.state.stage = RunningStage.TRAINING
    assert trainer.strategy.replication_factor == 8
    trainer.state.stage = RunningStage.VALIDATING
    assert trainer.strategy.replication_factor == 7

    for fn, stage in (
        (TrainerFn.VALIDATING, RunningStage.VALIDATING),
        (TrainerFn.TESTING, RunningStage.TESTING),
        (TrainerFn.PREDICTING, RunningStage.PREDICTING),
    ):
        trainer.state.fn = fn
        trainer.state.stage = stage
        trainer.strategy.setup(trainer)
        assert trainer.strategy.replication_factor == 7
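
Note the naming convention the assertions above depend on: poptorch.Options is configured through camelCase setters but read back through snake_case attributes. A tiny round-trip sketch, assuming poptorch is installed:

import poptorch

opts = poptorch.Options()
opts.replicationFactor(4)  # camelCase setter: run 4 replicas of the model
opts.deviceIterations(2)   # camelCase setter: 2 device iterations per host step

# snake_case readers, as used by the replication-factor assertions above:
assert opts.replication_factor == 4
assert opts.device_iterations == 2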
Example #3
def test_autoreport(tmpdir):
    """Ensure autoreport dumps to a file."""
    model = IPUModel()
    autoreport_path = os.path.join(tmpdir, "report/")
    trainer = Trainer(
        default_root_dir=tmpdir,
        ipus=1,
        fast_dev_run=True,
        strategy=IPUStrategy(autoreport=True, autoreport_dir=autoreport_path),
    )
    trainer.fit(model)
    assert os.path.exists(autoreport_path)
    assert os.path.isfile(autoreport_path + "profile.pop")
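
For context, autoreport is forwarded to Poplar through the POPLAR_ENGINE_OPTIONS environment variable in the Lightning versions these tests target. Roughly the equivalent manual setup, sketched as an assumption about the strategy's internals rather than a guaranteed API:

import json
import os

# Approximately what IPUStrategy(autoreport=True, autoreport_dir=report_dir)
# arranges before compilation (assumed internals; report_dir is hypothetical):
report_dir = "/tmp/report/"
os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(
    {"autoReport.all": True, "autoReport.directory": report_dir}
)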
Example #4
def test_poptorch_models_at_different_stages(tmpdir):
    plugin = IPUStrategy()
    trainer = Trainer(default_root_dir=tmpdir, strategy=plugin, ipus=8)
    model = BoringModel()
    model.trainer = trainer
    plugin.model = model

    trainer.optimizers = model.configure_optimizers()[0]
    trainer.state.fn = TrainerFn.FITTING
    trainer.strategy.setup(trainer)
    assert list(trainer.strategy.poptorch_models) == [
        RunningStage.TRAINING, RunningStage.VALIDATING
    ]

    for fn, stage in (
        (TrainerFn.VALIDATING, RunningStage.VALIDATING),
        (TrainerFn.TESTING, RunningStage.TESTING),
        (TrainerFn.PREDICTING, RunningStage.PREDICTING),
    ):
        trainer.state.fn = fn
        trainer.state.stage = stage
        trainer.strategy.setup(trainer)
        assert list(trainer.strategy.poptorch_models) == [stage]
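
In other words, the strategy keeps one compiled poptorch model per RunningStage, and FITTING eagerly prepares both the training and validation entries. A short sketch of how those entries are reached once setup has run, using only names the test above already exercises:

# After trainer.strategy.setup(trainer) with trainer.state.fn == TrainerFn.FITTING:
compiled = trainer.strategy.poptorch_models      # dict keyed by RunningStage
train_model = compiled[RunningStage.TRAINING]    # poptorch-wrapped training model
val_model = compiled[RunningStage.VALIDATING]    # poptorch-wrapped validation model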
Example #5
def test_manual_poptorch_opts(tmpdir):
    """Ensure if the user passes manual poptorch Options, we run with the correct object."""
    model = IPUModel()
    inference_opts = poptorch.Options()
    training_opts = poptorch.Options()

    trainer = Trainer(
        default_root_dir=tmpdir,
        ipus=1,
        fast_dev_run=True,
        strategy=IPUStrategy(inference_opts=inference_opts,
                             training_opts=training_opts),
    )
    trainer.fit(model)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer.strategy.training_opts == training_opts
    assert trainer.strategy.inference_opts == inference_opts
Example #6
def test_device_iterations_ipu_plugin(tmpdir):
    class TestCallback(Callback):
        def on_train_start(self, trainer: Trainer,
                           pl_module: LightningModule) -> None:
            assert trainer.strategy.device_iterations == 2
            # assert device iterations has been set correctly within the poptorch options
            poptorch_model = trainer.strategy.poptorch_models[
                RunningStage.TRAINING]
            assert poptorch_model._options.toDict()["device_iterations"] == 2
            raise SystemExit

    model = IPUModel()
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        ipus=1,
        strategy=IPUStrategy(device_iterations=2),
        callbacks=TestCallback(),
    )
    assert isinstance(trainer.strategy, IPUStrategy)
    with pytest.raises(SystemExit):
        trainer.fit(model)
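
device_iterations multiplies the work done per host-side step; together with replication and gradient accumulation it determines how many samples a single step consumes (this product is how the PopTorch DataLoader batches data). A quick worked example with assumed numbers:

# Samples consumed by one host step under IPUStrategy(device_iterations=2):
batch_size = 16
device_iterations = 2
replication_factor = 1
gradient_accumulation = 1

samples_per_step = (batch_size * device_iterations
                    * replication_factor * gradient_accumulation)
print(samples_per_step)  # 32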
Example #7
    def select_training_type_plugin(self) -> Strategy:
        if (isinstance(self.distributed_backend, Accelerator)
                and self.distributed_backend.training_type_plugin is not None):
            plugin = self.distributed_backend.training_type_plugin
        elif self.use_ddp2:
            plugin = DDP2Strategy(parallel_devices=self.parallel_devices,
                                  cluster_environment=self.cluster_environment)
        elif self.use_ddp and self.use_deepspeed:
            plugin = DeepSpeedStrategy(
                cluster_environment=self.select_cluster_environment(),
                parallel_devices=self.parallel_devices)
        elif self.use_ddp:
            use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks()
            use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.detect()
            use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.detect()
            use_ddp_spawn = self._distrib_type == _StrategyType.DDP_SPAWN
            use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu
            use_tpu_spawn = self.use_tpu and self._distrib_type == _StrategyType.TPU_SPAWN
            use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.detect()
            use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.detect()
            use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks()
            use_ddp_sharded = self._distrib_type == _StrategyType.DDP_SHARDED
            use_ddp_sharded_spawn = self._distrib_type == _StrategyType.DDP_SHARDED_SPAWN
            use_ddp_fully_sharded = self._distrib_type == _StrategyType.DDP_FULLY_SHARDED

            if use_tpu_spawn:
                ddp_strategy_cls = TPUSpawnStrategy
            elif use_ddp_sharded:
                ddp_strategy_cls = DDPShardedStrategy
            elif use_ddp_sharded_spawn:
                ddp_strategy_cls = DDPSpawnShardedStrategy
            elif (use_ddp_cpu_slurm or use_slurm_ddp
                  or use_ddp_cpu_torch_elastic or use_torchelastic_ddp
                  or use_kubeflow_ddp or use_ddp_cpu_kubeflow):
                ddp_strategy_cls = DDPStrategy
            elif use_ddp_spawn or use_ddp_cpu_spawn:
                ddp_strategy_cls = DDPSpawnStrategy
            elif use_ddp_fully_sharded:
                ddp_strategy_cls = DDPFullyShardedStrategy
            else:
                ddp_strategy_cls = DDPStrategy

            plugin = ddp_strategy_cls(
                parallel_devices=self.parallel_devices,
                cluster_environment=self.cluster_environment)
        elif self.use_dp:
            plugin = DataParallelStrategy(
                parallel_devices=self.parallel_devices)
        elif self.use_horovod:
            plugin = HorovodStrategy(parallel_devices=self.parallel_devices)
        elif self.use_tpu and isinstance(self.tpu_cores, list):
            plugin = SingleTPUStrategy(self.tpu_id)
        elif self.use_ipu:
            plugin = IPUStrategy(parallel_devices=self.parallel_devices)
        else:
            single_gpu_ordinal = device_parser.determine_root_gpu_device(
                self.parallel_device_ids)
            plugin = SingleDeviceStrategy(device=torch.device(
                f"cuda:{single_gpu_ordinal}" if self.use_gpu else "cpu"))
        return plugin
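
Reading the branches top to bottom gives the effective precedence; a comment-only summary (a simplification, not the real implementation):

# 1. an Accelerator instance that already carries a training_type_plugin
# 2. DDP2Strategy
# 3. DeepSpeedStrategy (ddp + deepspeed)
# 4. the DDP family: TPU spawn, sharded, sharded-spawn, cluster-managed DDP
#    (SLURM / TorchElastic / Kubeflow), spawn, fully sharded, plain DDP
# 5. DataParallelStrategy, then HorovodStrategy, then SingleTPUStrategy, then IPUStrategy
# 6. fallback: SingleDeviceStrategy on "cuda:<root gpu>" or "cpu"

Note that every IPU test on this page bypasses this selection path by passing an IPUStrategy instance directly to Trainer(strategy=...).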
Example #8
def test_device_type_when_training_plugin_ipu_passed(tmpdir):

    trainer = Trainer(strategy=IPUStrategy(), ipus=8)
    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer._device_type == _AcceleratorType.IPU
    assert isinstance(trainer.accelerator, IPUAccelerator)
Example #9
def test_strategy_choice_ipu_plugin(tmpdir):
    trainer = Trainer(strategy=IPUStrategy(), accelerator="ipu", devices=8)
    assert isinstance(trainer.strategy, IPUStrategy)
Example #10
def test_no_warning_plugin(tmpdir):
    with pytest.warns(None) as record:
        Trainer(default_root_dir=tmpdir,
                max_epochs=1,
                strategy=IPUStrategy(training_opts=poptorch.Options()))
    assert len(record) == 0
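
One caveat worth knowing: pytest.warns(None) is deprecated as of pytest 7. A hedged equivalent using the standard library, should this test need porting:

import warnings

with warnings.catch_warnings(record=True) as record:
    warnings.simplefilter("always")
    Trainer(default_root_dir=tmpdir,
            max_epochs=1,
            strategy=IPUStrategy(training_opts=poptorch.Options()))
assert len(record) == 0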