def test_poptorch_models_at_different_stages(tmpdir):
    """Check which stages get a poptorch model after ``strategy.setup``.

    With the trainer function set to FITTING, the strategy's ``poptorch_models`` keys are the
    TRAINING and VALIDATING stages. For VALIDATING / TESTING / PREDICTING, only the matching
    stage is present after calling ``setup`` again.
    """
    strategy = IPUStrategy()
    trainer = Trainer(default_root_dir=tmpdir, strategy=strategy, accelerator="ipu", devices=8)

    # Wire model, trainer and strategy together by hand instead of running ``fit``.
    model = BoringModel()
    model.trainer = trainer
    strategy.model = model
    trainer.optimizers = model.configure_optimizers()[0]

    trainer.state.fn = TrainerFn.FITTING
    trainer.strategy.setup(trainer)
    expected_fit_stages = [RunningStage.TRAINING, RunningStage.VALIDATING]
    assert list(trainer.strategy.poptorch_models) == expected_fit_stages

    trainer_fns = (TrainerFn.VALIDATING, TrainerFn.TESTING, TrainerFn.PREDICTING)
    running_stages = (RunningStage.VALIDATING, RunningStage.TESTING, RunningStage.PREDICTING)
    for trainer_fn, running_stage in zip(trainer_fns, running_stages):
        trainer.state.fn = trainer_fn
        trainer.state.stage = running_stage
        trainer.strategy.setup(trainer)
        assert list(trainer.strategy.poptorch_models) == [running_stage]
def test_manual_poptorch_dataloader(tmpdir):
    """Check that a ``poptorch.DataLoader`` built by the model is used untouched.

    The model creates its train dataloader with its own ``poptorch.Options`` while the strategy
    is given a different ``Options`` object. After fitting, the trainer must hold the very same
    dataloader object, still carrying the model's options and ``drop_last``.
    """
    model_options = poptorch.Options()

    class IPUTestModel(IPUModel):
        def train_dataloader(self):
            base_loader = super().train_dataloader()
            # Stash the loader on the instance so its identity can be checked after fitting.
            self.poptorch_dataloader = poptorch.DataLoader(model_options, base_loader.dataset, drop_last=True)
            return self.poptorch_dataloader

    model = IPUTestModel()
    other_options = poptorch.Options()
    strategy = IPUStrategy(training_opts=other_options)
    trainer = Trainer(
        strategy=strategy,
        devices=2,
        accelerator="ipu",
        fast_dev_run=True,
        default_root_dir=tmpdir,
    )
    trainer.fit(model)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer.strategy.training_opts is other_options

    used_loader = trainer.train_dataloader.loaders
    # Exact object, was not recreated.
    assert used_loader is model.poptorch_dataloader
    # The dataloader uses the options from the model, not those from the strategy.
    assert used_loader.options is model_options
    assert used_loader.options is not other_options
    # ``drop_last`` was kept.
    assert used_loader.drop_last
def test_manual_poptorch_opts(tmpdir):
    """Check that user-provided poptorch ``Options`` end up on the strategy and the train dataloader.

    Also asserts that, with more than one device, the train dataloader's sampler is not a
    ``DistributedSampler``.
    """
    model = IPUModel()
    inference_opts = poptorch.Options()
    training_opts = poptorch.Options()

    strategy = IPUStrategy(inference_opts=inference_opts, training_opts=training_opts)
    trainer = Trainer(
        strategy=strategy,
        fast_dev_run=True,
        devices=2,
        accelerator="ipu",
        default_root_dir=tmpdir,
    )
    trainer.fit(model)

    fitted_strategy = trainer.strategy
    assert isinstance(fitted_strategy, IPUStrategy)
    assert fitted_strategy.training_opts == training_opts
    assert fitted_strategy.inference_opts == inference_opts

    train_loader = trainer.train_dataloader.loaders
    assert isinstance(train_loader, poptorch.DataLoader)
    assert train_loader.options == training_opts
    # Testing the sampler only makes sense in a distributed setting.
    assert trainer.num_devices > 1
    assert not isinstance(train_loader.sampler, DistributedSampler)
def test_replication_factor(tmpdir):
    """Check the strategy's ``replication_factor`` with and without custom poptorch ``Options``.

    Without custom options the factor equals the number of devices (2). With custom options
    (training: 8, inference: 7) the factor follows the current running stage after ``setup``.
    """
    # Part 1: default options, the replication factor follows ``devices``.
    default_strategy = IPUStrategy()
    trainer = Trainer(
        accelerator="ipu", devices=2, default_root_dir=tmpdir, fast_dev_run=True, strategy=default_strategy
    )
    assert isinstance(trainer.accelerator, IPUAccelerator)
    assert trainer.num_devices == 2
    assert trainer.strategy.replication_factor == 2

    # Part 2: user-provided options carry their own replication factors.
    model = BoringModel()
    training_opts = poptorch.Options()
    inference_opts = poptorch.Options()
    training_opts.replicationFactor(8)
    inference_opts.replicationFactor(7)

    custom_strategy = IPUStrategy(inference_opts=inference_opts, training_opts=training_opts)
    trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1, strategy=custom_strategy)

    # Wire model, trainer and strategy together by hand instead of running ``fit``.
    trainer.optimizers = model.configure_optimizers()[0]
    custom_strategy.model = model
    model.trainer = trainer

    trainer.state.fn = TrainerFn.FITTING
    trainer.strategy.setup(trainer)

    trainer.state.stage = RunningStage.TRAINING
    assert trainer.strategy.replication_factor == 8
    trainer.state.stage = RunningStage.VALIDATING
    assert trainer.strategy.replication_factor == 7

    trainer_fns = (TrainerFn.VALIDATING, TrainerFn.TESTING, TrainerFn.PREDICTING)
    running_stages = (RunningStage.VALIDATING, RunningStage.TESTING, RunningStage.PREDICTING)
    for trainer_fn, running_stage in zip(trainer_fns, running_stages):
        trainer.state.fn = trainer_fn
        trainer.state.stage = running_stage
        trainer.strategy.setup(trainer)
        assert trainer.strategy.replication_factor == 7
def test_manual_poptorch_opts_custom(tmpdir):
    """Check that custom parameters set on user-provided poptorch ``Options`` are respected.

    The values are verified on the strategy's options after fitting and, via a callback at the
    end of fit, on the options attached to the train and validation dataloaders.
    """
    model = IPUModel()

    training_opts = poptorch.Options()
    training_opts.deviceIterations(8)
    training_opts.replicationFactor(2)
    training_opts.Training.gradientAccumulation(2)

    inference_opts = poptorch.Options()
    inference_opts.deviceIterations(16)
    inference_opts.replicationFactor(1)
    inference_opts.Training.gradientAccumulation(1)

    class TestCallback(Callback):
        def on_fit_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
            # Ensure the dataloaders were correctly set up during training.
            strategy = trainer.strategy
            assert isinstance(strategy, IPUStrategy)
            assert strategy.training_opts.replication_factor == 2
            assert strategy.inference_opts.replication_factor == 1

            val_loader = trainer.val_dataloaders[0]
            combined_loader = trainer.train_dataloader
            assert isinstance(combined_loader, CombinedLoader)
            train_loader = combined_loader.loaders

            assert isinstance(val_loader, poptorch.DataLoader)
            assert isinstance(train_loader, poptorch.DataLoader)
            assert train_loader.options.replication_factor == 2
            assert val_loader.options.replication_factor == 1

    plugin = IPUStrategy(inference_opts=inference_opts, training_opts=training_opts)
    # Ensure we default to the training options' replication factor.
    assert plugin.replication_factor == 2

    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, strategy=plugin, callbacks=TestCallback())
    trainer.fit(model)

    fitted_strategy = trainer.strategy
    assert isinstance(fitted_strategy, IPUStrategy)

    used_training_opts = fitted_strategy.training_opts
    assert used_training_opts.device_iterations == 8
    assert used_training_opts.replication_factor == 2
    assert used_training_opts.Training.gradient_accumulation == 2

    used_inference_opts = fitted_strategy.inference_opts
    assert used_inference_opts.device_iterations == 16
    assert used_inference_opts.replication_factor == 1
    assert used_inference_opts.Training.gradient_accumulation == 1
def test_autoreport(tmpdir):
    """Ensure autoreport dumps to a file.

    Fits an ``IPUModel`` with ``autoreport=True`` and checks that the report directory exists
    and contains a ``profile.pop`` file.
    """
    model = IPUModel()
    autoreport_path = os.path.join(tmpdir, "report/")
    trainer = Trainer(
        default_root_dir=tmpdir,
        # Select the IPU through ``accelerator``/``devices`` like the other tests in this file,
        # rather than through the legacy ``ipus`` flag.
        accelerator="ipu",
        devices=1,
        fast_dev_run=True,
        strategy=IPUStrategy(autoreport=True, autoreport_dir=autoreport_path),
    )
    trainer.fit(model)

    assert os.path.exists(autoreport_path)
    # Join the path components instead of relying on the trailing slash of ``autoreport_path``.
    assert os.path.isfile(os.path.join(autoreport_path, "profile.pop"))
def test_manual_poptorch_opts(tmpdir):
    """Ensure if the user passes manual poptorch Options, we run with the correct object."""
    # NOTE(review): a function with this same name is defined earlier in this file. If both are
    # at module level, this later definition replaces the earlier one and only this test is
    # collected -- confirm and rename one of them.
    model = IPUModel()
    inference_opts = poptorch.Options()
    training_opts = poptorch.Options()

    trainer = Trainer(
        default_root_dir=tmpdir,
        # Select the IPU through ``accelerator``/``devices`` like the other tests in this file,
        # rather than through the legacy ``ipus`` flag.
        accelerator="ipu",
        devices=1,
        fast_dev_run=True,
        strategy=IPUStrategy(inference_opts=inference_opts, training_opts=training_opts),
    )
    trainer.fit(model)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer.strategy.training_opts == training_opts
    assert trainer.strategy.inference_opts == inference_opts
def test_device_iterations_ipu_plugin(tmpdir):
    """Check that ``IPUStrategy(device_iterations=2)`` reaches the training poptorch model's options.

    The check runs in a callback at train start, which then raises ``SystemExit`` to stop the
    run early. The test expects that exception to propagate out of ``fit``.
    """

    class TestCallback(Callback):
        def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
            assert trainer.strategy.device_iterations == 2
            # Device iterations must also be set within the poptorch options of the training model.
            training_model = trainer.strategy.poptorch_models[RunningStage.TRAINING]
            options_as_dict = training_model._options.toDict()
            assert options_as_dict["device_iterations"] == 2
            raise SystemExit

    model = IPUModel()
    strategy = IPUStrategy(device_iterations=2)
    trainer = Trainer(
        callbacks=TestCallback(),
        strategy=strategy,
        devices=1,
        accelerator="ipu",
        fast_dev_run=True,
        default_root_dir=tmpdir,
    )
    assert isinstance(trainer.strategy, IPUStrategy)

    with pytest.raises(SystemExit):
        trainer.fit(model)
def test_device_type_when_training_plugin_ipu_passed(tmpdir):
    """Check that passing an ``IPUStrategy`` instance yields an IPU strategy and an IPU accelerator."""
    ipu_strategy = IPUStrategy()
    trainer = Trainer(strategy=ipu_strategy, accelerator="ipu", devices=8)

    chosen_strategy = trainer.strategy
    assert isinstance(chosen_strategy, IPUStrategy)
    chosen_accelerator = trainer.accelerator
    assert isinstance(chosen_accelerator, IPUAccelerator)
def test_strategy_choice_ipu_plugin(tmpdir):
    """Check that an ``IPUStrategy`` instance passed to the ``Trainer`` is kept as its strategy type."""
    ipu_strategy = IPUStrategy()
    trainer = Trainer(strategy=ipu_strategy, accelerator="ipu", devices=8)

    chosen_strategy = trainer.strategy
    assert isinstance(chosen_strategy, IPUStrategy)
def test_no_warning_plugin(tmpdir):
    """Ensure building a ``Trainer`` with an ``IPUStrategy`` that carries custom options emits no warning."""
    # Local import so this block does not depend on the module's import section.
    import warnings

    # ``pytest.warns(None)`` is deprecated in pytest 7 and an error in pytest 8, so record
    # warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        # Record every warning, including ones that default filters would hide.
        warnings.simplefilter("always")
        Trainer(default_root_dir=tmpdir, max_epochs=1, strategy=IPUStrategy(training_opts=poptorch.Options()))

    # On failure, show the recorded messages.
    assert len(record) == 0, [str(warning.message) for warning in record]
def test_device_type_when_training_plugin_ipu_passed(tmpdir):
    """Check strategy, device type and accelerator when an ``IPUStrategy`` instance is passed."""
    # NOTE(review): a function with this same name is defined earlier in this file. If both are
    # at module level, this later definition replaces the earlier one and only this test is
    # collected -- confirm and rename one of them.
    # Select the IPUs through ``accelerator``/``devices`` like the other tests in this file,
    # rather than through the legacy ``ipus`` flag.
    trainer = Trainer(strategy=IPUStrategy(), accelerator="ipu", devices=8)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer._device_type == _AcceleratorType.IPU
    assert isinstance(trainer.accelerator, IPUAccelerator)