# Imports assumed for these test excerpts (PyTorch Lightning ~1.5 layout):
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
    ModelSummary,
    ProgressBar,
)
from pytorch_lightning.trainer.connectors.callback_connector import CallbackConnector


def test_model_summary_callback_present_trainer():

    trainer = Trainer()
    assert any(isinstance(cb, ModelSummary) for cb in trainer.callbacks)

    trainer = Trainer(callbacks=ModelSummary())
    assert any(isinstance(cb, ModelSummary) for cb in trainer.callbacks)
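
def _example_disable_default_model_summary():
    # Illustrative sketch, not part of the original tests: the default
    # ModelSummary callback added by the Trainer can be switched off with the
    # `enable_model_summary` flag (available in PyTorch Lightning ~1.5+).
    trainer = Trainer(enable_model_summary=False)
    assert not any(isinstance(cb, ModelSummary) for cb in trainer.callbacks)
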
def test_checkpoint_callbacks_are_last(tmpdir):
    """Test that checkpoint callbacks always get moved to the end of the list, with preserved order."""
    checkpoint1 = ModelCheckpoint(tmpdir)
    checkpoint2 = ModelCheckpoint(tmpdir)
    model_summary = ModelSummary()
    early_stopping = EarlyStopping()
    lr_monitor = LearningRateMonitor()
    progress_bar = ProgressBar()

    # no model reference
    trainer = Trainer(callbacks=[checkpoint1, progress_bar, lr_monitor, model_summary, checkpoint2])
    cb_connector = CallbackConnector(trainer)
    cb_connector._attach_model_callbacks()
    assert trainer.callbacks == [progress_bar, lr_monitor, model_summary, checkpoint1, checkpoint2]

    # no model callbacks
    model = LightningModule()
    model.configure_callbacks = lambda: []
    trainer.model = model
    cb_connector._attach_model_callbacks()
    assert trainer.callbacks == [progress_bar, lr_monitor, model_summary, checkpoint1, checkpoint2]

    # with model-specific callbacks that substitute ones in Trainer
    model = LightningModule()
    model.configure_callbacks = lambda: [checkpoint1, early_stopping, model_summary, checkpoint2]
    trainer = Trainer(callbacks=[progress_bar, lr_monitor, ModelCheckpoint(tmpdir)])
    trainer.model = model
    cb_connector = CallbackConnector(trainer)
    cb_connector._attach_model_callbacks()
    assert trainer.callbacks == [progress_bar, lr_monitor, early_stopping, model_summary, checkpoint1, checkpoint2]
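
# Standalone sketch of the reordering behaviour exercised by the test above
# (illustrative only, not the actual CallbackConnector implementation):
# checkpoint callbacks are moved to the end of the callback list while the
# relative order inside each group is preserved.
def _reorder_checkpoints_last(callbacks):
    non_checkpoints = [cb for cb in callbacks if not isinstance(cb, ModelCheckpoint)]
    checkpoints = [cb for cb in callbacks if isinstance(cb, ModelCheckpoint)]
    return non_checkpoints + checkpoints
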
Example #3
def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
    plugins = [
        NLPDDPPlugin(
            no_ddp_communication_hook=(
                megatron_amp_o2 and cfg.trainer.precision == 'bf16'
            ),  # Only bf16 uses fp32_grad_accum.
            gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
            find_unused_parameters=False,
        )
    ]
    if cfg.trainer.precision in [16, 'bf16']:
        scaler = None
        if cfg.trainer.precision == 16:
            scaler = GradScaler(
                init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
                growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
                hysteresis=cfg.model.get('hysteresis', 2),
            )
        if megatron_amp_o2:
            plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler))
        else:
            plugins.append(NativeMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins, **cfg.trainer, callbacks=[ModelSummary(max_depth=3)])

    # tokenizers will be trained and tarred training data will be created if needed
    # model config is then updated
    if cfg.model.preproc_out_dir is not None:
        MTDataPreproc(cfg=cfg.model, trainer=trainer)

    exp_manager(trainer, cfg.exp_manager)

    # update resume from checkpoint found by exp_manager
    resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path
    logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}')

    trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint)
    # Override timer callback to a stateless one
    for idx, callback in enumerate(trainer.callbacks):
        if isinstance(callback, Timer):
            trainer.callbacks[idx] = StatelessTimer(cfg.trainer.max_time)

    # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams
    with open_dict(cfg):
        cfg.model.precision = cfg.trainer.precision

    model = MegatronNMTModel(cfg.model, trainer)
    if cfg.do_training:
        trainer.fit(model)

    if cfg.do_testing:
        trainer.test(model)
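
# Rough sketch of the configuration shape read by main() above, built with
# OmegaConf purely for illustration. The real script receives its cfg from a
# Hydra YAML file; every value below is a placeholder, not a NeMo default.
from omegaconf import OmegaConf

_example_cfg = OmegaConf.create(
    {
        "do_training": True,
        "do_testing": False,
        "cluster_type": None,
        # keys under "trainer" are forwarded to pytorch_lightning.Trainer(**cfg.trainer)
        "trainer": {"precision": 16, "max_time": "00:03:50:00"},
        "exp_manager": {},
        "model": {
            "megatron_amp_O2": False,
            "gradient_as_bucket_view": True,
            "preproc_out_dir": None,
        },
    }
)
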
    def _configure_model_summary_callback(
            self,
            enable_model_summary: bool,
            weights_summary: Optional[str] = None) -> None:
        if weights_summary is None:
            rank_zero_deprecation(
                "Setting `Trainer(weights_summary=None)` is deprecated in v1.5 and will be removed"
                " in v1.7. Please set `Trainer(enable_model_summary=False)` instead."
            )
            return
        if not enable_model_summary:
            return

        model_summary_cbs = [
            type(cb) for cb in self.trainer.callbacks
            if isinstance(cb, ModelSummary)
        ]
        if model_summary_cbs:
            rank_zero_info(
                f"Trainer already configured with model summary callbacks: {model_summary_cbs}."
                " Skipping setting a default `ModelSummary` callback.")
            return

        if weights_summary == "top":
            # special case the default value for weights_summary to preserve backward compatibility
            max_depth = 1
        else:
            rank_zero_deprecation(
                f"Setting `Trainer(weights_summary={weights_summary})` is deprecated in v1.5 and will be removed"
                " in v1.7. Please pass `pytorch_lightning.callbacks.model_summary.ModelSummary` with"
                " `max_depth` directly to the Trainer's `callbacks` argument instead."
            )
            if weights_summary not in ModelSummaryMode.supported_types():
                raise MisconfigurationException(
                    f"`weights_summary` can be None or one of {', '.join(ModelSummaryMode.supported_types())},"
                    f" but got {weights_summary}"
                )
            max_depth = ModelSummaryMode.get_max_depth(weights_summary)

        progress_bar_callback = self.trainer.progress_bar_callback
        is_progress_bar_rich = isinstance(progress_bar_callback,
                                          RichProgressBar)

        if progress_bar_callback is not None and is_progress_bar_rich:
            model_summary = RichModelSummary(max_depth=max_depth)
        else:
            model_summary = ModelSummary(max_depth=max_depth)
        self.trainer.callbacks.append(model_summary)
        self.trainer._weights_summary = weights_summary
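
    # Migration sketch implied by the deprecation messages above (illustrative,
    # not part of the original class): instead of the `weights_summary` flag,
    # pass a ModelSummary callback with `max_depth` to the Trainer.
    # max_depth=1 corresponds to the old "top" mode; max_depth=-1 (assumed here
    # to correspond to the old "full" mode) summarizes all nested submodules.
    @staticmethod
    def _example_weights_summary_migration():
        trainer_top = Trainer(callbacks=[ModelSummary(max_depth=1)])
        trainer_full = Trainer(callbacks=[ModelSummary(max_depth=-1)])
        return trainer_top, trainer_full
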
    def _configure_model_summary_callback(self,
                                          enable_model_summary: bool) -> None:
        if not enable_model_summary:
            return

        model_summary_cbs = [
            type(cb) for cb in self.trainer.callbacks
            if isinstance(cb, ModelSummary)
        ]
        if model_summary_cbs:
            rank_zero_info(
                f"Trainer already configured with model summary callbacks: {model_summary_cbs}."
                " Skipping setting a default `ModelSummary` callback.")
            return

        progress_bar_callback = self.trainer.progress_bar_callback
        is_progress_bar_rich = isinstance(progress_bar_callback,
                                          RichProgressBar)

        if progress_bar_callback is not None and is_progress_bar_rich:
            model_summary = RichModelSummary()
        else:
            model_summary = ModelSummary()
        self.trainer.callbacks.append(model_summary)
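
    # Illustrative consequence of the branch above (not original library code):
    # if a RichProgressBar is among the user callbacks, the default summary
    # callback becomes a RichModelSummary instead of the plain ModelSummary.
    # Assumes the optional `rich` package is installed.
    @staticmethod
    def _example_rich_summary_selection():
        trainer = Trainer(callbacks=[RichProgressBar()])
        assert any(isinstance(cb, RichModelSummary) for cb in trainer.callbacks)
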
Example #6
def main(cfg) -> None:
    logging.info("\n\n************** Experiment configuration ***********")
    logging.info(f'\n{OmegaConf.to_yaml(cfg)}')

    megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False)
    plugins = [
        NLPDDPPlugin(
            no_ddp_communication_hook=True,
            gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
            find_unused_parameters=False,
        )
    ]
    if cfg.trainer.precision in [16, 'bf16']:
        scaler = None
        if cfg.trainer.precision == 16:
            scaler = GradScaler(
                init_scale=cfg.model.get('native_amp_init_scale', 2**32),
                growth_interval=cfg.model.get('native_amp_growth_interval',
                                              1000),
                hysteresis=cfg.model.get('hysteresis', 2),
            )
        if megatron_amp_o2:
            plugins.append(
                MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision,
                                            device='cuda',
                                            scaler=scaler))
        else:
            plugins.append(
                PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision,
                                             device='cuda',
                                             scaler=scaler))

    if cfg.get('cluster_type', None) == 'BCP':
        plugins.append(TorchElasticEnvironment())

    trainer = Trainer(plugins=plugins,
                      **cfg.trainer,
                      callbacks=[ModelSummary(max_depth=3)])

    # tokenizers will be trained and tarred training data will be created if needed
    # model config is then updated
    if cfg.model.preproc_out_dir is not None:
        MTDataPreproc(cfg=cfg.model, trainer=trainer)

    exp_manager(trainer, cfg.exp_manager)

    # update resume from checkpoint found by exp_manager
    if cfg.model.resume_from_checkpoint is not None:
        resume_from_checkpoint = cfg.model.resume_from_checkpoint
    else:
        resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path
    logging.info(
        f'Resuming training from checkpoint: {resume_from_checkpoint}')

    trainer._checkpoint_connector = CheckpointConnector(
        trainer, resume_from_checkpoint=resume_from_checkpoint)
    # Override timer callback to a stateless one
    for idx, callback in enumerate(trainer.callbacks):
        if isinstance(callback, Timer):
            trainer.callbacks[idx] = StatelessTimer(cfg.trainer.max_time)

    # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams
    with open_dict(cfg):
        cfg.model.precision = cfg.trainer.precision

    if (hasattr(cfg.model, 'pretrained_model_path')
            and cfg.model.pretrained_model_path is not None):
        if not hasattr(cfg.model, 'pretrained_model_type'):
            raise ValueError("cfg.model.pretrained_model_type must be set to one of ['T5', 'BART'].")

        assert cfg.model.pretrained_model_type in ['T5', 'BART']
        if cfg.model.pretrained_model_type == 'T5':
            pretrained_cfg = MegatronT5Model.restore_from(
                cfg.model.pretrained_model_path,
                trainer=trainer,
                return_config=True)
        else:
            pretrained_cfg = MegatronBARTModel.restore_from(
                cfg.model.pretrained_model_path,
                trainer=trainer,
                return_config=True)
        OmegaConf.set_struct(pretrained_cfg, True)
        with open_dict(pretrained_cfg):
            pretrained_cfg.masked_softmax_fusion = False
            # Set source and target language/multilingual
            pretrained_cfg.src_language = cfg.model.src_language
            pretrained_cfg.tgt_language = cfg.model.tgt_language
            pretrained_cfg.multilingual = cfg.model.multilingual
            pretrained_cfg.shared_tokenizer = True

            # Max generation delta
            pretrained_cfg.max_generation_delta = cfg.model.max_generation_delta

            # Set label smoothing
            pretrained_cfg.label_smoothing = cfg.model.label_smoothing

            # Set tokenizer paths:
            pretrained_cfg.encoder_tokenizer = pretrained_cfg.tokenizer
            pretrained_cfg.decoder_tokenizer = pretrained_cfg.tokenizer

            # Pre-trained models should use the legacy sentencepiece tokenizer ex: mT5
            pretrained_cfg.encoder_tokenizer.sentencepiece_legacy = True
            pretrained_cfg.decoder_tokenizer.sentencepiece_legacy = True

            # Override dropout
            pretrained_cfg.hidden_dropout = cfg.model.hidden_dropout
            pretrained_cfg.attention_dropout = cfg.model.attention_dropout

            # Override precision
            pretrained_cfg.precision = cfg.model.precision  # Set above from trainer.precision

            # Override data and global/micro batch size.
            pretrained_cfg.train_ds = cfg.model.train_ds
            pretrained_cfg.validation_ds = cfg.model.validation_ds
            pretrained_cfg.test_ds = cfg.model.test_ds

            pretrained_cfg.micro_batch_size = cfg.model.micro_batch_size
            pretrained_cfg.global_batch_size = cfg.model.global_batch_size

            # Class target for the new class being restored.
            pretrained_cfg.target = (
                "nemo.collections.nlp.models.machine_translation.megatron_nmt_model.MegatronNMTModel"
            )

            # Optimizer overrides.
            pretrained_cfg.optim = cfg.model.optim

        model = MegatronNMTModel.restore_from(
            cfg.model.pretrained_model_path,
            trainer=trainer,
            override_config_path=pretrained_cfg,
            save_restore_connector=NLPSaveRestoreConnector(),
        )
    else:
        model = MegatronNMTModel(cfg.model, trainer)
    if cfg.do_training:
        trainer.fit(model)

    if cfg.do_testing:
        trainer.test(model)
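
# Small standalone illustration of the OmegaConf set_struct/open_dict pattern
# used when overriding the restored config above: struct mode forbids adding
# unknown keys, and open_dict temporarily lifts that restriction.
from omegaconf import OmegaConf, open_dict

_demo_cfg = OmegaConf.create({"precision": 32})
OmegaConf.set_struct(_demo_cfg, True)
with open_dict(_demo_cfg):
    _demo_cfg.label_smoothing = 0.1  # adding a new key is allowed only inside open_dict
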
def test_model_summary_callback_override_weights_summary_flag():

    trainer = Trainer(callbacks=ModelSummary(), weights_summary=None)
    assert any(isinstance(cb, ModelSummary) for cb in trainer.callbacks)
Example #8
def test_model_summary_callback_override_weights_summary_flag():
    with pytest.deprecated_call(match=r"weights_summary=None\)` is deprecated"):
        trainer = Trainer(callbacks=ModelSummary(), weights_summary=None)
    assert any(isinstance(cb, ModelSummary) for cb in trainer.callbacks)