class PunctuationCapitalizationConfig(NemoConfig):
    """
    A config for punctuation model training and testing.

    See an example of full config in
    `nemo/examples/nlp/token_classification/conf/punctuation_capitalization_config.yaml
    <https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/conf/punctuation_capitalization_config.yaml>`_
    """

    pretrained_model: Optional[str] = None
    """Can be an NVIDIA's NGC cloud model or a path to a .nemo checkpoint. You can get list of possible cloud options
    by calling method
    :func:`~nemo.collections.nlp.models.token_classification.punctuation_capitalization_model.PunctuationCapitalizationModel.list_available_models`.
    """

    name: Optional[str] = 'Punctuation_and_Capitalization'
    """A name of the model. Used for naming output directories and ``.nemo`` checkpoints."""

    do_training: bool = True
    """Whether to perform training of the model."""

    do_testing: bool = False
    """Whether ot perform testing of the model."""

    model: PunctuationCapitalizationModelConfig = PunctuationCapitalizationModelConfig()
    """A configuration for the
    :class:`~nemo.collections.nlp.models.token_classification.punctuation_capitalization_model.PunctuationCapitalizationModel`
    model."""

    trainer: Optional[TrainerConfig] = TrainerConfig()
    """Contains ``Trainer`` Lightning class constructor parameters."""

    exp_manager: Optional[ExpManagerConfig] = ExpManagerConfig(name=name, files_to_copy=[])
    """A configuration with various NeMo training options such as output directories, resuming from checkpoint,
Beispiel #2
0
class ParallelAlignmentConfig:
    """Groups the settings of a parallel alignment run.

    It holds the model name, the prediction dataset config, the aligner
    arguments, the output path, the model stride and the trainer config.

    NOTE(review): no decorator is visible on this class in this excerpt; it is
    presumably consumed as a structured (dataclass-style) config - confirm
    against the full file.
    """
    model: Optional[str] = None  # model name; no model is selected by default
    # Prediction dataset config, built with return_sample_id=True and 4 workers.
    predict_ds: ASRDatasetConfig = ASRDatasetConfig(return_sample_id=True, num_workers=4)
    # Aligner arguments, left at the defaults of K2AlignerWrapperModelConfig.
    aligner_args: K2AlignerWrapperModelConfig = K2AlignerWrapperModelConfig()
    # No usable default: presumably MISSING marks the field as mandatory
    # (OmegaConf convention) - TODO confirm where MISSING is imported from.
    output_path: str = MISSING
    # NOTE(review): presumably the downsampling factor of the model - confirm.
    model_stride: int = 8

    # NOTE(review): gpus=-1 presumably selects every available GPU - confirm
    # against the TrainerConfig / Lightning version in use.
    trainer: TrainerConfig = TrainerConfig(gpus=-1, accelerator="ddp")

    # these arguments will be ignored
    return_predictions: bool = False
    use_cer: bool = False
class AppConfig(Config):
    """
    This is the structured config for this application.

    Args:
        name: Description of the application.
        trainer: Configuration of the trainer.
        model: Configuration of the model.
    """

    name: str = "Training of a LeNet-5 Model using a pure PyTorchLightning approach - using DDP on 2 GPUs."
    # Trainer defaults match the description above: 2 GPUs with the "ddp" accelerator.
    trainer: TrainerConfig = TrainerConfig(gpus=2, accelerator="ddp")
    # Model settings, left at the defaults of MNISTLeNet5Config.
    model: MNISTLeNet5Config = MNISTLeNet5Config()
Beispiel #4
0
class ParallelTranscriptionConfig:
    """Groups the settings of a parallel transcription run.

    It holds the model name, the prediction dataset config, the output path,
    the prediction/metric flags, the RNNT decoding config and the trainer
    config.

    NOTE(review): no decorator is visible on this class in this excerpt; it is
    presumably consumed as a structured (dataclass-style) config - confirm
    against the full file.
    """
    model: Optional[str] = None  # model name; no model is selected by default
    # Prediction dataset config, built with return_sample_id=True and 4 workers.
    predict_ds: ASRDatasetConfig = ASRDatasetConfig(return_sample_id=True,
                                                    num_workers=4)
    # No usable default: presumably MISSING marks the field as mandatory
    # (OmegaConf convention) - TODO confirm where MISSING is imported from.
    output_path: str = MISSING

    # when return_predictions is enabled, the prediction call would keep all the predictions in memory and return them when prediction is done
    return_predictions: bool = False
    # NOTE(review): presumably switches the reported metric to character error
    # rate - confirm against the code that reads this flag.
    use_cer: bool = False

    # decoding strategy for RNNT models
    rnnt_decoding: RNNTDecodingConfig = RNNTDecodingConfig()
    # NOTE(review): gpus=-1 presumably selects every available GPU - confirm
    # against the TrainerConfig / Lightning version in use.
    trainer: TrainerConfig = TrainerConfig(gpus=-1, accelerator="ddp")
Beispiel #5
0
def nemo_convert(argv):
    """Convert a .nemo saved model trained on previous versions of nemo into a nemo file with current version.

    Args:
        argv: Command-line arguments, forwarded unchanged to ``get_args``. The
            parsed result must expose ``verbose``, ``source``, ``out``,
            ``megatron_legacy`` and ``megatron_checkpoint``.

    Raises:
        ValueError: If ``verbose`` does not name a logging level, or if
            Megatron legacy conversion is requested without a Megatron
            checkpoint.
        Exception: Any error raised while restoring or saving the model is
            logged and re-raised unchanged.
    """
    args = get_args(argv)

    # Resolve the requested verbosity into a numeric logging level. The name is
    # upper-cased so that e.g. "debug" and "DEBUG" are both accepted.
    loglevel = logging.INFO
    if args.verbose is not None:
        numeric_level = getattr(logging, args.verbose.upper(), None)
        if not isinstance(numeric_level, int):
            # Report what the user actually typed, not the failed lookup result.
            raise ValueError('Invalid log level: %s' % args.verbose)
        loglevel = numeric_level

    logger = logging.getLogger(__name__)
    # Iterate over a copy: removeHandler() mutates logger.handlers, and removing
    # entries while walking the live list would skip handlers.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
    logging.basicConfig(level=loglevel,
                        format='%(asctime)s [%(levelname)s] %(message)s')
    logging.info("Logging level set to {}".format(loglevel))

    nemo_in = args.source
    out = args.out

    # Fail fast with a clear error. Previously this case only logged the
    # message and then crashed on the unbound ``model`` variable, which was
    # reported as a misleading "Failed to restore model" error.
    if args.megatron_legacy and not args.megatron_checkpoint:
        message = "Megatron Checkpoint must be provided if Megatron legacy is chosen"
        logging.error(message)
        raise ValueError(message)

    # Create a PL trainer object which is required for restoring Megatron models
    cfg_trainer = TrainerConfig(
        gpus=1,
        accelerator="ddp",
        num_nodes=1,
        # Need to set the following two to False as ExpManager will take care of them differently.
        logger=False,
        checkpoint_callback=False,
    )
    # NOTE(review): the config object is passed as the first positional argument
    # of pl.Trainer rather than expanded into keyword arguments - confirm this
    # is what the installed Lightning version expects.
    trainer = pl.Trainer(cfg_trainer)

    logging.info("Restoring NeMo model from '{}'".format(nemo_in))
    try:
        # If the megatron based NLP model was trained on NeMo < 1.5, then we need to update the lm_checkpoint on the model config
        if args.megatron_legacy:
            connector = NLPSaveRestoreConnector()
            # First pass: load only the config so it can be patched.
            model_cfg = ModelPT.restore_from(
                restore_path=nemo_in,
                save_restore_connector=connector,
                trainer=trainer,
                return_config=True)
            OmegaConf.set_struct(model_cfg, True)
            # open_dict temporarily lifts struct mode so new keys can be added.
            with open_dict(model_cfg):
                model_cfg.language_model.lm_checkpoint = args.megatron_checkpoint
                model_cfg['megatron_legacy'] = True
                model_cfg['masked_softmax_fusion'] = False
                model_cfg['bias_gelu_fusion'] = False
            # Second pass: restore the model itself with the patched config.
            model = ModelPT.restore_from(
                restore_path=nemo_in,
                save_restore_connector=connector,
                trainer=trainer,
                override_config_path=model_cfg,
            )
        else:
            model = ModelPT.restore_from(restore_path=nemo_in, trainer=trainer)
        logging.info("Model {} restored from '{}'".format(
            model.cfg.target, nemo_in))

        # Save the model
        model.save_to(out)
        logging.info("Successfully converted to {}".format(out))

        del model
    except Exception:
        logging.error(
            "Failed to restore model from NeMo file : {}. Please make sure you have the latest NeMo package installed with [all] dependencies."
            .format(nemo_in))
        # Bare raise re-raises the active exception with its original traceback.
        raise