Example #1
def main(cfg):
    if cfg.n_gpus > 0:
        cfg.model.train_ds.batch_size //= cfg.n_gpus

    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg, resolve=True)}')

    pl.utilities.seed.seed_everything(cfg.seed)

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    if "tokenizer" in cfg.model:
        asr_model = EncDecCTCModelBPE(cfg=cfg.model, trainer=trainer)
    else:
        asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if hasattr(cfg.model,
               'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        test_trainer = pl.Trainer(
            gpus=gpu,
            precision=trainer.precision,
            amp_level=trainer.accelerator_connector.amp_level,
            amp_backend=cfg.trainer.get("amp_backend", "native"),
        )
        if asr_model.prepare_test(test_trainer):
            test_trainer.test(asr_model)
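
These main(cfg) entry points are excerpted without their module-level scaffolding. A minimal sketch of the imports and Hydra decorator that NeMo training scripts of this shape typically carry; the config_path/config_name values are placeholders, not taken from the original:

import pytorch_lightning as pl
from omegaconf import OmegaConf

from nemo.collections.asr.models import EncDecCTCModel, EncDecCTCModelBPE
from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.exp_manager import exp_manager


@hydra_runner(config_path="conf", config_name="config")  # placeholder config location
def main(cfg):
    ...  # body as in the example above


if __name__ == '__main__':
    main()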
Example #2
    def __init__(self, torch_device=None):
        if torch_device is None:
            if torch.cuda.is_available():
                torch_device = torch.device('cuda')
            else:
                torch_device = torch.device('cpu')

        self.file_config = path.join(WORK_DIR, _MODEL_CONFIG)
        self.file_checkpoints = path.join(WORK_DIR, _MODEL_WEIGHTS)

        model_config = OmegaConf.load(self.file_config)
        OmegaConf.set_struct(model_config, True)

        if isinstance(model_config, DictConfig):
            self.config = OmegaConf.to_container(model_config, resolve=True)
            self.config = OmegaConf.create(self.config)
            OmegaConf.set_struct(self.config, True)

        # EncDecCTCModel.set_model_restore_state(is_being_restored=True)
        instance = EncDecCTCModel(cfg=self.config)

        self.model_instance = instance
        self.model_instance.to(torch_device)
        self.model_instance.load_state_dict(
            torch.load(self.file_checkpoints, map_location=torch_device),
            strict=False)  # strict=False tolerates missing/unexpected keys
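
A hypothetical usage of this loader; the enclosing class name is not shown in the snippet, so ASRInference below is a placeholder (WORK_DIR, _MODEL_CONFIG, and _MODEL_WEIGHTS are likewise module constants defined elsewhere):

asr = ASRInference()  # placeholder name; the snippet above only shows __init__
asr.model_instance.eval()

# EncDecCTCModel.transcribe() takes a list of audio file paths
print(asr.model_instance.transcribe(['sample.wav']))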
Example #3
    def test_EncDecCTCModel_adapted_export_to_onnx(self):
        model_config = DictConfig({
            'preprocessor': DictConfig(self.preprocessor),
            'encoder': DictConfig(self.encoder_dict),
            'decoder': DictConfig(self.decoder_dict),
        })

        # support adapter in encoder
        model_config.encoder.cls = model_config.encoder.cls + 'Adapter'  # ConvASREncoderAdapter

        # load model
        model = EncDecCTCModel(cfg=model_config)

        # add adapter
        adapter_cfg = OmegaConf.structured(
            LinearAdapterConfig(
                in_features=model_config.encoder.params.jasper[0].filters,
                dim=32))
        model.add_adapter('temp', cfg=adapter_cfg)

        model = model.cuda()

        with tempfile.TemporaryDirectory() as tmpdir:
            filename = os.path.join(tmpdir, 'qn.onnx')
            model.export(
                output=filename,
                check_trace=True,
            )
            onnx_model = onnx.load(filename)
            onnx.checker.check_model(onnx_model,
                                     full_check=True)  # raises on failure
            assert onnx_model.graph.input[0].name == 'audio_signal'
            assert onnx_model.graph.output[0].name == 'logprobs'
Example #4
    def test_EncDecCTCModel_export_to_onnx(self):
        model_config = DictConfig({
            'preprocessor': DictConfig(self.preprocessor),
            'encoder': DictConfig(self.encoder_dict),
            'decoder': DictConfig(self.decoder_dict),
        })
        model = EncDecCTCModel(cfg=model_config)
        with tempfile.TemporaryDirectory() as tmpdir:
            filename = os.path.join(tmpdir, 'qn.onnx')
            model.export(output=filename)
            onnx_model = onnx.load(filename)
            onnx.checker.check_model(onnx_model,
                                     full_check=True)  # raises on failure
            assert onnx_model.graph.input[0].name == 'audio_signal'
            assert onnx_model.graph.output[0].name == 'logprobs'
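
As a follow-up to either export test, the saved graph can be exercised with onnxruntime. A sketch under assumed dimensions: the exported encoder consumes processed features shaped [batch, feat_in, time], and some exports also expose a 'length' input:

import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession('qn.onnx')  # path written by the test above
feat_in, time_steps = 64, 128  # assumed; match the encoder config in use
feed = {'audio_signal': np.random.randn(1, feat_in, time_steps).astype(np.float32)}
if any(inp.name == 'length' for inp in session.get_inputs()):
    feed['length'] = np.array([time_steps], dtype=np.int64)
logprobs = session.run(None, feed)[0]
print(logprobs.shape)  # (batch, time, num_classes + 1)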
Example #5
def main(cfg):
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if hasattr(cfg.model,
               'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        if asr_model.prepare_test(trainer):
            trainer.test(asr_model)
Example #6
def conformer_model():
    preprocessor = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': dict({}),
    }
    encoder = {
        'cls': 'nemo.collections.asr.modules.ConformerEncoder',
        'params': {
            'feat_in': 80,
            'feat_out': -1,
            'n_layers': 2,
            'd_model': 256,
            'subsampling': 'striding',
            'subsampling_factor': 4,
            'subsampling_conv_channels': 512,
            'ff_expansion_factor': 4,
            'self_attention_model': 'rel_pos',
            'n_heads': 8,
            'att_context_size': [-1, -1],
            'xscaling': True,
            'untie_biases': True,
            'pos_emb_max_len': 500,
            'conv_kernel_size': 31,
            'dropout': 0.1,
            'dropout_emb': 0.0,
            'dropout_att': 0.1,
        },
    }

    decoder = {
        'cls': 'nemo.collections.asr.modules.ConvASRDecoder',
        'params': {
            'feat_in': 256,
            'num_classes': 1024,
            'vocabulary': list(chr(i % 28) for i in range(0, 1024))  # dummy 1024-entry vocabulary
        },
    }

    modelConfig = DictConfig({
        'preprocessor': DictConfig(preprocessor),
        'encoder': DictConfig(encoder),
        'decoder': DictConfig(decoder)
    })
    conformer_model = EncDecCTCModel(cfg=modelConfig)
    return conformer_model
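
A quick sanity check of the fixture above: EncDecCTCModel.forward() consumes raw audio plus per-example lengths (the built-in preprocessor handles feature extraction) and returns log-probabilities, encoded lengths, and greedy predictions. A sketch assuming 16 kHz input:

import torch

model = conformer_model()
model.eval()

audio = torch.randn(2, 16000)           # two 1-second dummy waveforms
lengths = torch.tensor([16000, 16000])
with torch.no_grad():
    log_probs, encoded_len, greedy_preds = model(
        input_signal=audio, input_signal_length=lengths)
print(log_probs.shape)  # [batch, time, num_classes + 1]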
Example #7
def main(cfg):
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        trainer = pl.Trainer(
            gpus=gpu,
            precision=cfg.trainer.precision,
            amp_level=cfg.trainer.amp_level,
            amp_backend=cfg.trainer.amp_backend,
        )
        if asr_model.prepare_test(trainer):
            trainer.test(asr_model)
Example #8
def main(cfg):
    # Generate default asr model config
    asr_model_config = configs.EncDecCTCModelConfig()

    # Merge hydra updates with model config
    # `drop_missing_subconfigs=True` is necessary here. Without it, while the data class will instantiate and be added
    # to the config, it contains test_ds.sample_rate = MISSING and test_ds.labels = MISSING.
    # This will raise an OmegaConf MissingMandatoryValue error when processing the dataloaders inside
    # model_utils.resolve_test_dataloaders(model=self) (used for multi data loader support).
    # In general, any operation that tries to use a DictConfig with MISSING in it will fail,
    # other than explicit update operations to change MISSING to some actual value.
    asr_model_config = update_model_config(asr_model_config,
                                           cfg,
                                           drop_missing_subconfigs=True)

    # From here on out, it's a general OmegaConf DictConfig, directly usable by our code.
    trainer = pl.Trainer(**asr_model_config.trainer)
    exp_manager(trainer, asr_model_config.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=asr_model_config.model, trainer=trainer)

    trainer.fit(asr_model)
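
The comment block above hinges on how OmegaConf treats MISSING: creating a config with MISSING fields succeeds, but any read of such a field raises. A minimal standalone illustration (the dataclass here is illustrative only):

from dataclasses import dataclass

from omegaconf import MISSING, OmegaConf
from omegaconf.errors import MissingMandatoryValue


@dataclass
class DummyDS:
    sample_rate: int = MISSING


cfg = OmegaConf.structured(DummyDS)
try:
    _ = cfg.sample_rate  # reading a MISSING field raises
except MissingMandatoryValue:
    print('MISSING fields fail on access, not on creation')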
Example #9
def main(cfg):
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if hasattr(cfg.model,
               'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        test_trainer = pl.Trainer(
            gpus=gpu,
            precision=trainer.precision,
            amp_level=trainer.accelerator_connector.amp_level,
            amp_backend=cfg.trainer.get("amp_backend", "native"),
        )
        if asr_model.prepare_test(test_trainer):
            test_trainer.test(asr_model)
Example #10
def main():
    # NeMo Model config
    cfg = modelPT.NemoConfig(name='Custom QuartzNet')

    # Generate default asr model config
    builder = configs.EncDecCTCModelConfigBuilder(name='quartznet_15x5')

    # set model global values
    builder.set_labels(LABELS)
    builder.set_optim(cfg=optim_cfg, sched_cfg=sched_cfg)

    model_cfg = builder.build()

    # set the model config to the NeMo Model
    cfg.model = model_cfg

    # Update values
    # MODEL UPDATES
    # train ds
    model_cfg.train_ds.manifest_filepath = ""

    # validation ds
    model_cfg.validation_ds.manifest_filepath = ""

    # Trainer config
    cfg.trainer.gpus = 1
    cfg.trainer.max_epochs = 5

    # Exp Manager config
    cfg.exp_manager.name = cfg.name

    # Note usage of asdict
    trainer = pl.Trainer(**asdict(cfg.trainer))
    exp_manager(trainer, asdict(cfg.exp_manager))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)
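
optim_cfg and sched_cfg are defined elsewhere in the original script. One plausible construction using NeMo's optimizer/scheduler dataclasses; the specific classes and values below are assumptions, not taken from the source:

from nemo.core.config import optimizers, schedulers

# Assumed definitions; the original script's values are not shown
optim_cfg = optimizers.NovogradParams(lr=0.01, betas=(0.8, 0.5), weight_decay=0.001)
sched_cfg = schedulers.CosineAnnealingParams(warmup_ratio=0.05, min_lr=1e-5)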
Example #11
def asr_model():
    preprocessor = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': dict({}),
    }
    encoder = {
        '_target_': 'nemo.collections.asr.modules.ConvASREncoder',
        'feat_in': 64,
        'activation': 'relu',
        'conv_mask': True,
        'jasper': [{
            'filters': 1024,
            'repeat': 1,
            'kernel': [1],
            'stride': [1],
            'dilation': [1],
            'dropout': 0.0,
            'residual': False,
            'separable': True,
            'se': True,
            'se_context_size': -1,
        }],
    }

    decoder = {
        '_target_': 'nemo.collections.asr.modules.ConvASRDecoder',
        'feat_in': 1024,
        'num_classes': 28,
        # space, a-z, apostrophe -- the same 28 characters as the original list
        'vocabulary': list(" abcdefghijklmnopqrstuvwxyz'"),
    }
    modelConfig = DictConfig({
        'preprocessor': DictConfig(preprocessor),
        'encoder': DictConfig(encoder),
        'decoder': DictConfig(decoder)
    })

    model_instance = EncDecCTCModel(cfg=modelConfig)
    return model_instance
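
These bare factory functions (here and in Examples #6 and #13) read like pytest fixtures whose decorators were stripped during extraction; Example #12 below receives asr_model as a test argument. The presumed original shape:

import pytest

@pytest.fixture()
def asr_model():
    ...  # body as above; pytest injects the result into tests by parameter name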
Example #12
    def test_dataclass_instantiation(self, asr_model):
        model_cfg = configs.EncDecCTCModelConfig()

        # Update mandatory values
        vocabulary = asr_model.decoder.vocabulary
        model_cfg.model.labels = vocabulary

        # Update encoder
        model_cfg.model.encoder.activation = 'relu'
        model_cfg.model.encoder.feat_in = 64
        model_cfg.model.encoder.jasper = [
            nemo_asr.modules.conv_asr.JasperEncoderConfig(
                filters=1024,
                repeat=1,
                kernel=[1],
                stride=[1],
                dilation=[1],
                dropout=0.0,
                residual=False,
                se=True,
                se_context_size=-1,
            )
        ]

        # Update decoder
        model_cfg.model.decoder.feat_in = 1024
        model_cfg.model.decoder.num_classes = 28
        model_cfg.model.decoder.vocabulary = vocabulary

        # Construct the model
        asr_cfg = OmegaConf.create({'model': asr_model.cfg})
        model_cfg_v1 = update_model_config(model_cfg, asr_cfg)
        new_model = EncDecCTCModel(cfg=model_cfg_v1.model)

        assert new_model.num_weights == asr_model.num_weights
        # trainer and exp manager should be there
        # assert 'trainer' in model_cfg_v1
        # assert 'exp_manager' in model_cfg_v1
        # datasets and optim/sched should not be there after ModelPT.update_model_dataclass()
        assert 'train_ds' not in model_cfg_v1.model
        assert 'validation_ds' not in model_cfg_v1.model
        assert 'test_ds' not in model_cfg_v1.model
        assert 'optim' not in model_cfg_v1.model

        # Construct the model, without dropping additional keys
        asr_cfg = OmegaConf.create({'model': asr_model.cfg})
        model_cfg_v2 = update_model_config(model_cfg,
                                           asr_cfg,
                                           drop_missing_subconfigs=False)

        # Assert all components are in config
        # assert 'trainer' in model_cfg_v2
        # assert 'exp_manager' in model_cfg_v2
        assert 'train_ds' in model_cfg_v2.model
        assert 'validation_ds' in model_cfg_v2.model
        assert 'test_ds' in model_cfg_v2.model
        assert 'optim' in model_cfg_v2.model

        # Remove extra components (optim and sched can be kept without issue)
        with open_dict(model_cfg_v2.model):
            model_cfg_v2.model.pop('train_ds')
            model_cfg_v2.model.pop('validation_ds')
            model_cfg_v2.model.pop('test_ds')

        new_model = EncDecCTCModel(cfg=model_cfg_v2.model)

        assert new_model.num_weights == asr_model.num_weights
Example #13
def citrinet_model():
    preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})}
    encoder = {
        'cls': 'nemo.collections.asr.modules.ConvASREncoder',
        'params': {
            'feat_in': 80,
            'activation': 'relu',
            'conv_mask': True,
            'jasper': [
                {
                    'filters': 512,
                    'repeat': 1,
                    'kernel': [5],
                    'stride': [1],
                    'dilation': [1],
                    'dropout': 0.0,
                    'residual': False,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                },
                {
                    'filters': 512,
                    'repeat': 5,
                    'kernel': [11],
                    'stride': [2],
                    'dilation': [1],
                    'dropout': 0.1,
                    'residual': True,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                    'stride_last': True,
                    'residual_mode': 'stride_add',
                },
                {
                    'filters': 512,
                    'repeat': 5,
                    'kernel': [13],
                    'stride': [1],
                    'dilation': [1],
                    'dropout': 0.1,
                    'residual': True,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                },
                {
                    'filters': 640,
                    'repeat': 1,
                    'kernel': [41],
                    'stride': [1],
                    'dilation': [1],
                    'dropout': 0.0,
                    'residual': True,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                },
            ],
        },
    }

    decoder = {
        'cls': 'nemo.collections.asr.modules.ConvASRDecoder',
        'params': {'feat_in': 640, 'num_classes': 1024, 'vocabulary': list(chr(i % 28) for i in range(0, 1024))},
    }

    modelConfig = DictConfig(
        {'preprocessor': DictConfig(preprocessor), 'encoder': DictConfig(encoder), 'decoder': DictConfig(decoder)}
    )
    citri_model = EncDecCTCModel(cfg=modelConfig)
    return citri_model
Example #14
def main():
    # Update values
    # MODEL UPDATES
    cfg.name = "Mini QuartzNet"
    cfg.model.labels = LABELS

    # train ds
    cfg.model.train_ds.manifest_filepath = "<path to train dataset>"
    cfg.model.train_ds.labels = LABELS
    cfg.model.train_ds.sample_rate = cfg.model.sample_rate

    # validation ds
    cfg.model.validation_ds.manifest_filepath = "<path to test dataset>"
    cfg.model.validation_ds.labels = LABELS
    cfg.model.validation_ds.sample_rate = cfg.model.sample_rate

    # del `test_ds` does not work!
    # Refer - https://stackoverflow.com/questions/58119758/how-to-remove-dataclass-attributes
    # Hydra/OmegaConf don't allow custom .asdict() methods either
    # For now, explicitly set parameters
    cfg.model.test_ds.sample_rate = cfg.model.sample_rate
    cfg.model.test_ds.labels = cfg.model.labels

    # preprocessor
    cfg.model.preprocessor.sample_rate = cfg.model.sample_rate

    # spec aug
    cfg.model.spec_augment.rect_masks = 5
    cfg.model.spec_augment.rect_freq = 50
    cfg.model.spec_augment.rect_time = 120

    # encoder
    cfg.model.encoder.feat_in = cfg.model.preprocessor.features
    cfg.model.encoder.activation = 'relu'
    cfg.model.encoder.jasper = qn_15x5

    # decoder
    cfg.model.decoder.feat_in = qn_15x5[-1].filters
    cfg.model.decoder.num_classes = len(LABELS)
    cfg.model.decoder.vocabulary = LABELS

    # optim
    cfg.model.optim.name = 'novograd'
    cfg.model.optim.lr = 0.01

    # `betas` don't exist inside the base config,
    # so they cannot be added as such!
    # Same for `weight_decay`.
    cfg.model.optim.betas = [0.8, 0.5]
    cfg.model.optim.weight_decay = 0.001

    # sched
    # As parameters such as warmup_steps and warmup_ratio
    # don't exist inside the shell config, these values are not added!
    cfg.model.optim.sched.name = "CosineAnnealing"
    cfg.model.optim.sched.warmup_steps = None
    cfg.model.optim.sched.warmup_ratio = 0.01

    # Trainer config
    cfg.trainer.gpus = 1
    cfg.trainer.max_epochs = 5

    # Exp Manager config
    cfg.exp_manager.name = cfg.name

    # Note usage of asdict
    trainer = pl.Trainer(**asdict(cfg.trainer))
    exp_manager(trainer, asdict(cfg.exp_manager))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)
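
cfg, LABELS, and qn_15x5 come from elsewhere in the original script. A hedged stand-in for qn_15x5, reusing the JasperEncoderConfig dataclass seen in Example #12; a real 15x5 QuartzNet lists many more blocks, and the values below are assumptions:

from nemo.collections.asr.modules.conv_asr import JasperEncoderConfig

qn_15x5 = [
    JasperEncoderConfig(filters=256, repeat=1, kernel=[33], stride=[2],
                        dilation=[1], dropout=0.0, residual=False, separable=True),
    # ... intermediate blocks elided ...
    JasperEncoderConfig(filters=1024, repeat=1, kernel=[1], stride=[1],
                        dilation=[1], dropout=0.0, residual=False, separable=True),
]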
Example #15
def main(cfg):
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)