def __init__(
    self,
    freq_masks=0,
    time_masks=0,
    freq_width=10,
    time_width=10,
    rect_masks=0,
    rect_time=5,
    rect_freq=20,
    rng=None,
    mask_value=0.0,
    use_numba_spec_augment: bool = True,
):
    """Build the cutout and augmentation stages from the given settings.

    Each stage is constructed only when its settings enable it; a disabled
    stage is replaced by a pass-through callable (or ``None`` for the Numba
    variant).

    Args:
        freq_masks: Forwarded to ``SpecAugment`` / ``SpecAugmentNumba``. Its
            sum with ``time_masks`` decides whether ``SpecAugment`` is built.
        time_masks: Forwarded to ``SpecAugment`` / ``SpecAugmentNumba``. See
            ``freq_masks``.
        freq_width: Forwarded to ``SpecAugment`` / ``SpecAugmentNumba``.
        time_width: Forwarded to ``SpecAugment`` / ``SpecAugmentNumba``.
        rect_masks: Forwarded to ``SpecCutout``, which is built only when
            this value is positive.
        rect_time: Forwarded to ``SpecCutout``.
        rect_freq: Forwarded to ``SpecCutout``.
        rng: Forwarded to every stage that gets built.
        mask_value: Forwarded to ``SpecAugment`` / ``SpecAugmentNumba``.
        use_numba_spec_augment: When truthy and
            ``numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__)``
            also returns a truthy value, a ``SpecAugmentNumba`` is built.
    """
    super().__init__()

    # Rectangular cutout stage: real module if requested, identity otherwise.
    self.spec_cutout = (
        SpecCutout(
            rect_masks=rect_masks,
            rect_time=rect_time,
            rect_freq=rect_freq,
            rng=rng,
        )
        if rect_masks > 0
        else (lambda input_spec: input_spec)
    )

    # Both SpecAugment variants are configured with the same keyword set.
    augment_kwargs = dict(
        freq_masks=freq_masks,
        time_masks=time_masks,
        freq_width=freq_width,
        time_width=time_width,
        rng=rng,
        mask_value=mask_value,
    )

    # Frequency/time masking stage: identity when no masks are requested.
    self.spec_augment = (
        SpecAugment(**augment_kwargs)
        if freq_masks + time_masks > 0
        else (lambda input_spec: input_spec)
    )

    # Check if numba is supported, and use a Numba kernel if it is.
    numba_enabled = use_numba_spec_augment and numba_utils.numba_cuda_is_supported(
        __NUMBA_MINIMUM_VERSION__
    )
    self.spec_augment_numba = (
        SpecAugmentNumba(**augment_kwargs) if numba_enabled else None
    )
import pytest import torch.cuda from omegaconf import DictConfig, ListConfig from nemo.collections.asr.models import ( EncDecClassificationModel, EncDecCTCModel, EncDecRNNTModel, EncDecSpeakerLabelModel, ) from nemo.collections.asr.modules import ConvASRDecoder, ConvASREncoder from nemo.collections.asr.parts.utils import asr_module_utils from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ NUMBA_RNNT_LOSS_AVAILABLE = numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__) class TestExportable: @pytest.mark.run_only_on('GPU') @pytest.mark.unit def test_EncDecCTCModel_export_to_onnx(self): model_config = DictConfig( { 'preprocessor': DictConfig(self.preprocessor), 'encoder': DictConfig(self.encoder_dict), 'decoder': DictConfig(self.decoder_dict), } ) model = EncDecCTCModel(cfg=model_config).cuda() with tempfile.TemporaryDirectory() as tmpdir:
import shutil import tempfile import pytest import torch from omegaconf import DictConfig from nemo.collections.asr.models.rnnt_bpe_models import EncDecRNNTBPEModel from nemo.collections.asr.parts.submodules import rnnt_beam_decoding as beam_decode from nemo.collections.asr.parts.submodules import rnnt_greedy_decoding as greedy_decode from nemo.collections.common import tokenizers from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ NUMBA_RNNT_LOSS_AVAILABLE = numba_utils.numba_cpu_is_supported( __NUMBA_MINIMUM_VERSION__) or numba_utils.numba_cuda_is_supported( __NUMBA_MINIMUM_VERSION__) @pytest.fixture() def asr_model(test_data_dir): preprocessor = { 'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({}) } model_defaults = {'enc_hidden': 1024, 'pred_hidden': 64} encoder = { 'cls': 'nemo.collections.asr.modules.ConvASREncoder', 'params': {