def main(cfg):
    if cfg.n_gpus > 0:
        cfg.model.train_ds.batch_size //= cfg.n_gpus

    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg, resolve=True)}')

    pl.utilities.seed.seed_everything(cfg.seed)

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))

    if "tokenizer" in cfg.model:
        asr_model = EncDecCTCModelBPE(cfg=cfg.model, trainer=trainer)
    else:
        asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        test_trainer = pl.Trainer(
            gpus=gpu,
            precision=trainer.precision,
            amp_level=trainer.accelerator_connector.amp_level,
            amp_backend=cfg.trainer.get("amp_backend", "native"),
        )
        if asr_model.prepare_test(test_trainer):
            test_trainer.test(asr_model)
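This `main(cfg)` expects a fully composed Hydra config. A minimal sketch of the entry point that usually drives such a NeMo script, assuming the `hydra_runner` decorator and a `conf/config.yaml` next to the script (the config path and name are placeholders, not taken from the snippet):

from nemo.core.config import hydra_runner


# Hypothetical config location; point these at your project's YAML.
@hydra_runner(config_path="conf", config_name="config")
def main(cfg):
    ...  # body as in the snippet above


if __name__ == '__main__':
    main()  # Hydra builds `cfg` from the YAML plus any command-line overrides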
def __init__(self, torch_device=None):
    if torch_device is None:
        if torch.cuda.is_available():
            torch_device = torch.device('cuda')
        else:
            torch_device = torch.device('cpu')

    self.file_config = path.join(WORK_DIR, _MODEL_CONFIG)
    self.file_checkpoints = path.join(WORK_DIR, _MODEL_WEIGHTS)

    model_config = OmegaConf.load(self.file_config)
    OmegaConf.set_struct(model_config, True)

    if isinstance(model_config, DictConfig):
        self.config = OmegaConf.to_container(model_config, resolve=True)
        self.config = OmegaConf.create(self.config)
        OmegaConf.set_struct(self.config, True)

    # EncDecCTCModel.set_model_restore_state(is_being_restored=True)
    instance = EncDecCTCModel(cfg=self.config)

    self.model_instance = instance
    self.model_instance.to(torch_device)
    self.model_instance.load_state_dict(
        torch.load(self.file_checkpoints, torch_device), False
    )
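A minimal usage sketch for this loader, assuming the enclosing class is named `ASRInference` (the class name is not visible in the snippet) and that `WORK_DIR`, `_MODEL_CONFIG`, and `_MODEL_WEIGHTS` resolve to an existing config/weights pair:

import torch

service = ASRInference()  # picks CUDA automatically when available, per __init__

# EncDecCTCModel provides file-based inference via `transcribe`
service.model_instance.eval()
with torch.no_grad():
    transcripts = service.model_instance.transcribe(['sample.wav'], batch_size=1)
print(transcripts)  # list of decoded strings, one per input file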
def test_EncDecCTCModel_adapted_export_to_onnx(self):
    model_config = DictConfig(
        {
            'preprocessor': DictConfig(self.preprocessor),
            'encoder': DictConfig(self.encoder_dict),
            'decoder': DictConfig(self.decoder_dict),
        }
    )

    # support adapter in encoder
    model_config.encoder.cls = model_config.encoder.cls + 'Adapter'  # ConvASREncoderAdapter

    # load model
    model = EncDecCTCModel(cfg=model_config)

    # add adapter
    adapter_cfg = OmegaConf.structured(
        LinearAdapterConfig(
            in_features=model_config.encoder.params.jasper[0].filters, dim=32
        )
    )
    model.add_adapter('temp', cfg=adapter_cfg)

    model = model.cuda()

    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, 'qn.onnx')
        model.export(output=filename, check_trace=True)

        onnx_model = onnx.load(filename)
        onnx.checker.check_model(onnx_model, full_check=True)  # throws when failed
        assert onnx_model.graph.input[0].name == 'audio_signal'
        assert onnx_model.graph.output[0].name == 'logprobs'
def test_EncDecCTCModel_export_to_onnx(self):
    model_config = DictConfig(
        {
            'preprocessor': DictConfig(self.preprocessor),
            'encoder': DictConfig(self.encoder_dict),
            'decoder': DictConfig(self.decoder_dict),
        }
    )
    model = EncDecCTCModel(cfg=model_config)

    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, 'qn.onnx')
        model.export(output=filename)

        onnx_model = onnx.load(filename)
        onnx.checker.check_model(onnx_model, full_check=True)  # throws when failed
        assert onnx_model.graph.input[0].name == 'audio_signal'
        assert onnx_model.graph.output[0].name == 'logprobs'
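Beyond `onnx.checker`, the exported graph can be exercised with onnxruntime. A sketch under the assumption that the export produced `audio_signal` (batch, features, time) and `length` inputs, consistent with the tensor names asserted above; the shapes are illustrative only:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(filename, providers=['CPUExecutionProvider'])

# Fake mel-spectrogram input: batch=1, 64 features, 128 frames (illustrative)
audio_signal = np.random.randn(1, 64, 128).astype(np.float32)
length = np.array([128], dtype=np.int64)

# Feed only the inputs the graph actually declares
feeds = {inp.name: arr for inp, arr in zip(sess.get_inputs(), [audio_signal, length])}
logprobs = sess.run(None, feeds)[0]
print(logprobs.shape)  # (batch, time, num_classes + 1) with the CTC blank appended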
def main(cfg):
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        if asr_model.prepare_test(trainer):
            trainer.test(asr_model)
def conformer_model():
    preprocessor = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': dict({}),
    }
    encoder = {
        'cls': 'nemo.collections.asr.modules.ConformerEncoder',
        'params': {
            'feat_in': 80,
            'feat_out': -1,
            'n_layers': 2,
            'd_model': 256,
            'subsampling': 'striding',
            'subsampling_factor': 4,
            'subsampling_conv_channels': 512,
            'ff_expansion_factor': 4,
            'self_attention_model': 'rel_pos',
            'n_heads': 8,
            'att_context_size': [-1, -1],
            'xscaling': True,
            'untie_biases': True,
            'pos_emb_max_len': 500,
            'conv_kernel_size': 31,
            'dropout': 0.1,
            'dropout_emb': 0.0,
            'dropout_att': 0.1,
        },
    }
    decoder = {
        'cls': 'nemo.collections.asr.modules.ConvASRDecoder',
        'params': {
            'feat_in': 256,
            'num_classes': 1024,
            'vocabulary': list(chr(i % 28) for i in range(0, 1024)),
        },
    }

    modelConfig = DictConfig(
        {
            'preprocessor': DictConfig(preprocessor),
            'encoder': DictConfig(encoder),
            'decoder': DictConfig(decoder),
        }
    )
    conformer_model = EncDecCTCModel(cfg=modelConfig)
    return conformer_model
def main(cfg):
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        trainer = pl.Trainer(
            gpus=gpu,
            precision=cfg.trainer.precision,
            amp_level=cfg.trainer.amp_level,
            amp_backend=cfg.trainer.amp_backend,
        )
        if asr_model.prepare_test(trainer):
            trainer.test(asr_model)
def main(cfg):
    # Generate default asr model config
    asr_model_config = configs.EncDecCTCModelConfig()

    # Merge hydra updates with model config
    # `drop_missing_subconfigs=True` is necessary here. Without it, while the dataclass will instantiate and be added
    # to the config, it contains test_ds.sample_rate = MISSING and test_ds.labels = MISSING.
    # This will raise an OmegaConf MissingMandatoryValue error when processing the dataloaders inside
    # model_utils.resolve_test_dataloaders(model=self) (used for multi data loader support).
    # In general, any operation that tries to use a DictConfig with MISSING in it will fail,
    # other than explicit update operations that change MISSING to some actual value.
    asr_model_config = update_model_config(asr_model_config, cfg, drop_missing_subconfigs=True)

    # From here on out, it's a general OmegaConf DictConfig, directly usable by our code.
    trainer = pl.Trainer(**asr_model_config.trainer)
    exp_manager(trainer, asr_model_config.get("exp_manager", None))

    asr_model = EncDecCTCModel(cfg=asr_model_config.model, trainer=trainer)

    trainer.fit(asr_model)
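The long comment above boils down to one OmegaConf rule: any read of a MISSING value raises. A minimal sketch reproducing that failure, independent of NeMo:

from omegaconf import MISSING, OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({'test_ds': {'sample_rate': MISSING, 'labels': MISSING}})
try:
    _ = cfg.test_ds.sample_rate  # reading a MISSING value raises immediately
except MissingMandatoryValue:
    print('MissingMandatoryValue: this is what drop_missing_subconfigs=True avoids')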
def main(cfg):
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:
        gpu = 1 if cfg.trainer.gpus != 0 else 0
        test_trainer = pl.Trainer(
            gpus=gpu,
            precision=trainer.precision,
            amp_level=trainer.accelerator_connector.amp_level,
            amp_backend=cfg.trainer.get("amp_backend", "native"),
        )
        if asr_model.prepare_test(test_trainer):
            test_trainer.test(asr_model)
def main():
    # NeMo Model config
    cfg = modelPT.NemoConfig(name='Custom QuartzNet')

    # Generate default asr model config
    builder = configs.EncDecCTCModelConfigBuilder(name='quartznet_15x5')

    # set model global values
    builder.set_labels(LABELS)
    builder.set_optim(cfg=optim_cfg, sched_cfg=sched_cfg)

    model_cfg = builder.build()

    # set the model config to the NeMo Model
    cfg.model = model_cfg

    # Update values
    # MODEL UPDATES
    # train ds
    model_cfg.train_ds.manifest_filepath = ""

    # validation ds
    model_cfg.validation_ds.manifest_filepath = ""

    # Trainer config
    cfg.trainer.gpus = 1
    cfg.trainer.max_epochs = 5

    # Exp Manager config
    cfg.exp_manager.name = cfg.name

    # Note usage of asdict
    trainer = pl.Trainer(**asdict(cfg.trainer))
    exp_manager(trainer, asdict(cfg.exp_manager))

    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)
def asr_model():
    preprocessor = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': dict({}),
    }
    encoder = {
        '_target_': 'nemo.collections.asr.modules.ConvASREncoder',
        'feat_in': 64,
        'activation': 'relu',
        'conv_mask': True,
        'jasper': [
            {
                'filters': 1024,
                'repeat': 1,
                'kernel': [1],
                'stride': [1],
                'dilation': [1],
                'dropout': 0.0,
                'residual': False,
                'separable': True,
                'se': True,
                'se_context_size': -1,
            }
        ],
    }
    decoder = {
        '_target_': 'nemo.collections.asr.modules.ConvASRDecoder',
        'feat_in': 1024,
        'num_classes': 28,
        'vocabulary': [
            ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'",
        ],
    }

    modelConfig = DictConfig(
        {
            'preprocessor': DictConfig(preprocessor),
            'encoder': DictConfig(encoder),
            'decoder': DictConfig(decoder),
        }
    )
    model_instance = EncDecCTCModel(cfg=modelConfig)
    return model_instance
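A smoke test one might write against this fixture, assuming pytest wires it in and relying on the standard `EncDecCTCModel.forward(input_signal=..., input_signal_length=...)` signature; the audio here is random noise, so only output shapes are checked:

import torch

def test_forward_shapes(asr_model):
    asr_model.eval()
    input_signal = torch.randn(2, 16000)  # two fake 1-second clips at 16 kHz
    length = torch.tensor([16000, 16000], dtype=torch.int64)
    with torch.no_grad():
        log_probs, encoded_len, greedy_predictions = asr_model(
            input_signal=input_signal, input_signal_length=length
        )
    # 28 vocabulary entries + 1 CTC blank = 29 output classes
    assert log_probs.shape[0] == 2
    assert log_probs.shape[-1] == 29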
def test_dataclass_instantiation(self, asr_model):
    model_cfg = configs.EncDecCTCModelConfig()

    # Update mandatory values
    vocabulary = asr_model.decoder.vocabulary
    model_cfg.model.labels = vocabulary

    # Update encoder
    model_cfg.model.encoder.activation = 'relu'
    model_cfg.model.encoder.feat_in = 64
    model_cfg.model.encoder.jasper = [
        nemo_asr.modules.conv_asr.JasperEncoderConfig(
            filters=1024,
            repeat=1,
            kernel=[1],
            stride=[1],
            dilation=[1],
            dropout=0.0,
            residual=False,
            se=True,
            se_context_size=-1,
        )
    ]

    # Update decoder
    model_cfg.model.decoder.feat_in = 1024
    model_cfg.model.decoder.num_classes = 28
    model_cfg.model.decoder.vocabulary = vocabulary

    # Construct the model
    asr_cfg = OmegaConf.create({'model': asr_model.cfg})
    model_cfg_v1 = update_model_config(model_cfg, asr_cfg)
    new_model = EncDecCTCModel(cfg=model_cfg_v1.model)

    assert new_model.num_weights == asr_model.num_weights

    # trainer and exp manager should be there
    # assert 'trainer' in model_cfg_v1
    # assert 'exp_manager' in model_cfg_v1

    # datasets and optim/sched should not be there after ModelPT.update_model_dataclass()
    assert 'train_ds' not in model_cfg_v1.model
    assert 'validation_ds' not in model_cfg_v1.model
    assert 'test_ds' not in model_cfg_v1.model
    assert 'optim' not in model_cfg_v1.model

    # Construct the model, without dropping additional keys
    asr_cfg = OmegaConf.create({'model': asr_model.cfg})
    model_cfg_v2 = update_model_config(model_cfg, asr_cfg, drop_missing_subconfigs=False)

    # Assert all components are in config
    # assert 'trainer' in model_cfg_v2
    # assert 'exp_manager' in model_cfg_v2
    assert 'train_ds' in model_cfg_v2.model
    assert 'validation_ds' in model_cfg_v2.model
    assert 'test_ds' in model_cfg_v2.model
    assert 'optim' in model_cfg_v2.model

    # Remove extra components (optim and sched can be kept without issue)
    with open_dict(model_cfg_v2.model):
        model_cfg_v2.model.pop('train_ds')
        model_cfg_v2.model.pop('validation_ds')
        model_cfg_v2.model.pop('test_ds')

    new_model = EncDecCTCModel(cfg=model_cfg_v2.model)

    assert new_model.num_weights == asr_model.num_weights
def citrinet_model():
    preprocessor = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': dict({}),
    }
    encoder = {
        'cls': 'nemo.collections.asr.modules.ConvASREncoder',
        'params': {
            'feat_in': 80,
            'activation': 'relu',
            'conv_mask': True,
            'jasper': [
                {
                    'filters': 512,
                    'repeat': 1,
                    'kernel': [5],
                    'stride': [1],
                    'dilation': [1],
                    'dropout': 0.0,
                    'residual': False,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                },
                {
                    'filters': 512,
                    'repeat': 5,
                    'kernel': [11],
                    'stride': [2],
                    'dilation': [1],
                    'dropout': 0.1,
                    'residual': True,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                    'stride_last': True,
                    'residual_mode': 'stride_add',
                },
                {
                    'filters': 512,
                    'repeat': 5,
                    'kernel': [13],
                    'stride': [1],
                    'dilation': [1],
                    'dropout': 0.1,
                    'residual': True,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                },
                {
                    'filters': 640,
                    'repeat': 1,
                    'kernel': [41],
                    'stride': [1],
                    'dilation': [1],
                    'dropout': 0.0,
                    'residual': True,
                    'separable': True,
                    'se': True,
                    'se_context_size': -1,
                },
            ],
        },
    }
    decoder = {
        'cls': 'nemo.collections.asr.modules.ConvASRDecoder',
        'params': {
            'feat_in': 640,
            'num_classes': 1024,
            'vocabulary': list(chr(i % 28) for i in range(0, 1024)),
        },
    }

    modelConfig = DictConfig(
        {
            'preprocessor': DictConfig(preprocessor),
            'encoder': DictConfig(encoder),
            'decoder': DictConfig(decoder),
        }
    )
    citri_model = EncDecCTCModel(cfg=modelConfig)
    return citri_model
def main():
    # Update values
    # MODEL UPDATES
    cfg.name = "Mini QuartzNet"
    cfg.model.labels = LABELS

    # train ds
    cfg.model.train_ds.manifest_filepath = "<path to train dataset>"
    cfg.model.train_ds.labels = LABELS
    cfg.model.train_ds.sample_rate = cfg.model.sample_rate

    # validation ds
    cfg.model.validation_ds.manifest_filepath = "<path to test dataset>"
    cfg.model.validation_ds.labels = LABELS
    cfg.model.validation_ds.sample_rate = cfg.model.sample_rate

    # `del test_ds` does not work!
    # Refer - https://stackoverflow.com/questions/58119758/how-to-remove-dataclass-attributes
    # Hydra/OmegaConf don't allow custom .asdict() methods either.
    # For now, explicitly set parameters.
    cfg.model.test_ds.sample_rate = cfg.model.sample_rate
    cfg.model.test_ds.labels = cfg.model.labels

    # preprocessor
    cfg.model.preprocessor.sample_rate = cfg.model.sample_rate

    # spec aug
    cfg.model.spec_augment.rect_masks = 5
    cfg.model.spec_augment.rect_freq = 50
    cfg.model.spec_augment.rect_time = 120

    # encoder
    cfg.model.encoder.feat_in = cfg.model.preprocessor.features
    cfg.model.encoder.activation = 'relu'
    cfg.model.encoder.jasper = qn_15x5

    # decoder
    cfg.model.decoder.feat_in = qn_15x5[-1].filters
    cfg.model.decoder.num_classes = len(LABELS)
    cfg.model.decoder.vocabulary = LABELS

    # optim
    cfg.model.optim.name = 'novograd'
    cfg.model.optim.lr = 0.01

    # `betas` don't exist inside the base config,
    # so they cannot be added as such!
    # Same for `weight_decay`.
    cfg.model.optim.betas = [0.8, 0.5]
    cfg.model.optim.weight_decay = 0.001

    # sched
    # As parameters such as warmup_steps and warmup_ratio
    # don't exist inside the shell config, these values are not added!
    cfg.model.optim.sched.name = "CosineAnnealing"
    cfg.model.optim.sched.warmup_steps = None
    cfg.model.optim.sched.warmup_ratio = 0.01

    # Trainer config
    cfg.trainer.gpus = 1
    cfg.trainer.max_epochs = 5

    # Exp Manager config
    cfg.exp_manager.name = cfg.name

    # Note usage of asdict
    trainer = pl.Trainer(**asdict(cfg.trainer))
    exp_manager(trainer, asdict(cfg.exp_manager))

    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)
def main(cfg):
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)
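With a `main` this bare, everything lives in the YAML, so runs are typically parameterized via Hydra overrides on the command line. An illustrative invocation, assuming the script is saved as `speech_to_text.py` and wrapped in a `hydra_runner` entry point (the script name and config keys are assumptions):

# python speech_to_text.py \
#     model.train_ds.manifest_filepath=/data/train_manifest.json \
#     model.validation_ds.manifest_filepath=/data/val_manifest.json \
#     trainer.gpus=1 \
#     trainer.max_epochs=5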