def test_eff_save_restore_from_nemo_file_encrypted(self, asr_model):
    """Check that an encrypted save/restore round trip keeps the weight count.

    The model is saved while an encryption key is set on ``NeMoArchive``.
    Restoring without a key must raise ``PermissionError``; restoring with the
    key must produce a model whose ``num_weights`` equals the original's.

    Args:
        asr_model: ASR model instance under test.
    """
    with tempfile.NamedTemporaryFile() as fp:
        filename = fp.name

        try:
            # Set key - use checkpoint encryption.
            NeMoArchive.set_encryption_key("test_key")

            # Save model (with random artifact).
            with tempfile.NamedTemporaryFile() as artifact:
                asr_model.register_artifact(config_path=None, src=artifact.name)
                asr_model.save_to(save_path=filename)

            # Restoring the encrypted archive without the key must fail.
            NeMoArchive.set_encryption_key(None)
            with pytest.raises(PermissionError):
                EncDecCTCModel.restore_from(restore_path=filename)

            # Restoring with the key must succeed.
            NeMoArchive.set_encryption_key("test_key")
            asr_model3 = EncDecCTCModel.restore_from(restore_path=filename)
        finally:
            # The key is process-wide state: reset it even when the test
            # fails so later save/restore calls are not affected.
            NeMoArchive.set_encryption_key(None)

        assert asr_model.num_weights == asr_model3.num_weights
def __init__(
    self,
    encoder_decoder: EncDecCTCModel,
    batch_size: int = 1,
    dither: float = 0.0,
    pad_to: int = 0,
    device: str = "cuda",
    **kwargs
) -> None:
    """Rebuild the model's preprocessor with overrides and set up state.

    The preprocessor section of ``encoder_decoder._cfg`` is taken out of
    struct mode while ``dither``, ``pad_to`` and ``normalize`` are
    overwritten, then locked again, and the model's preprocessor is
    re-created from that section.

    Args:
        encoder_decoder: CTC model; its ``preprocessor`` attribute is replaced.
        batch_size: forwarded to ``ASRAudioEncoderDecoder``.
        dither: value stored in ``preprocessor.params.dither``.
        pad_to: value stored in ``preprocessor.params.pad_to``.
        device: forwarded to ``ASRAudioEncoderDecoder``.
        **kwargs: forwarded to ``ASRInferenceParameters.from_omega``.
    """
    model_cfg = encoder_decoder._cfg

    # Unlock the preprocessor config only for the duration of the overrides.
    OmegaConf.set_struct(model_cfg.preprocessor, value=False)
    model_cfg.preprocessor.params.dither = dither
    model_cfg.preprocessor.params.pad_to = pad_to
    model_cfg.preprocessor.params.normalize = spectrogram_normalization()
    OmegaConf.set_struct(model_cfg.preprocessor, value=True)

    rebuilt_preprocessor = encoder_decoder.from_config_dict(model_cfg.preprocessor)
    encoder_decoder.preprocessor = rebuilt_preprocessor

    self.params = ASRInferenceParameters.from_omega(model_cfg, **kwargs)
    self.overlap_timesteps = self.params.compute_overlap_timesteps()
    self.buffer = np.zeros(self.params.buffer_size, dtype=np.float32)

    self.audio_encoder_decoder = ASRAudioEncoderDecoder(
        encoder_decoder,
        self.params.sample_rate,
        batch_size,
        device=device,
    )

    self.prev_char = ""
    self.reset()
def _str_to_bool(value):
    """Convert a command-line token to a bool.

    ``type=bool`` in argparse treats every non-empty string (including
    "False") as True. This parser accepts the usual spellings instead. The
    empty string maps to False because that was the only way to obtain False
    with ``type=bool``.

    Raises:
        ValueError: for unrecognised tokens; argparse reports it as a usage error.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise ValueError(f"expected a boolean value, got {value!r}")


def main():
    """Transcribe an evaluation manifest with a CTC model and check its WER.

    Parses command-line options, loads the model (from a local ``.nemo`` file
    or by pretrained name), runs greedy decoding over the test dataloader and
    compares the word error rate with ``--wer_tolerance``.

    Raises:
        ValueError: if the measured WER exceeds ``--wer_tolerance``.
    """
    parser = ArgumentParser()
    # NOTE(review): ``required=True`` makes the default below unreachable;
    # left as-is so the command-line contract does not change.
    parser.add_argument(
        "--asr_model", type=str, default="QuartzNet15x5Base-En", required=True, help="Pass: '******'",
    )
    parser.add_argument("--dataset", type=str, required=True, help="path to evaluation data")
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--wer_tolerance", type=float, default=1.0, help="used by test")
    parser.add_argument(
        "--normalize_text",
        default=True,
        type=_str_to_bool,
        help="Normalize transcripts or not. Set to False for non-English.",
    )
    args = parser.parse_args()
    torch.set_grad_enabled(False)

    if args.asr_model.endswith('.nemo'):
        logging.info(f"Using local ASR model from {args.asr_model}")
        asr_model = EncDecCTCModel.restore_from(restore_path=args.asr_model)
    else:
        logging.info(f"Using NGC cloud ASR model {args.asr_model}")
        asr_model = EncDecCTCModel.from_pretrained(model_name=args.asr_model)

    asr_model.setup_test_data(
        test_data_config={
            'sample_rate': 16000,
            'manifest_filepath': args.dataset,
            'labels': asr_model.decoder.vocabulary,
            'batch_size': args.batch_size,
            'normalize_transcripts': args.normalize_text,
        }
    )
    if can_gpu:
        asr_model = asr_model.cuda()
    asr_model.eval()

    # Token id -> label lookup used to rebuild the reference strings.
    labels_map = dict(enumerate(asr_model.decoder.vocabulary))
    wer = WER(vocabulary=asr_model.decoder.vocabulary)

    hypotheses = []
    references = []
    for test_batch in asr_model.test_dataloader():
        if can_gpu:
            test_batch = [x.cuda() for x in test_batch]
        with autocast():
            log_probs, encoded_len, greedy_predictions = asr_model(
                input_signal=test_batch[0], input_signal_length=test_batch[1]
            )
        hypotheses += wer.ctc_decoder_predictions_tensor(greedy_predictions)

        # The batch is (signal, signal_len, transcript, transcript_len).
        # Transcripts are padded to a common length, so cut each one at its
        # real length before mapping ids back to characters.
        transcripts, transcript_lens = test_batch[2], test_batch[3]
        for batch_ind in range(greedy_predictions.shape[0]):
            seq_len = int(transcript_lens[batch_ind])
            token_ids = transcripts[batch_ind].cpu().detach().numpy()[:seq_len]
            references.append(''.join(labels_map[c] for c in token_ids))
        del test_batch

    wer_value = word_error_rate(hypotheses=hypotheses, references=references)
    if wer_value > args.wer_tolerance:
        raise ValueError(f"Got WER of {wer_value}. It was higher than {args.wer_tolerance}")
    logging.info(f'Got WER of {wer_value}. Tolerance was {args.wer_tolerance}')
def test_EncDecCTCModel_export_to_onnx(self):
    """Export a freshly built CTC model to ONNX and validate the graph.

    The exported file is loaded back, run through the ONNX checker, and its
    first input and output are expected to be named ``audio_signal`` and
    ``logprobs``.
    """
    sections = {
        'preprocessor': self.preprocessor,
        'encoder': self.encoder_dict,
        'decoder': self.decoder_dict,
    }
    model_config = DictConfig({key: DictConfig(section) for key, section in sections.items()})
    model = EncDecCTCModel(cfg=model_config)

    with tempfile.TemporaryDirectory() as tmpdir:
        onnx_path = os.path.join(tmpdir, 'qn.onnx')
        model.export(output=onnx_path)

        exported = onnx.load(onnx_path)
        # check_model raises when the graph is invalid.
        onnx.checker.check_model(exported, full_check=True)

        graph = exported.graph
        assert graph.input[0].name == 'audio_signal'
        assert graph.output[0].name == 'logprobs'
def __init__(self, torch_device=None):
    """Build an ``EncDecCTCModel`` from the config and weights in ``WORK_DIR``.

    Args:
        torch_device: device to place the model on and map the checkpoint to.
            When ``None``, CUDA is used if available, otherwise the CPU.
    """
    if torch_device is None:
        torch_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    self.file_config = path.join(WORK_DIR, _MODEL_CONFIG)
    self.file_checkpoints = path.join(WORK_DIR, _MODEL_WEIGHTS)

    loaded_cfg = OmegaConf.load(self.file_config)
    OmegaConf.set_struct(loaded_cfg, True)
    if isinstance(loaded_cfg, DictConfig):
        # Resolve interpolations through a plain container, then rebuild a
        # struct-mode config from the result.
        resolved = OmegaConf.to_container(loaded_cfg, resolve=True)
        self.config = OmegaConf.create(resolved)
        OmegaConf.set_struct(self.config, True)

    self.model_instance = EncDecCTCModel(cfg=self.config)
    self.model_instance.to(torch_device)

    state_dict = torch.load(self.file_checkpoints, torch_device)
    # The second positional argument (False) is passed through unchanged.
    self.model_instance.load_state_dict(state_dict, False)
def main(
    nemo_file, enemo_file, onnx_file, model_type='asr',
):
    """Export a ``.nemo`` model to ONNX and pack it into an archive.

    The model is restored from ``nemo_file``, exported to ``onnx_file``, and a
    new tar archive ``enemo_file`` is written with ``./model_config.yaml``
    (extracted from ``nemo_file`` into the current directory) and the ONNX
    graph stored as ``model_graph.onnx``.

    Args:
        nemo_file: path of the source ``.nemo`` archive.
        enemo_file: path of the archive to create.
        onnx_file: path where the ONNX graph is written.
        model_type: one of ``'asr'``, ``'speech_label'`` or ``'speaker'``.

    Raises:
        NameError: if ``model_type`` is not one of the supported names.
    """
    if model_type == 'asr':
        logging.info("Preparing ASR model")
        model = EncDecCTCModel.restore_from(nemo_file)
    elif model_type == 'speech_label':
        logging.info("Preparing Speech Label Classification model")
        model = EncDecClassificationModel.restore_from(nemo_file)
    elif model_type == 'speaker':
        logging.info("Preparing Speaker Recognition model")
        model = EncDecSpeakerLabelModel.restore_from(nemo_file)
    else:
        raise NameError(
            "Available model names are asr, speech_label and speaker")

    logging.info("Writing onnx file")
    model.export(onnx_file, onnx_opset_version=12)
    logging.info("succesfully ported onnx file")

    with tarfile.open(nemo_file, 'r') as archive:
        archive.extract('./model_config.yaml')

    with tarfile.open(enemo_file, 'w') as enemo_archive:
        enemo_archive.add('./model_config.yaml')
        # A bare TarInfo has size 0, so addfile() with it stores an empty
        # member. add() reads size and metadata from disk and streams the
        # file in binary mode; arcname sets the name inside the archive.
        enemo_archive.add(onnx_file, arcname="model_graph.onnx")
def oth_quartznet15x5_ru34(pretrained=False, num_classes=34, **kwargs):
    """Wrap the ``QuartzNet15x5_golos_1a63a2d8.nemo`` checkpoint in ``QuartzNet``.

    The checkpoint is looked up under ``path_pref`` and the wrapped network is
    moved to the CPU before being returned.

    Args:
        pretrained: accepted but not used by this function.
        num_classes: forwarded to ``QuartzNet``.
        **kwargs: accepted but not used by this function.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    checkpoint_path = path_pref + "QuartzNet15x5_golos_1a63a2d8.nemo"
    restored = EncDecCTCModel.restore_from(checkpoint_path)
    wrapped = QuartzNet(raw_net=restored, num_classes=num_classes)
    return wrapped.cpu()
def test_constructor(self, asr_model):
    """Round-trip the model through its config dict and check the result type."""
    asr_model.train()
    # TODO: make proper config and assert correct number of weights

    # Check to/from config_dict:
    rebuilt = EncDecCTCModel.from_config_dict(asr_model.to_config_dict())
    assert isinstance(rebuilt, EncDecCTCModel)
def main(cfg):
    """Train a CTC model from ``cfg`` and optionally run the test loop.

    The test loop runs with the same trainer, and only when ``cfg.model`` has
    a ``test_ds`` section with a manifest path and ``prepare_test`` returns a
    truthy value.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    has_test_manifest = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not has_test_manifest:
        return
    if asr_model.prepare_test(trainer):
        trainer.test(asr_model)
def test_save_model_level_pt_ckpt(self, asr_model):
    """Check that the state dict extracted from a ``.nemo`` file restores weights.

    A restored copy has one weight tensor perturbed; loading the extracted
    ``model_weights.ckpt`` must bring it back in line with the original.
    """

    def first_conv_weight(model):
        # Weight of the first conv in the first encoder block, as a numpy array.
        return model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

    with tempfile.TemporaryDirectory() as ckpt_dir:
        nemo_file = os.path.join(ckpt_dir, 'asr.nemo')
        asr_model.save_to(nemo_file)

        # Save model level PT checkpoint
        asr_model.extract_state_dict_from(nemo_file, ckpt_dir)
        ckpt_path = os.path.join(ckpt_dir, 'model_weights.ckpt')
        assert os.path.exists(ckpt_path)

        # Restore the model.
        asr_model2 = EncDecCTCModel.restore_from(restore_path=nemo_file)
        assert len(asr_model.decoder.vocabulary) == len(asr_model2.decoder.vocabulary)
        assert asr_model.num_weights == asr_model2.num_weights

        # Perturb the copy so the two models are known to differ.
        asr_model2.encoder.encoder[0].mconv[0].conv.weight.data += 1.0
        assert not np.array_equal(first_conv_weight(asr_model), first_conv_weight(asr_model2))

        # Restore from checkpoint
        asr_model2.load_state_dict(torch.load(ckpt_path))
        assert np.array_equal(first_conv_weight(asr_model), first_conv_weight(asr_model2))
def oth_quartznet15x5_ru(pretrained=False, num_classes=35, **kwargs):
    """Wrap the ``stt_ru_quartznet15x5_88a3e5aa.nemo`` checkpoint in ``QuartzNet``.

    The checkpoint path is ``path_pref`` plus the file name; the wrapped
    network is moved to the CPU before it is returned.

    Args:
        pretrained: accepted but not used by this function.
        num_classes: forwarded to ``QuartzNet``.
        **kwargs: accepted but not used by this function.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    nemo_path = path_pref + "stt_ru_quartznet15x5_88a3e5aa.nemo"
    base_model = EncDecCTCModel.restore_from(nemo_path)
    return QuartzNet(raw_net=base_model, num_classes=num_classes).cpu()
def oth_jasperdr10x5_en(pretrained=False, num_classes=29, **kwargs):
    """Wrap the ``Jasper10x5Dr-En_2b94c9d1.nemo`` checkpoint in ``QuartzNet``.

    The checkpoint is restored from under ``path_pref`` and the resulting
    wrapper is returned on the CPU.

    Args:
        pretrained: accepted but not used by this function.
        num_classes: forwarded to ``QuartzNet``.
        **kwargs: accepted but not used by this function.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    jasper_nemo_path = path_pref + "Jasper10x5Dr-En_2b94c9d1.nemo"
    restored_model = EncDecCTCModel.restore_from(jasper_nemo_path)
    network = QuartzNet(raw_net=restored_model, num_classes=num_classes)
    return network.cpu()
def main(
    nemo_file, enemo_file, onnx_file, model_type="asr",
):
    """Export a ``.nemo`` model to ONNX and pack it into an archive.

    The model is restored from ``nemo_file`` and exported to ``onnx_file``.
    ``./model_config.yaml`` is extracted from ``nemo_file`` into the current
    directory and written, together with the ONNX graph (stored under the
    name ``model_graph.onnx``), into a new tar archive at ``enemo_file``.

    Args:
        nemo_file: path of the source ``.nemo`` archive.
        enemo_file: path of the archive to create.
        onnx_file: path where the ONNX graph is written.
        model_type: one of ``"asr"``, ``"speech_label"`` or ``"speaker"``.

    Raises:
        NameError: if ``model_type`` is not one of the supported names.
    """
    if model_type == "asr":
        logging.info("Preparing ASR model")
        model = EncDecCTCModel.restore_from(nemo_file)
    elif model_type == "speech_label":
        logging.info("Preparing Speech Label Classification model")
        model = EncDecClassificationModel.restore_from(nemo_file)
    elif model_type == "speaker":
        logging.info("Preparing Speaker Recognition model")
        model = EncDecSpeakerLabelModel.restore_from(nemo_file)
    else:
        raise NameError(
            "Available model names are asr, speech_label and speaker")

    logging.info("Writing onnx file")
    model.export(onnx_file, onnx_opset_version=12)
    logging.info("succesfully ported onnx file")

    with tarfile.open(nemo_file, "r") as archive:
        archive.extract("./model_config.yaml")

    with tarfile.open(enemo_file, "w") as enemo_archive:
        enemo_archive.add("./model_config.yaml")
        # Store the graph under its archive name directly. Copying it to
        # ./model_graph.onnx and deleting it afterwards would overwrite and
        # remove any file of that name in the working directory, and would
        # fail when onnx_file is that very path.
        enemo_archive.add(onnx_file, arcname="model_graph.onnx")
def oth_quartznet15x5_en_nr(pretrained=False, num_classes=29, **kwargs):
    """Wrap the ``QuartzNet15x5NR-En_b05e34f3.nemo`` checkpoint in ``QuartzNet``.

    The checkpoint is located by prefixing the file name with ``path_pref``;
    the wrapper is returned after being moved to the CPU.

    Args:
        pretrained: accepted but not used by this function.
        num_classes: forwarded to ``QuartzNet``.
        **kwargs: accepted but not used by this function.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    source_model = EncDecCTCModel.restore_from(path_pref + "QuartzNet15x5NR-En_b05e34f3.nemo")
    cpu_net = QuartzNet(raw_net=source_model, num_classes=num_classes).cpu()
    return cpu_net
def oth_jasperdr10x5_en_nr(pretrained=False, num_classes=29, **kwargs):
    """Wrap the ``stt_en_jasper10x5dr_0d5ebc6c.nemo`` checkpoint in ``QuartzNet``.

    Only the wrapped network is returned (on the CPU); the restored model is
    not returned separately.

    Args:
        pretrained: accepted but not used by this function.
        num_classes: forwarded to ``QuartzNet``.
        **kwargs: accepted but not used by this function.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    archive_path = path_pref + "stt_en_jasper10x5dr_0d5ebc6c.nemo"
    restored = EncDecCTCModel.restore_from(archive_path)
    net = QuartzNet(raw_net=restored, num_classes=num_classes)
    return net.cpu()
def test_EncDecCTCModel(self):
    """Run the restore-elsewhere check on the pretrained QuartzNet15x5Base-En."""
    # TODO: Switch to using named configs because here we don't really care about weights
    pretrained = EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")
    compared_attrs = {"decoder._feat_in", "decoder._num_classes"}
    self.__test_restore_elsewhere(model=pretrained, attr_for_eq_check=compared_attrs)
def main(cfg):
    """Train a character or BPE CTC model from ``cfg``, then optionally test it.

    When ``cfg.n_gpus`` is positive the training batch size is floor-divided
    by it. A BPE model is built when ``cfg.model`` contains a ``tokenizer``
    entry. Testing uses a separate trainer and only runs when a test manifest
    is configured and ``prepare_test`` returns a truthy value.
    """
    if cfg.n_gpus > 0:
        cfg.model.train_ds.batch_size //= cfg.n_gpus

    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg, resolve=True)}')
    pl.utilities.seed.seed_everything(cfg.seed)

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))

    model_cls = EncDecCTCModelBPE if "tokenizer" in cfg.model else EncDecCTCModel
    asr_model = model_cls(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    wants_test = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not wants_test:
        return

    # Testing runs on at most one GPU, whatever the training setup was.
    test_trainer = pl.Trainer(
        gpus=1 if cfg.trainer.gpus != 0 else 0,
        precision=trainer.precision,
        amp_level=trainer.accelerator_connector.amp_level,
        amp_backend=cfg.trainer.get("amp_backend", "native"),
    )
    if asr_model.prepare_test(test_trainer):
        test_trainer.test(asr_model)
def test_save_restore_from_nemo_file(self, asr_model):
    """Check that a model restored from a saved archive matches the original.

    Compared: vocabulary length, ``num_weights`` and the weight values of the
    first convolution in the first encoder block.
    """

    def first_conv_weight(model):
        # Weight of the first conv in the first encoder block, as a numpy array.
        return model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

    with tempfile.NamedTemporaryFile() as archive_fp:
        archive_path = archive_fp.name

        # Save model (with random artifact).
        with tempfile.NamedTemporaryFile() as artifact:
            asr_model.register_artifact(config_path=None, src=artifact.name)
            asr_model.save_to(save_path=archive_path)

        # Restore the model.
        asr_model2 = EncDecCTCModel.restore_from(restore_path=archive_path)

        assert len(asr_model.decoder.vocabulary) == len(asr_model2.decoder.vocabulary)
        assert asr_model.num_weights == asr_model2.num_weights
        assert np.array_equal(first_conv_weight(asr_model), first_conv_weight(asr_model2))
def set_asr_model(self, asr_model):
    """Load the pretrained model named ``asr_model`` and set decoding parameters.

    The branch is chosen by substring match on the name: ``QuartzNet``,
    ``conformer_ctc``, ``citrinet``, or ``conformer_transducer`` /
    ``contextnet``. Each branch selects the ``run_ASR`` routine and the model
    stride, delay and offset; all but the QuartzNet branch also set the chunk
    and buffer lengths.

    Args:
        asr_model: name of the pretrained model to load.

    Returns:
        The loaded model, switched to eval mode.

    Raises:
        ValueError: if the name matches none of the supported families.
    """
    if 'QuartzNet' in asr_model:
        self.run_ASR = self.run_ASR_QuartzNet_CTC
        loaded_model = EncDecCTCModel.from_pretrained(model_name=asr_model, strict=False)
        self.params['offset'] = -0.18
        self.model_stride_in_secs = 0.02
        self.asr_delay_sec = -1 * self.params['offset']
    else:
        # The remaining families differ only in decoding routine, model
        # class and stride; everything else is shared.
        if 'conformer_ctc' in asr_model:
            run_fn, model_cls, stride = self.run_ASR_BPE_CTC, EncDecCTCModelBPE, 0.04
        elif 'citrinet' in asr_model:
            run_fn, model_cls, stride = self.run_ASR_BPE_CTC, EncDecCTCModelBPE, 0.08
        elif 'conformer_transducer' in asr_model or 'contextnet' in asr_model:
            run_fn, model_cls, stride = self.run_ASR_BPE_RNNT, EncDecRNNTBPEModel, 0.04
        else:
            raise ValueError(f"ASR model name not found: {asr_model}")

        self.run_ASR = run_fn
        loaded_model = model_cls.from_pretrained(model_name=asr_model, strict=False)
        self.model_stride_in_secs = stride
        self.asr_delay_sec = 0.0
        self.params['offset'] = 0
        self.chunk_len_in_sec = 1.6
        self.total_buffer_in_secs = 4

    self.params['time_stride'] = self.model_stride_in_secs
    self.asr_batch_size = 16
    loaded_model.eval()

    self.audio_file_list = [entry['audio_filepath'] for entry in self.AUDIO_RTTM_MAP.values()]
    return loaded_model
def generate_ref_hyps(asr_model: EncDecCTCModel, search: str, arpa: str):
    """Yield ``(reference, hypothesis)`` pairs over the model's test dataloader.

    Args:
        asr_model: CTC model whose test data has already been set up.
        search: decoding strategy: ``"greedy"``, ``"beamsearch"`` (beam search
            without a language model) or ``"kenlm"`` (beam search with the
            language model prepared from ``arpa``).
        arpa: passed to ``prepare_arpa_file`` for the beam-search strategies.

    Yields:
        Tuples ``(reference, hypothesis)``, one per utterance.

    Raises:
        ValueError: if ``search`` is not a supported strategy. Because this is
            a generator, the error surfaces on the first iteration.
    """
    # Fail before any model work; an unknown strategy would otherwise only
    # surface after the first forward pass, as an unbound ``beamsearcher``.
    if search not in ("greedy", "kenlm", "beamsearch"):
        raise ValueError(
            f"Unsupported search strategy {search!r}; expected 'greedy', 'kenlm' or 'beamsearch'")

    if can_gpu:
        asr_model = asr_model.cuda()
        print("USING GPU!")
    asr_model.eval()

    vocabulary = asr_model.decoder.vocabulary
    # Token id -> label lookup used to rebuild the reference strings.
    labels_map = dict(enumerate(vocabulary))
    wer = WER(vocabulary=vocabulary)

    beamsearcher = None
    if search != "greedy":
        arpa_file = prepare_arpa_file(arpa)
        lm_path = arpa_file if search == "kenlm" else None

        beamsearcher = nemo_asr.modules.BeamSearchDecoderWithLM(
            vocab=list(vocabulary),
            beam_width=16,
            alpha=2,
            beta=1.5,
            lm_path=lm_path,
            # os.cpu_count() may return None, which max() cannot compare.
            num_cpus=os.cpu_count() or 1,
            input_tensor=True,
        )

    for batch in asr_model.test_dataloader():
        # TODO(tilo): test_loader should return dict or some typed object not tuple of tensors!!
        if can_gpu:
            batch = [x.cuda() for x in batch]
        input_signal, inpsig_len, transcript, transc_len = batch
        with autocast():
            log_probs, encoded_len, greedy_predictions = asr_model(
                input_signal=input_signal, input_signal_length=inpsig_len)

        if search == "greedy":
            decoded = wer.ctc_decoder_predictions_tensor(greedy_predictions)
        else:
            decoded = beamsearch_forward(
                beamsearcher, log_probs=log_probs, log_probs_length=encoded_len)

        for i, hyp in enumerate(decoded):
            # Cut each transcript at its own length before mapping ids to labels.
            token_ids = transcript[i].cpu().detach().numpy()[:transc_len[i]]
            reference = "".join(labels_map[c] for c in token_ids)
            yield reference, hyp
def __init__(
    self,
    model: EncDecCTCModel,
    sample_rate: int,
    batch_size: int = 1,
    device: str = "cuda",
) -> None:
    """Set up the online audio dataset, its loader, and the model on ``device``.

    Args:
        model: CTC model; switched to eval mode and moved to ``device``.
        sample_rate: passed to ``ASROnlineAudioData``.
        batch_size: batch size of the ``DataLoader``.
        device: device string handed to ``torch.device``.
    """
    super(ASRAudioEncoderDecoder, self).__init__()

    audio_source = ASROnlineAudioData(sample_rate)
    self.online_audio = audio_source
    self.data_loader = DataLoader(
        dataset=audio_source,
        batch_size=batch_size,
        collate_fn=audio_source.collate_fn,
    )

    model.eval()
    target_device = torch.device(device)
    self.device = target_device
    self.model = model.to(target_device)
def test_EncDecCTCModel_adapted_export_to_onnx(self):
    """Export a CTC model with a linear adapter to ONNX and validate the graph.

    The encoder class name gets the ``Adapter`` suffix, an adapter named
    ``temp`` is added, the model is moved to CUDA and exported with trace
    checking. The exported graph must pass the ONNX checker and expose
    ``audio_signal`` / ``logprobs`` as its first input / output.
    """
    model_config = DictConfig({
        'preprocessor': DictConfig(self.preprocessor),
        'encoder': DictConfig(self.encoder_dict),
        'decoder': DictConfig(self.decoder_dict),
    })

    # support adapter in encoder
    model_config.encoder.cls += 'Adapter'  # ConvASREncoderAdapter

    # load model
    model = EncDecCTCModel(cfg=model_config)

    # add adapter
    first_block_filters = model_config.encoder.params.jasper[0].filters
    adapter_cfg = OmegaConf.structured(LinearAdapterConfig(in_features=first_block_filters, dim=32))
    model.add_adapter('temp', cfg=adapter_cfg)

    model = model.cuda()

    with tempfile.TemporaryDirectory() as tmpdir:
        onnx_path = os.path.join(tmpdir, 'qn.onnx')
        model.export(output=onnx_path, check_trace=True)

        exported = onnx.load(onnx_path)
        # check_model raises when the graph is invalid.
        onnx.checker.check_model(exported, full_check=True)

        assert exported.graph.input[0].name == 'audio_signal'
        assert exported.graph.output[0].name == 'logprobs'
def main(cfg):
    """Train a CTC model from ``cfg`` and optionally test it with a new trainer.

    The test trainer is built from ``cfg.trainer`` (precision, amp level and
    amp backend) and uses one GPU unless ``cfg.trainer.gpus`` is 0.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    fit_trainer = pl.Trainer(**cfg.trainer)
    exp_manager(fit_trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=fit_trainer)
    fit_trainer.fit(asr_model)

    test_requested = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not test_requested:
        return

    gpu = 1 if cfg.trainer.gpus != 0 else 0
    test_trainer = pl.Trainer(
        gpus=gpu,
        precision=cfg.trainer.precision,
        amp_level=cfg.trainer.amp_level,
        amp_backend=cfg.trainer.amp_backend,
    )
    if asr_model.prepare_test(test_trainer):
        test_trainer.test(asr_model)
def batch_inference(args: argparse.Namespace):
    """Transcribe a prepared manifest, write refs/hyps/stats, and return stats.

    Args:
        args: parsed options; this function reads ``asr_model``,
            ``corpora_dir``, ``limit``, ``batch_size``, ``normalize_text``,
            ``search``, ``arpa`` and ``results_dir``.

    Returns:
        Dict with the word error rate under ``"wer"`` and the argument
        namespace's ``__dict__`` under ``"args"``.
    """
    torch.set_grad_enabled(False)

    use_local_file = args.asr_model.endswith(".nemo")
    if use_local_file:
        print(f"Using local ASR model from {args.asr_model}")
        asr_model = EncDecCTCModel.restore_from(restore_path=args.asr_model)
    else:
        print(f"Using NGC cloud ASR model {args.asr_model}")
        asr_model = EncDecCTCModel.from_pretrained(model_name=args.asr_model)

    manifest = prepare_manifest(args.corpora_dir, args.limit)
    test_data_config = {
        "sample_rate": 16000,
        "manifest_filepath": manifest,
        "labels": asr_model.decoder.vocabulary,
        "batch_size": args.batch_size,
        "normalize_transcripts": args.normalize_text,
    }
    asr_model.setup_test_data(test_data_config=test_data_config)

    pairs = list(tqdm(generate_ref_hyps(asr_model, args.search, args.arpa)))
    # Transpose [(ref, hyp), ...] into two parallel lists.
    references, hypotheses = map(list, zip(*pairs))

    out_dir = args.results_dir
    os.makedirs(out_dir, exist_ok=True)
    data_io.write_lines(f"{args.results_dir}/refs.txt.gz", references)
    data_io.write_lines(f"{args.results_dir}/hyps.txt.gz", hypotheses)

    wer_value = word_error_rate(hypotheses=hypotheses, references=references)
    sys.stdout.flush()

    stats = {"wer": wer_value, "args": vars(args)}
    data_io.write_json(f"{args.results_dir}/stats.txt", stats)
    print(f"Got WER of {wer_value}")
    return stats
def main(cfg):
    """Train a CTC model from ``cfg``, then optionally test with a second trainer.

    The test trainer takes its precision and amp level from the training
    trainer and its amp backend from ``cfg.trainer`` (defaulting to
    ``"native"``). It uses one GPU unless ``cfg.trainer.gpus`` is 0.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    if not hasattr(cfg.model, 'test_ds'):
        return
    if cfg.model.test_ds.manifest_filepath is None:
        return

    test_trainer_kwargs = dict(
        gpus=1 if cfg.trainer.gpus != 0 else 0,
        precision=trainer.precision,
        amp_level=trainer.accelerator_connector.amp_level,
        amp_backend=cfg.trainer.get("amp_backend", "native"),
    )
    test_trainer = pl.Trainer(**test_trainer_kwargs)
    if asr_model.prepare_test(test_trainer):
        test_trainer.test(asr_model)
def conformer_model():
    """Build an ``EncDecCTCModel`` with a two-layer Conformer encoder.

    The decoder is configured for 1024 classes with a synthetic vocabulary of
    1024 entries, ``chr(i % 28)`` for each index ``i``.
    """
    preprocessor_cfg = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': {},
    }

    encoder_params = {
        'feat_in': 80,
        'feat_out': -1,
        'n_layers': 2,
        'd_model': 256,
        'subsampling': 'striding',
        'subsampling_factor': 4,
        'subsampling_conv_channels': 512,
        'ff_expansion_factor': 4,
        'self_attention_model': 'rel_pos',
        'n_heads': 8,
        'att_context_size': [-1, -1],
        'xscaling': True,
        'untie_biases': True,
        'pos_emb_max_len': 500,
        'conv_kernel_size': 31,
        'dropout': 0.1,
        'dropout_emb': 0.0,
        'dropout_att': 0.1,
    }
    encoder_cfg = {
        'cls': 'nemo.collections.asr.modules.ConformerEncoder',
        'params': encoder_params,
    }

    decoder_cfg = {
        'cls': 'nemo.collections.asr.modules.ConvASRDecoder',
        'params': {
            'feat_in': 256,
            'num_classes': 1024,
            'vocabulary': [chr(index % 28) for index in range(1024)],
        },
    }

    model_cfg = DictConfig({
        'preprocessor': DictConfig(preprocessor_cfg),
        'encoder': DictConfig(encoder_cfg),
        'decoder': DictConfig(decoder_cfg),
    })
    return EncDecCTCModel(cfg=model_cfg)
def infer(model, audiofiles, batch_size=4):
    """Transcribe ``audiofiles`` with the CTC model stored at ``model``.

    Args:
        model: path handed to ``EncDecCTCModel.restore_from``.
        audiofiles: iterable of audio file paths.
        batch_size: batch size for the temporary transcription dataloader.

    Returns:
        Tuple ``(characters, log_probs, vocab)``: the greedy-decoded
        transcriptions, the per-utterance log-probabilities cut to each
        utterance's encoded length (as nested lists), and the labels from the
        model's ``train_ds`` config.
    """
    asr_model = EncDecCTCModel.restore_from(model)
    was_training = asr_model.training
    device = next(asr_model.parameters()).device
    asr_model.eval()
    vocab = asr_model._cfg.train_ds.labels

    characters = []
    log_probs = []
    with tempfile.TemporaryDirectory() as tmpdir:
        # One manifest line per file, with placeholder duration and text.
        manifest_path = os.path.join(tmpdir, 'manifest.json')
        with open(manifest_path, 'w') as manifest_fp:
            for audio_path in audiofiles:
                entry = {
                    'audio_filepath': audio_path,
                    'duration': 100000,
                    'text': 'nothing'
                }
                manifest_fp.write(json.dumps(entry) + '\n')

        config = {
            'paths2audio_files': audiofiles,
            'batch_size': batch_size,
            'temp_dir': tmpdir
        }
        for test_batch in asr_model._setup_transcribe_dataloader(config):
            log_prob, encoded_len, greedy_predictions = asr_model.forward(
                input_signal=test_batch[0].to(device),
                input_signal_length=test_batch[1].to(device))
            characters += asr_model._wer.ctc_decoder_predictions_tensor(greedy_predictions)

            lengths = encoded_len.long().cpu()
            probs = log_prob.float().cpu()
            for idx in range(lengths.shape[0]):
                valid_steps = lengths[idx].detach().numpy().tolist()
                utterance_probs = probs[idx].detach().numpy().tolist()
                log_probs.append(utterance_probs[0:valid_steps])
            del test_batch

    # Put the model back into the mode it was in right after restoring.
    asr_model.train(was_training)
    return characters, log_probs, vocab
def test_save_restore_from_nemo_file_with_override(self, asr_model, tmpdir):
    """Check restoring from an archive with an overriding config file.

    The model is saved with an extra artifact, its config is changed
    (encoder activation set to ``'swish'``) and written to a temporary YAML
    file, and the archive is restored with that file as the override. The
    restored model must keep the weights and carry the overridden activation.

    Args:
        asr_model: ASR model instance under test.
        tmpdir: fixture providing a temporary directory unique to the test invocation.
    """
    # Name of the archive in tmp folder.
    filename = os.path.join(tmpdir, "eff.nemo")

    with tempfile.NamedTemporaryFile(mode='a+') as conf_fp:
        # Create a "random artifact". delete=False lets the file be closed
        # (flushing its content) and still exist for save_to(); it is
        # removed explicitly in the finally block below.
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as artifact:
            artifact.write("magic content 42")

        try:
            # Add artifact to model.
            asr_model.register_artifact(config_path=None, src=artifact.name)
            # Save model (with "random artifact").
            asr_model.save_to(save_path=filename)

            # Modify config slightly
            cfg = asr_model.cfg
            cfg.encoder.params.activation = 'swish'
            conf_fp.write(OmegaConf.to_yaml(cfg))
            # seek() also pushes the buffered YAML out to disk before it is read.
            conf_fp.seek(0)

            # Restore the model.
            asr_model2 = EncDecCTCModel.restore_from(restore_path=filename, override_config_path=conf_fp.name)
        finally:
            # Do not leave the artifact behind in the system temp directory.
            os.remove(artifact.name)

        assert len(asr_model.decoder.vocabulary) == len(asr_model2.decoder.vocabulary)
        assert asr_model.num_weights == asr_model2.num_weights
        w1 = asr_model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()
        w2 = asr_model2.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()
        assert np.array_equal(w1, w2)

        assert asr_model2.cfg.encoder.params.activation == 'swish'
def test_to_from_config_file(self, asr_model):
    """Check that a model rebuilt from the config file alone has new weights.

    The rebuilt instance must have the same type and weight count and a
    28-entry vocabulary, but the values of its first encoder convolution
    weight must differ from the original's because no checkpoint is loaded.
    """

    def first_conv_weight(model):
        # Weight of the first conv in the first encoder block, as a numpy array.
        return model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

    with tempfile.NamedTemporaryFile() as yaml_fp:
        yaml_path = yaml_fp.name
        asr_model.to_config_file(path2yaml_file=yaml_path)
        next_instance = EncDecCTCModel.from_config_file(path2yaml_file=yaml_path)

        assert isinstance(next_instance, EncDecCTCModel)
        assert len(next_instance.decoder.vocabulary) == 28
        assert asr_model.num_weights == next_instance.num_weights

        original_weight = first_conv_weight(asr_model)
        rebuilt_weight = first_conv_weight(next_instance)
        assert not np.array_equal(original_weight, rebuilt_weight)
def main(
    nemo_file, onnx_file, model_type='asr',
):
    """Restore a model from ``nemo_file`` and export it to ``onnx_file``.

    Args:
        nemo_file: path of the ``.nemo`` archive to restore.
        onnx_file: destination path of the ONNX export (opset 12).
        model_type: one of ``'asr'``, ``'speech_label'`` or ``'speaker'``.

    Raises:
        NameError: if ``model_type`` is not one of the supported names.
    """
    # Reject unsupported types before doing any work.
    if model_type not in ('asr', 'speech_label', 'speaker'):
        raise NameError("Available model names are asr, speech_label and speaker")

    if model_type == 'asr':
        logging.info("Preparing ASR model")
        model = EncDecCTCModel.restore_from(nemo_file)
    elif model_type == 'speech_label':
        logging.info("Preparing Speech Label Classification model")
        model = EncDecClassificationModel.restore_from(nemo_file)
    else:
        logging.info("Preparing Speaker Recognition model")
        model = EncDecSpeakerLabelModel.restore_from(nemo_file)

    logging.info("Writing onnx file")
    model.export(onnx_file, onnx_opset_version=12)
    logging.info("succesfully ported onnx file")