Esempio n. 1
0
    def test_eff_save_restore_from_nemo_file_encrypted(self, asr_model):
        """Check that an encrypted save/restore round trip keeps the number of weights.

        The archive is saved while an encryption key is set. Restoring it without the key is
        expected to raise ``PermissionError``; restoring it with the key is expected to succeed.
        """

        with tempfile.NamedTemporaryFile() as fp:
            filename = fp.name

            try:
                # Set key - use checkpoint encryption.
                NeMoArchive.set_encryption_key("test_key")

                # Save model (with random artifact).
                with tempfile.NamedTemporaryFile() as artifact:
                    asr_model.register_artifact(config_path=None, src=artifact.name)
                    asr_model.save_to(save_path=filename)

                # Try to restore the encrypted archive (weights) without the encryption key.
                NeMoArchive.set_encryption_key(None)
                with pytest.raises(PermissionError):
                    EncDecCTCModel.restore_from(restore_path=filename)

                # Restore the model with the key set again.
                NeMoArchive.set_encryption_key("test_key")
                restored_model = EncDecCTCModel.restore_from(restore_path=filename)
            finally:
                # Reset encryption even when a step above fails, so the key does not
                # mess up other save/restore tests.
                NeMoArchive.set_encryption_key(None)

            assert asr_model.num_weights == restored_model.num_weights
Esempio n. 2
0
 def __init__(
     self,
     encoder_decoder: EncDecCTCModel,
     batch_size: int = 1,
     dither: float = 0.0,
     pad_to: int = 0,
     device: str = "cuda",
     **kwargs
 ) -> None:
     """Set up inference state around a CTC model whose preprocessor is re-created.

     Args:
         encoder_decoder: model whose ``_cfg.preprocessor`` is updated in place and whose
             ``preprocessor`` attribute is replaced by one built from that config.
         batch_size: forwarded to ``ASRAudioEncoderDecoder``.
         dither: value written to ``cfg.preprocessor.params.dither``.
         pad_to: value written to ``cfg.preprocessor.params.pad_to``.
         device: forwarded to ``ASRAudioEncoderDecoder``.
         **kwargs: forwarded to ``ASRInferenceParameters.from_omega``.
     """
     model_cfg = encoder_decoder._cfg

     # Struct mode is switched off only while the preprocessor parameters are overwritten.
     OmegaConf.set_struct(model_cfg.preprocessor, value=False)
     model_cfg.preprocessor.params.dither = dither
     model_cfg.preprocessor.params.pad_to = pad_to
     model_cfg.preprocessor.params.normalize = spectrogram_normalization()
     OmegaConf.set_struct(model_cfg.preprocessor, value=True)
     encoder_decoder.preprocessor = encoder_decoder.from_config_dict(model_cfg.preprocessor)

     self.params = ASRInferenceParameters.from_omega(model_cfg, **kwargs)
     self.overlap_timesteps = self.params.compute_overlap_timesteps()
     self.buffer = np.zeros(self.params.buffer_size, dtype=np.float32)
     self.audio_encoder_decoder = ASRAudioEncoderDecoder(
         encoder_decoder, self.params.sample_rate, batch_size, device=device,
     )
     self.prev_char = ""
     self.reset()
Esempio n. 3
0
def main():
    """Evaluate a CTC ASR model on a manifest and fail when the WER exceeds the tolerance.

    Command-line arguments:
        --asr_model: path to a local ``.nemo`` file, otherwise treated as a pretrained model name.
        --dataset: manifest path used as evaluation data.
        --batch_size: test batch size.
        --wer_tolerance: maximum accepted word error rate.
        --normalize_text: boolean given as text (e.g. ``True`` / ``False``).

    Raises:
        ValueError: if the measured WER is higher than ``--wer_tolerance``.
    """

    def _str2bool(value):
        # ``type=bool`` would turn every non-empty string, including "False", into True,
        # so the textual value is interpreted explicitly instead.
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays falsy, as it was with ``bool("")``.
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        # argparse reports a ValueError raised by a ``type`` callable as a usage error.
        raise ValueError(f"Expected a boolean value, got {value!r}")

    parser = ArgumentParser()
    parser.add_argument(
        "--asr_model", type=str, default="QuartzNet15x5Base-En", required=True, help="Pass: '******'",
    )
    parser.add_argument("--dataset", type=str, required=True, help="path to evaluation data")
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--wer_tolerance", type=float, default=1.0, help="used by test")
    parser.add_argument(
        "--normalize_text",
        default=True,
        type=_str2bool,
        help="Normalize transcripts or not. Set to False for non-English.",
    )
    args = parser.parse_args()
    torch.set_grad_enabled(False)

    if args.asr_model.endswith('.nemo'):
        logging.info(f"Using local ASR model from {args.asr_model}")
        asr_model = EncDecCTCModel.restore_from(restore_path=args.asr_model)
    else:
        logging.info(f"Using NGC cloud ASR model {args.asr_model}")
        asr_model = EncDecCTCModel.from_pretrained(model_name=args.asr_model)
    asr_model.setup_test_data(
        test_data_config={
            'sample_rate': 16000,
            'manifest_filepath': args.dataset,
            'labels': asr_model.decoder.vocabulary,
            'batch_size': args.batch_size,
            'normalize_transcripts': args.normalize_text,
        }
    )
    if can_gpu:
        asr_model = asr_model.cuda()
    asr_model.eval()

    # Maps a label index to its vocabulary entry, used to rebuild reference transcripts.
    labels_map = dict(enumerate(asr_model.decoder.vocabulary))
    wer = WER(vocabulary=asr_model.decoder.vocabulary)
    hypotheses = []
    references = []
    for test_batch in asr_model.test_dataloader():
        if can_gpu:
            test_batch = [x.cuda() for x in test_batch]
        with autocast():
            log_probs, encoded_len, greedy_predictions = asr_model(
                input_signal=test_batch[0], input_signal_length=test_batch[1]
            )
        hypotheses += wer.ctc_decoder_predictions_tensor(greedy_predictions)
        for batch_ind in range(greedy_predictions.shape[0]):
            reference = ''.join([labels_map[c] for c in test_batch[2][batch_ind].cpu().detach().numpy()])
            references.append(reference)
        del test_batch
    wer_value = word_error_rate(hypotheses=hypotheses, references=references)
    if wer_value > args.wer_tolerance:
        raise ValueError(f"Got WER of {wer_value}. It was higher than {args.wer_tolerance}")
    logging.info(f'Got WER of {wer_value}. Tolerance was {args.wer_tolerance}')
Esempio n. 4
0
 def test_EncDecCTCModel_export_to_onnx(self):
     """Export a freshly built EncDecCTCModel to ONNX and validate the resulting graph."""
     sections = {
         'preprocessor': self.preprocessor,
         'encoder': self.encoder_dict,
         'decoder': self.decoder_dict,
     }
     model_config = DictConfig({key: DictConfig(value) for key, value in sections.items()})
     ctc_model = EncDecCTCModel(cfg=model_config)

     with tempfile.TemporaryDirectory() as export_dir:
         onnx_path = os.path.join(export_dir, 'qn.onnx')
         ctc_model.export(output=onnx_path)
         exported = onnx.load(onnx_path)
         # check_model throws when validation fails
         onnx.checker.check_model(exported, full_check=True)
         graph = exported.graph
         assert graph.input[0].name == 'audio_signal'
         assert graph.output[0].name == 'logprobs'
    def __init__(self, torch_device=None):
        """Build an EncDecCTCModel from the config file and load its weights.

        Args:
            torch_device: device the model is moved to and the checkpoint is mapped to.
                When ``None``, CUDA is used if available, otherwise the CPU.
        """
        if torch_device is None:
            device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
            torch_device = torch.device(device_name)

        self.file_config = path.join(WORK_DIR, _MODEL_CONFIG)
        self.file_checkpoints = path.join(WORK_DIR, _MODEL_WEIGHTS)

        loaded_config = OmegaConf.load(self.file_config)
        OmegaConf.set_struct(loaded_config, True)

        if isinstance(loaded_config, DictConfig):
            # Round-trip through a plain container with interpolations resolved.
            resolved = OmegaConf.to_container(loaded_config, resolve=True)
            self.config = OmegaConf.create(resolved)
            OmegaConf.set_struct(self.config, True)

        # EncDecCTCModel.set_model_restore_state(is_being_restored=True)
        self.model_instance = EncDecCTCModel(cfg=self.config)
        self.model_instance.to(torch_device)

        # Second positional argument of load_state_dict is ``strict``.
        state_dict = torch.load(self.file_checkpoints, torch_device)
        self.model_instance.load_state_dict(state_dict, False)
Esempio n. 6
0
def main(
    nemo_file,
    enemo_file,
    onnx_file,
    model_type='asr',
):
    """Export a ``.nemo`` model to ONNX and pack the graph with its config into a tar archive.

    Args:
        nemo_file: path of the source ``.nemo`` archive.
        enemo_file: path of the tar archive to write.
        onnx_file: path the ONNX graph is exported to.
        model_type: one of ``'asr'``, ``'speech_label'`` or ``'speaker'``.

    Raises:
        NameError: if ``model_type`` is not one of the supported names.
    """
    if model_type == 'asr':
        logging.info("Preparing ASR model")
        model = EncDecCTCModel.restore_from(nemo_file)
    elif model_type == 'speech_label':
        logging.info("Preparing Speech Label Classification model")
        model = EncDecClassificationModel.restore_from(nemo_file)
    elif model_type == 'speaker':
        logging.info("Preparing Speaker Recognition model")
        model = EncDecSpeakerLabelModel.restore_from(nemo_file)
    else:
        raise NameError(
            "Available model names are asr, speech_label and speaker")

    logging.info("Writing onnx file")
    model.export(onnx_file, onnx_opset_version=12)
    logging.info("succesfully ported onnx file")

    with tarfile.open(nemo_file, 'r') as archive:
        # The config is extracted into the current working directory before being re-packed.
        archive.extract('./model_config.yaml')
        with tarfile.open(enemo_file, 'w') as enemo_archive:
            enemo_archive.add('./model_config.yaml')
            # ``addfile`` with a bare ``TarInfo`` (size 0) stores an empty member and needs an
            # open binary file object; ``add`` takes size and content from the file on disk.
            enemo_archive.add(onnx_file, arcname="model_graph.onnx")
Esempio n. 7
0
def oth_quartznet15x5_ru34(pretrained=False, num_classes=34, **kwargs):
    """Restore the ``QuartzNet15x5_golos_1a63a2d8.nemo`` checkpoint and wrap it in ``QuartzNet``.

    ``pretrained`` and ``**kwargs`` are accepted but not used here.

    Returns:
        The ``QuartzNet`` wrapper after ``.cpu()``.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    checkpoint_path = path_pref + "QuartzNet15x5_golos_1a63a2d8.nemo"
    restored = EncDecCTCModel.restore_from(checkpoint_path)
    wrapped = QuartzNet(raw_net=restored, num_classes=num_classes)
    return wrapped.cpu()
Esempio n. 8
0
 def test_constructor(self, asr_model):
     """Check that a model rebuilt from its own config dict is an EncDecCTCModel."""
     asr_model.train()
     # TODO: make proper config and assert correct number of weights
     # Round-trip through to/from config_dict.
     rebuilt = EncDecCTCModel.from_config_dict(asr_model.to_config_dict())
     assert isinstance(rebuilt, EncDecCTCModel)
Esempio n. 9
0
def main(cfg):
    """Train an EncDecCTCModel from ``cfg`` and test it when a test manifest is configured.

    Args:
        cfg: config providing ``trainer``, ``model`` and optionally ``exp_manager``.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    # Testing is skipped unless a test dataset with a manifest is configured.
    has_test_manifest = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not has_test_manifest:
        return
    if asr_model.prepare_test(trainer):
        trainer.test(asr_model)
Esempio n. 10
0
    def test_save_model_level_pt_ckpt(self, asr_model):
        """Check that a state dict extracted from a .nemo file restores modified weights."""

        def first_conv_weights(model):
            # Weights of the first encoder block's first convolution, as a numpy array.
            return model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

        with tempfile.TemporaryDirectory() as ckpt_dir:
            nemo_file = os.path.join(ckpt_dir, 'asr.nemo')
            asr_model.save_to(nemo_file)

            # Save model level PT checkpoint
            asr_model.extract_state_dict_from(nemo_file, ckpt_dir)
            ckpt_path = os.path.join(ckpt_dir, 'model_weights.ckpt')
            assert os.path.exists(ckpt_path)

            # Restore the model.
            restored = EncDecCTCModel.restore_from(restore_path=nemo_file)
            assert len(asr_model.decoder.vocabulary) == len(restored.decoder.vocabulary)
            assert asr_model.num_weights == restored.num_weights

            # Change weights values so the two models differ.
            restored.encoder.encoder[0].mconv[0].conv.weight.data += 1.0
            assert not np.array_equal(first_conv_weights(asr_model), first_conv_weights(restored))

            # Restore from checkpoint; the weights must match again.
            restored.load_state_dict(torch.load(ckpt_path))
            assert np.array_equal(first_conv_weights(asr_model), first_conv_weights(restored))
Esempio n. 11
0
def oth_quartznet15x5_ru(pretrained=False, num_classes=35, **kwargs):
    """Restore the ``stt_ru_quartznet15x5_88a3e5aa.nemo`` checkpoint and wrap it in ``QuartzNet``.

    ``pretrained`` and ``**kwargs`` are accepted but not used here.

    Returns:
        The ``QuartzNet`` wrapper after ``.cpu()``.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    checkpoint_path = path_pref + "stt_ru_quartznet15x5_88a3e5aa.nemo"
    restored = EncDecCTCModel.restore_from(checkpoint_path)
    wrapped = QuartzNet(raw_net=restored, num_classes=num_classes)
    return wrapped.cpu()
Esempio n. 12
0
def oth_jasperdr10x5_en(pretrained=False, num_classes=29, **kwargs):
    """Restore the ``Jasper10x5Dr-En_2b94c9d1.nemo`` checkpoint and wrap it in ``QuartzNet``.

    ``pretrained`` and ``**kwargs`` are accepted but not used here.

    Returns:
        The ``QuartzNet`` wrapper after ``.cpu()``.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    checkpoint_path = path_pref + "Jasper10x5Dr-En_2b94c9d1.nemo"
    restored = EncDecCTCModel.restore_from(checkpoint_path)
    wrapped = QuartzNet(raw_net=restored, num_classes=num_classes)
    return wrapped.cpu()
Esempio n. 13
0
def main(
    nemo_file,
    enemo_file,
    onnx_file,
    model_type="asr",
):
    """Export a ``.nemo`` model to ONNX and pack the graph with its config into a tar archive.

    Args:
        nemo_file: path of the source ``.nemo`` archive.
        enemo_file: path of the tar archive to write.
        onnx_file: path the ONNX graph is exported to.
        model_type: one of ``"asr"``, ``"speech_label"`` or ``"speaker"``.

    Raises:
        NameError: if ``model_type`` is not one of the supported names.
    """
    if model_type == "asr":
        logging.info("Preparing ASR model")
        model = EncDecCTCModel.restore_from(nemo_file)
    elif model_type == "speech_label":
        logging.info("Preparing Speech Label Classification model")
        model = EncDecClassificationModel.restore_from(nemo_file)
    elif model_type == "speaker":
        logging.info("Preparing Speaker Recognition model")
        model = EncDecSpeakerLabelModel.restore_from(nemo_file)
    else:
        raise NameError(
            "Available model names are asr, speech_label and speaker")

    logging.info("Writing onnx file")
    model.export(onnx_file, onnx_opset_version=12)
    logging.info("succesfully ported onnx file")

    with tarfile.open(nemo_file, "r") as archive:
        # The config is extracted into the current working directory before being re-packed.
        archive.extract("./model_config.yaml")
        with tarfile.open(enemo_file, "w") as enemo_archive:
            enemo_archive.add("./model_config.yaml")
            # Store the graph under its archive name directly. A staging copy in the working
            # directory fails when onnx_file already is ./model_graph.onnx, can overwrite an
            # unrelated file of that name, and is left behind if adding fails.
            enemo_archive.add(onnx_file, arcname="model_graph.onnx")
Esempio n. 14
0
def oth_quartznet15x5_en_nr(pretrained=False, num_classes=29, **kwargs):
    """Restore the ``QuartzNet15x5NR-En_b05e34f3.nemo`` checkpoint and wrap it in ``QuartzNet``.

    ``pretrained`` and ``**kwargs`` are accepted but not used here.

    Returns:
        The ``QuartzNet`` wrapper after ``.cpu()``.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    checkpoint_path = path_pref + "QuartzNet15x5NR-En_b05e34f3.nemo"
    restored = EncDecCTCModel.restore_from(checkpoint_path)
    wrapped = QuartzNet(raw_net=restored, num_classes=num_classes)
    return wrapped.cpu()
Esempio n. 15
0
def oth_jasperdr10x5_en_nr(pretrained=False, num_classes=29, **kwargs):
    """Restore the ``stt_en_jasper10x5dr_0d5ebc6c.nemo`` checkpoint and wrap it in ``QuartzNet``.

    ``pretrained`` and ``**kwargs`` are accepted but not used here.

    Returns:
        The ``QuartzNet`` wrapper after ``.cpu()``.
    """
    from nemo.collections.asr.models import EncDecCTCModel

    checkpoint_path = path_pref + "stt_en_jasper10x5dr_0d5ebc6c.nemo"
    restored = EncDecCTCModel.restore_from(checkpoint_path)
    wrapped = QuartzNet(raw_net=restored, num_classes=num_classes)
    return wrapped.cpu()
Esempio n. 16
0
 def test_EncDecCTCModel(self):
     """Run the restore-elsewhere check on the pretrained QuartzNet15x5Base-En model."""
     # TODO: Switch to using named configs because here we don't really care about weights
     pretrained_qn = EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")
     # Attributes handed to the helper for its equality check.
     compared_attrs = {"decoder._feat_in", "decoder._num_classes"}
     self.__test_restore_elsewhere(model=pretrained_qn, attr_for_eq_check=compared_attrs)
Esempio n. 17
0
def main(cfg):
    """Train a CTC model (BPE variant when a tokenizer is configured) and optionally test it.

    Args:
        cfg: config providing ``n_gpus``, ``seed``, ``trainer``, ``model`` and optionally
            ``exp_manager``.
    """
    if cfg.n_gpus > 0:
        # The configured training batch size is split across the GPUs.
        cfg.model.train_ds.batch_size //= cfg.n_gpus

    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg, resolve=True)}')

    pl.utilities.seed.seed_everything(cfg.seed)

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))

    model_cls = EncDecCTCModelBPE if "tokenizer" in cfg.model else EncDecCTCModel
    asr_model = model_cls(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    has_test_manifest = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not has_test_manifest:
        return

    # Testing uses a separate trainer with at most one GPU.
    test_trainer = pl.Trainer(
        gpus=1 if cfg.trainer.gpus != 0 else 0,
        precision=trainer.precision,
        amp_level=trainer.accelerator_connector.amp_level,
        amp_backend=cfg.trainer.get("amp_backend", "native"),
    )
    if asr_model.prepare_test(test_trainer):
        test_trainer.test(asr_model)
Esempio n. 18
0
    def test_save_restore_from_nemo_file(self, asr_model):
        """Check that a model restored from a saved .nemo file has the same weights."""

        def first_conv_weights(model):
            # Weights of the first encoder block's first convolution, as a numpy array.
            return model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

        with tempfile.NamedTemporaryFile() as archive_fp:
            archive_path = archive_fp.name

            # Save model (with random artifact).
            with tempfile.NamedTemporaryFile() as artifact:
                asr_model.register_artifact(config_path=None, src=artifact.name)
                asr_model.save_to(save_path=archive_path)

            # Restore the model.
            restored = EncDecCTCModel.restore_from(restore_path=archive_path)

            assert len(asr_model.decoder.vocabulary) == len(restored.decoder.vocabulary)
            assert asr_model.num_weights == restored.num_weights
            assert np.array_equal(first_conv_weights(asr_model), first_conv_weights(restored))
Esempio n. 19
0
    def set_asr_model(self, asr_model):
        """
        Load the pretrained ASR model named ``asr_model`` and set the matching parameters.

        The model family is selected by substring match on the name:
            'QuartzNet'                             -> EncDecCTCModel
            'conformer_ctc'                         -> EncDecCTCModelBPE
            'citrinet'                              -> EncDecCTCModelBPE
            'conformer_transducer' / 'contextnet'   -> EncDecRNNTBPEModel

        Args:
            asr_model: name of the pretrained model.

        Returns:
            The loaded model, switched to eval mode.

        Raises:
            ValueError: if the name matches none of the families above.
        """
        model_name = asr_model

        def apply_buffered_settings(stride_in_secs):
            # Settings shared by every non-QuartzNet branch; only the stride differs.
            self.model_stride_in_secs = stride_in_secs
            self.asr_delay_sec = 0.0
            self.params['offset'] = 0
            self.chunk_len_in_sec = 1.6
            self.total_buffer_in_secs = 4

        if 'QuartzNet' in model_name:
            self.run_ASR = self.run_ASR_QuartzNet_CTC
            loaded_model = EncDecCTCModel.from_pretrained(model_name=model_name, strict=False)
            self.params['offset'] = -0.18
            self.model_stride_in_secs = 0.02
            self.asr_delay_sec = -1 * self.params['offset']
        elif 'conformer_ctc' in model_name:
            self.run_ASR = self.run_ASR_BPE_CTC
            loaded_model = EncDecCTCModelBPE.from_pretrained(model_name=model_name, strict=False)
            apply_buffered_settings(0.04)
        elif 'citrinet' in model_name:
            self.run_ASR = self.run_ASR_BPE_CTC
            loaded_model = EncDecCTCModelBPE.from_pretrained(model_name=model_name, strict=False)
            apply_buffered_settings(0.08)
        elif 'conformer_transducer' in model_name or 'contextnet' in model_name:
            self.run_ASR = self.run_ASR_BPE_RNNT
            loaded_model = EncDecRNNTBPEModel.from_pretrained(model_name=model_name, strict=False)
            apply_buffered_settings(0.04)
        else:
            raise ValueError(f"ASR model name not found: {model_name}")

        self.params['time_stride'] = self.model_stride_in_secs
        self.asr_batch_size = 16
        loaded_model.eval()

        self.audio_file_list = [entry['audio_filepath'] for entry in self.AUDIO_RTTM_MAP.values()]

        return loaded_model
Esempio n. 20
0
def generate_ref_hyps(asr_model: EncDecCTCModel, search: str, arpa: str):
    """Yield ``(reference, hypothesis)`` pairs for each utterance of the model's test dataloader.

    Args:
        asr_model: model whose ``test_dataloader()`` is iterated.
        search: decoding strategy: ``"greedy"``, ``"beamsearch"`` or ``"kenlm"``.
        arpa: passed to ``prepare_arpa_file``; only used for ``"kenlm"`` and ``"beamsearch"``.

    Yields:
        Tuples of the reference transcript string and the decoded hypothesis.

    Raises:
        ValueError: on the first iteration, if ``search`` is not a supported strategy.
    """
    if search not in ("greedy", "kenlm", "beamsearch"):
        # An unknown strategy used to fail only later, on the never-created beam search decoder.
        raise ValueError(f"Unknown search strategy: {search!r}")

    if can_gpu:
        asr_model = asr_model.cuda()
        print("USING GPU!")

    asr_model.eval()
    vocabulary = asr_model.decoder.vocabulary
    # Maps a label index to its vocabulary entry, used to rebuild reference transcripts.
    labels_map = dict(enumerate(vocabulary))
    wer = WER(vocabulary=vocabulary)

    if search == "kenlm" or search == "beamsearch":
        arpa_file = prepare_arpa_file(arpa)
        # Plain beam search runs without a language model.
        lm_path = arpa_file if search == "kenlm" else None

        beamsearcher = nemo_asr.modules.BeamSearchDecoderWithLM(
            vocab=list(vocabulary),
            beam_width=16,
            alpha=2,
            beta=1.5,
            lm_path=lm_path,
            # os.cpu_count() may return None, which max() cannot compare with an int.
            num_cpus=os.cpu_count() or 1,
            input_tensor=True,
        )

    for batch in asr_model.test_dataloader():
        # TODO(tilo): test_loader should return dict or some typed object not tuple of tensors!!
        if can_gpu:
            batch = [x.cuda() for x in batch]
        input_signal, inpsig_len, transcript, transc_len = batch
        with autocast():
            log_probs, encoded_len, greedy_predictions = asr_model(
                input_signal=input_signal, input_signal_length=inpsig_len)
        if search == "greedy":
            decoded = wer.ctc_decoder_predictions_tensor(greedy_predictions)
        else:
            decoded = beamsearch_forward(beamsearcher,
                                         log_probs=log_probs,
                                         log_probs_length=encoded_len)

        for i, hyp in enumerate(decoded):
            # The transcript row is cut at its own length before being mapped to labels.
            reference = "".join([
                labels_map[c]
                for c in transcript[i].cpu().detach().numpy()[:transc_len[i]]
            ])
            yield reference, hyp
Esempio n. 21
0
 def __init__(
     self,
     model: EncDecCTCModel,
     sample_rate: int,
     batch_size: int = 1,
     device: str = "cuda",
 ) -> None:
     """Wire an online audio dataset, its data loader and the model on the target device.

     Args:
         model: model that is switched to eval mode and moved to ``device``.
         sample_rate: forwarded to ``ASROnlineAudioData``.
         batch_size: batch size of the data loader.
         device: string passed to ``torch.device``.
     """
     super(ASRAudioEncoderDecoder, self).__init__()

     audio_source = ASROnlineAudioData(sample_rate)
     self.online_audio = audio_source
     self.data_loader = DataLoader(
         dataset=audio_source, batch_size=batch_size, collate_fn=audio_source.collate_fn,
     )

     model.eval()
     target_device = torch.device(device)
     self.device = target_device
     self.model = model.to(target_device)
Esempio n. 22
0
    def test_EncDecCTCModel_adapted_export_to_onnx(self):
        """Export an adapter-augmented EncDecCTCModel to ONNX and validate the resulting graph."""
        sections = {
            'preprocessor': self.preprocessor,
            'encoder': self.encoder_dict,
            'decoder': self.decoder_dict,
        }
        model_config = DictConfig({key: DictConfig(value) for key, value in sections.items()})

        # support adapter in encoder: switch to the ...Adapter class (ConvASREncoderAdapter)
        model_config.encoder.cls += 'Adapter'

        # load model
        model = EncDecCTCModel(cfg=model_config)

        # add adapter sized to the first jasper block's filter count
        adapter_width = model_config.encoder.params.jasper[0].filters
        adapter_cfg = OmegaConf.structured(LinearAdapterConfig(in_features=adapter_width, dim=32))
        model.add_adapter('temp', cfg=adapter_cfg)

        model = model.cuda()

        with tempfile.TemporaryDirectory() as export_dir:
            onnx_path = os.path.join(export_dir, 'qn.onnx')
            model.export(output=onnx_path, check_trace=True)
            exported = onnx.load(onnx_path)
            # check_model throws when validation fails
            onnx.checker.check_model(exported, full_check=True)
            graph = exported.graph
            assert graph.input[0].name == 'audio_signal'
            assert graph.output[0].name == 'logprobs'
Esempio n. 23
0
def main(cfg):
    """Train an EncDecCTCModel from ``cfg`` and test it when a test manifest is configured.

    Args:
        cfg: config providing ``trainer``, ``model`` and optionally ``exp_manager``.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    trainer.fit(asr_model)

    has_test_manifest = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not has_test_manifest:
        return

    # Testing uses a fresh trainer with at most one GPU.
    test_trainer = pl.Trainer(
        gpus=1 if cfg.trainer.gpus != 0 else 0,
        precision=cfg.trainer.precision,
        amp_level=cfg.trainer.amp_level,
        amp_backend=cfg.trainer.amp_backend,
    )
    if asr_model.prepare_test(test_trainer):
        test_trainer.test(asr_model)
Esempio n. 24
0
def batch_inference(args: argparse.Namespace):
    """Run inference over a prepared manifest, write refs/hyps/stats files and return the stats.

    Args:
        args: namespace read for ``asr_model``, ``corpora_dir``, ``limit``, ``batch_size``,
            ``normalize_text``, ``search``, ``arpa`` and ``results_dir``.

    Returns:
        Dict with the word error rate under ``"wer"`` and ``args.__dict__`` under ``"args"``.
    """

    torch.set_grad_enabled(False)

    model_ref = args.asr_model
    if model_ref.endswith(".nemo"):
        print(f"Using local ASR model from {model_ref}")
        asr_model = EncDecCTCModel.restore_from(restore_path=model_ref)
    else:
        print(f"Using NGC cloud ASR model {model_ref}")
        asr_model = EncDecCTCModel.from_pretrained(model_name=model_ref)

    manifest = prepare_manifest(args.corpora_dir, args.limit)
    test_data_config = {
        "sample_rate": 16000,
        "manifest_filepath": manifest,
        "labels": asr_model.decoder.vocabulary,
        "batch_size": args.batch_size,
        "normalize_transcripts": args.normalize_text,
    }
    asr_model.setup_test_data(test_data_config=test_data_config)

    pair_stream = generate_ref_hyps(asr_model, args.search, args.arpa)
    refs_hyps = list(tqdm(pair_stream))
    # Transpose the (reference, hypothesis) pairs into two parallel lists.
    references, hypotheses = map(list, zip(*refs_hyps))

    results_dir = args.results_dir
    os.makedirs(results_dir, exist_ok=True)
    data_io.write_lines(f"{results_dir}/refs.txt.gz", references)
    data_io.write_lines(f"{results_dir}/hyps.txt.gz", hypotheses)

    wer_value = word_error_rate(hypotheses=hypotheses, references=references)
    sys.stdout.flush()
    stats = {"wer": wer_value, "args": args.__dict__}
    data_io.write_json(f"{results_dir}/stats.txt", stats)
    print(f"Got WER of {wer_value}")
    return stats
Esempio n. 25
0
def main(cfg):
    """Train an EncDecCTCModel from ``cfg`` and test it with a dedicated trainer if configured.

    Args:
        cfg: config providing ``trainer``, ``model`` and optionally ``exp_manager``.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))
    asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)

    # Initialize the weights of the model from another model, if provided via config
    asr_model.maybe_init_from_pretrained_checkpoint(cfg)

    trainer.fit(asr_model)

    has_test_manifest = hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None
    if not has_test_manifest:
        return

    # Testing uses a separate trainer with at most one GPU.
    test_trainer = pl.Trainer(
        gpus=1 if cfg.trainer.gpus != 0 else 0,
        precision=trainer.precision,
        amp_level=trainer.accelerator_connector.amp_level,
        amp_backend=cfg.trainer.get("amp_backend", "native"),
    )
    if asr_model.prepare_test(test_trainer):
        test_trainer.test(asr_model)
Esempio n. 26
0
def conformer_model():
    """Build an EncDecCTCModel with a two-layer Conformer encoder and a 1024-class decoder.

    Returns:
        The constructed ``EncDecCTCModel``.
    """
    preprocessor_cfg = {
        'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
        'params': {},
    }

    encoder_params = {
        'feat_in': 80,
        'feat_out': -1,
        'n_layers': 2,
        'd_model': 256,
        'subsampling': 'striding',
        'subsampling_factor': 4,
        'subsampling_conv_channels': 512,
        'ff_expansion_factor': 4,
        'self_attention_model': 'rel_pos',
        'n_heads': 8,
        'att_context_size': [-1, -1],
        'xscaling': True,
        'untie_biases': True,
        'pos_emb_max_len': 500,
        'conv_kernel_size': 31,
        'dropout': 0.1,
        'dropout_emb': 0.0,
        'dropout_att': 0.1,
    }
    encoder_cfg = {
        'cls': 'nemo.collections.asr.modules.ConformerEncoder',
        'params': encoder_params,
    }

    # 1024 single-character labels cycling through code points 0..27.
    vocabulary = [chr(index % 28) for index in range(1024)]
    decoder_cfg = {
        'cls': 'nemo.collections.asr.modules.ConvASRDecoder',
        'params': {'feat_in': 256, 'num_classes': 1024, 'vocabulary': vocabulary},
    }

    model_cfg = DictConfig(
        {
            'preprocessor': DictConfig(preprocessor_cfg),
            'encoder': DictConfig(encoder_cfg),
            'decoder': DictConfig(decoder_cfg),
        }
    )
    return EncDecCTCModel(cfg=model_cfg)
Esempio n. 27
0
def infer(model, audiofiles, batch_size=4):
    """Transcribe audio files with a restored CTC model and collect per-utterance log-probs.

    Args:
        model: path passed to ``EncDecCTCModel.restore_from``.
        audiofiles: audio file paths; written to a temporary manifest and passed to the
            transcribe dataloader config.
        batch_size: batch size placed in the transcribe dataloader config.

    Returns:
        Tuple ``(characters, log_probs, vocab)``: the greedy CTC decodings, one nested list of
        log-probabilities per utterance cut at its encoded length, and the labels read from
        ``asr_model._cfg.train_ds.labels``.
    """

    asr_model = EncDecCTCModel.restore_from(model)

    was_training = asr_model.training
    device = next(asr_model.parameters()).device
    asr_model.eval()
    vocab = asr_model._cfg.train_ds.labels

    characters = []
    log_probs = []
    with tempfile.TemporaryDirectory() as tmpdir:
        manifest_path = os.path.join(tmpdir, 'manifest.json')
        with open(manifest_path, 'w') as fp:
            # One JSON object per line; duration and text are placeholders.
            fp.writelines(
                json.dumps({'audio_filepath': audio_path, 'duration': 100000, 'text': 'nothing'}) + '\n'
                for audio_path in audiofiles
            )

        config = {'paths2audio_files': audiofiles, 'batch_size': batch_size, 'temp_dir': tmpdir}
        temporary_datalayer = asr_model._setup_transcribe_dataloader(config)

        for test_batch in temporary_datalayer:
            batch_log_prob, batch_encoded_len, greedy_predictions = asr_model.forward(
                input_signal=test_batch[0].to(device),
                input_signal_length=test_batch[1].to(device))
            characters.extend(asr_model._wer.ctc_decoder_predictions_tensor(greedy_predictions))

            batch_encoded_len = batch_encoded_len.long().cpu()
            batch_log_prob = batch_log_prob.float().cpu()
            for row in range(batch_encoded_len.shape[0]):
                valid_len = batch_encoded_len[row].detach().numpy().tolist()
                row_log_prob = batch_log_prob[row].detach().numpy().tolist()
                # Keep only the first ``valid_len`` entries of this utterance.
                log_probs.append(row_log_prob[:valid_len])
            del test_batch

    # Put the model back into the mode it was in before eval().
    asr_model.train(was_training)
    return characters, log_probs, vocab
Esempio n. 28
0
    def test_save_restore_from_nemo_file_with_override(self, asr_model, tmpdir):
        """Check that restoring with an overriding config keeps the weights and applies the override.

        The model is saved (with an artifact), its config is modified and written to a file, and
        the archive is restored with that file as ``override_config_path``.

        Args:
            tmpdir: fixture providing a temporary directory unique to the test invocation.
        """
        # Name of the archive in tmp folder.
        filename = os.path.join(tmpdir, "eff.nemo")

        with tempfile.NamedTemporaryFile(mode='a+') as conf_fp:

            # Create a "random artifact".
            with tempfile.NamedTemporaryFile(mode="w", delete=False) as artifact:
                artifact.write("magic content 42")
            try:
                # Add artifact to model.
                asr_model.register_artifact(config_path=None, src=artifact.name)
                # Save model (with "random artifact").
                asr_model.save_to(save_path=filename)
            finally:
                # The artifact was created with delete=False, so nothing else removes it.
                os.remove(artifact.name)

            # Modify config slightly
            cfg = asr_model.cfg
            cfg.encoder.params.activation = 'swish'
            yaml_cfg = OmegaConf.to_yaml(cfg)
            conf_fp.write(yaml_cfg)
            # Rewind, which also flushes the written config so it can be read by path.
            conf_fp.seek(0)

            # Restore the model.
            asr_model2 = EncDecCTCModel.restore_from(restore_path=filename, override_config_path=conf_fp.name)

            assert len(asr_model.decoder.vocabulary) == len(asr_model2.decoder.vocabulary)
            assert asr_model.num_weights == asr_model2.num_weights

            w1 = asr_model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()
            w2 = asr_model2.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

            assert np.array_equal(w1, w2)

            assert asr_model2.cfg.encoder.params.activation == 'swish'
Esempio n. 29
0
    def test_to_from_config_file(self, asr_model):
        """Check that an instance built from the same config file (but no checkpoint)
        has the same number of weights and different weight values."""

        def first_conv_weights(model):
            # Weights of the first encoder block's first convolution, as a numpy array.
            return model.encoder.encoder[0].mconv[0].conv.weight.data.detach().cpu().numpy()

        with tempfile.NamedTemporaryFile() as config_fp:
            config_path = config_fp.name
            asr_model.to_config_file(path2yaml_file=config_path)
            rebuilt = EncDecCTCModel.from_config_file(path2yaml_file=config_path)

            assert isinstance(rebuilt, EncDecCTCModel)
            assert len(rebuilt.decoder.vocabulary) == 28
            assert asr_model.num_weights == rebuilt.num_weights

            # No checkpoint was loaded, so the weight values are expected to differ.
            assert not np.array_equal(first_conv_weights(asr_model), first_conv_weights(rebuilt))
Esempio n. 30
0
def main(
    nemo_file, onnx_file, model_type='asr',
):
    """Restore a ``.nemo`` model of the given type and export it to ONNX.

    Args:
        nemo_file: path of the ``.nemo`` archive to restore.
        onnx_file: path the ONNX graph is exported to.
        model_type: one of ``'asr'``, ``'speech_label'`` or ``'speaker'``.

    Raises:
        NameError: if ``model_type`` is not one of the supported names.
    """
    # Reject unknown model types up front, before anything is logged or loaded.
    if model_type not in ('asr', 'speech_label', 'speaker'):
        raise NameError("Available model names are asr, speech_label and speaker")

    if model_type == 'asr':
        logging.info("Preparing ASR model")
        model = EncDecCTCModel.restore_from(nemo_file)
    elif model_type == 'speech_label':
        logging.info("Preparing Speech Label Classification model")
        model = EncDecClassificationModel.restore_from(nemo_file)
    else:
        logging.info("Preparing Speaker Recognition model")
        model = EncDecSpeakerLabelModel.restore_from(nemo_file)

    logging.info("Writing onnx file")
    model.export(onnx_file, onnx_opset_version=12)
    logging.info("succesfully ported onnx file")