Esempio n. 1
0
def make_processor_with_lm(recognizer_dir='', save=True):
    """Build a ``Wav2Vec2ProcessorWithLM`` from a pretrained processor.

    Args:
        recognizer_dir: Directory handed to ``load_pretrained_processor``.
            When falsy, the module-level ``path`` is used instead and a
            notice is printed.
        save: When truthy, the resulting processor is written back to
            ``recognizer_dir`` via ``save_pretrained``.

    Returns:
        The newly constructed ``Wav2Vec2ProcessorWithLM``.
    """
    if not recognizer_dir:
        print('no directory provided, using default at:', path)
        recognizer_dir = path

    base = load_pretrained_processor(recognizer_dir)
    # The decoder is built from the vocab returned by load_and_sort_vocab.
    lm_decoder = make_ctc_decoder(load_and_sort_vocab(base))

    combined = Wav2Vec2ProcessorWithLM(
        feature_extractor=base.feature_extractor,
        tokenizer=base.tokenizer,
        decoder=lm_decoder,
    )

    if save:
        combined.save_pretrained(recognizer_dir)
    return combined
Esempio n. 2
0
    def test_wav2vec2_with_lm(self):
        """Check LM-boosted decoding of a Spanish Common Voice sample (Flax).

        Streams the first item of the Common Voice "es" test split,
        resamples it, runs the Flax CTC model, and compares the first decoded
        transcription against a fixed reference string.
        """
        ds = load_dataset("common_voice", "es", split="test", streaming=True)
        sample = next(iter(ds))

        # Sample rates are passed by keyword: librosa >= 0.10 made them
        # keyword-only, and the same names are accepted by older releases.
        # The clip is treated as 48 kHz audio and brought down to 16 kHz.
        resampled_audio = librosa.resample(
            sample["audio"]["array"], orig_sr=48_000, target_sr=16_000
        )

        model = FlaxWav2Vec2ForCTC.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
        processor = Wav2Vec2ProcessorWithLM.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")

        input_values = processor(resampled_audio, return_tensors="np").input_values

        logits = model(input_values).logits

        # Logits are converted to a NumPy array before being handed to
        # batch_decode.
        transcription = processor.batch_decode(np.array(logits)).text

        self.assertEqual(transcription[0], "bien y qué regalo vas a abrir primero")
Esempio n. 3
0
    def test_wav2vec2_with_lm(self):
        """Check LM-boosted decoding of a downloaded Spanish sample (TF).

        Downloads the sample repository snapshot, loads the first globbed
        file at 16 kHz, runs the TF CTC model, and compares the first decoded
        transcription against a fixed reference string.
        """
        checkpoint = "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm"

        snapshot_dir = snapshot_download("patrickvonplaten/common_voice_es_sample")
        candidates = glob.glob(snapshot_dir + "/*")
        # librosa.load returns (audio, sample_rate); only the audio is used.
        sample, _ = librosa.load(candidates[0], sr=16_000)

        model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2ProcessorWithLM.from_pretrained(checkpoint)

        features = processor(sample, return_tensors="tf")
        logits = model(features.input_values).logits

        decoded = processor.batch_decode(logits.numpy())
        transcription = decoded.text

        expected = "el libro ha sido escrito por cervantes"
        self.assertEqual(transcription[0], expected)
Esempio n. 4
0
def load_processor_with_lm(recognizer_dir=''):
    """Load a saved ``Wav2Vec2ProcessorWithLM``.

    Args:
        recognizer_dir: Location passed to ``from_pretrained``. When falsy,
            the module-level ``path`` is used instead.

    Returns:
        The processor returned by ``Wav2Vec2ProcessorWithLM.from_pretrained``.
    """
    # `or` only evaluates `path` when no directory was supplied.
    source_dir = recognizer_dir or path
    return Wav2Vec2ProcessorWithLM.from_pretrained(source_dir)