def test_wav2vec2_with_lm(self): ds = load_dataset("common_voice", "es", split="test", streaming=True) sample = next(iter(ds)) resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000) model = FlaxWav2Vec2ForCTC.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm") processor = Wav2Vec2ProcessorWithLM.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm") input_values = processor(resampled_audio, return_tensors="np").input_values logits = model(input_values).logits transcription = processor.batch_decode(np.array(logits)).text self.assertEqual(transcription[0], "bien y qué regalo vas a abrir primero")
def test_wav2vec2_with_lm(self): downloaded_folder = snapshot_download( "patrickvonplaten/common_voice_es_sample") file_path = glob.glob(downloaded_folder + "/*")[0] sample = librosa.load(file_path, sr=16_000)[0] model = TFWav2Vec2ForCTC.from_pretrained( "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm") processor = Wav2Vec2ProcessorWithLM.from_pretrained( "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm") input_values = processor(sample, return_tensors="tf").input_values logits = model(input_values).logits transcription = processor.batch_decode(logits.numpy()).text self.assertEqual(transcription[0], "el libro ha sido escrito por cervantes")
def load_processor_with_lm(recognizer_dir=''): if not recognizer_dir: recognizer_dir = path return Wav2Vec2ProcessorWithLM.from_pretrained(recognizer_dir)