def make_processor_with_lm(recognizer_dir='', save=True):
    """Build a ``Wav2Vec2ProcessorWithLM`` from a pretrained processor.

    Args:
        recognizer_dir: Directory handed to ``load_pretrained_processor``.
            When falsy, the module-level ``path`` is used instead and a
            notice is printed.
        save: When truthy, the assembled processor is written back to
            ``recognizer_dir`` via ``save_pretrained``.

    Returns:
        The newly constructed ``Wav2Vec2ProcessorWithLM`` instance.
    """
    if not recognizer_dir:
        print('no directory provided, using default at:', path)
        recognizer_dir = path

    base = load_pretrained_processor(recognizer_dir)

    # The decoder is built from the processor's vocabulary after it has been
    # passed through load_and_sort_vocab (helper defined elsewhere).
    lm_decoder = make_ctc_decoder(load_and_sort_vocab(base))

    combined = Wav2Vec2ProcessorWithLM(
        feature_extractor=base.feature_extractor,
        tokenizer=base.tokenizer,
        decoder=lm_decoder,
    )

    if not save:
        return combined

    combined.save_pretrained(recognizer_dir)
    return combined
def test_wav2vec2_with_lm(self):
    """Check LM-boosted decoding of the Flax Wav2Vec2 CTC model.

    Takes the first sample streamed from the Spanish Common Voice test
    split, resamples it to 16 kHz, runs it through the pretrained Flax
    model and asserts the transcription produced by the LM-backed processor.
    """
    ds = load_dataset("common_voice", "es", split="test", streaming=True)
    sample = next(iter(ds))

    # Sample rates are passed by keyword: librosa >= 0.10 made every
    # argument after the signal keyword-only, so the positional form raises
    # TypeError there. Older releases accept these keyword names too.
    # NOTE(review): 48 kHz is assumed as the source rate — confirm against
    # sample["audio"]["sampling_rate"].
    resampled_audio = librosa.resample(
        sample["audio"]["array"], orig_sr=48_000, target_sr=16_000
    )

    model = FlaxWav2Vec2ForCTC.from_pretrained(
        "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm"
    )
    processor = Wav2Vec2ProcessorWithLM.from_pretrained(
        "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm"
    )

    input_values = processor(resampled_audio, return_tensors="np").input_values
    logits = model(input_values).logits

    # The logits are converted to a NumPy array before being handed to
    # batch_decode.
    transcription = processor.batch_decode(np.array(logits)).text

    self.assertEqual(transcription[0], "bien y qué regalo vas a abrir primero")
def test_wav2vec2_with_lm(self):
    """Check LM-boosted decoding of the TensorFlow Wav2Vec2 CTC model.

    Downloads a snapshot of a sample repository, loads the first file found
    in it at 16 kHz, runs the pretrained TF model on it and asserts the
    transcription produced by the LM-backed processor.
    """
    checkpoint = "patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm"

    snapshot_dir = snapshot_download("patrickvonplaten/common_voice_es_sample")
    matches = glob.glob(snapshot_dir + "/*")
    audio_file = matches[0]

    # librosa.load returns (signal, sampling_rate); only the signal is used.
    waveform, _ = librosa.load(audio_file, sr=16_000)

    tf_model = TFWav2Vec2ForCTC.from_pretrained(checkpoint)
    lm_processor = Wav2Vec2ProcessorWithLM.from_pretrained(checkpoint)

    features = lm_processor(waveform, return_tensors="tf")
    outputs = tf_model(features.input_values)

    # The logits are converted with .numpy() before being handed to
    # batch_decode.
    decoded = lm_processor.batch_decode(outputs.logits.numpy())

    self.assertEqual(decoded.text[0], "el libro ha sido escrito por cervantes")
def load_processor_with_lm(recognizer_dir=''):
    """Load a saved ``Wav2Vec2ProcessorWithLM``.

    Args:
        recognizer_dir: Location passed to ``from_pretrained``. When falsy,
            the module-level ``path`` is used instead.

    Returns:
        The processor returned by ``Wav2Vec2ProcessorWithLM.from_pretrained``.
    """
    # `or` short-circuits, so `path` is only consulted when no directory
    # was supplied.
    source_dir = recognizer_dir or path
    return Wav2Vec2ProcessorWithLM.from_pretrained(source_dir)