Example #1
0
def convert_wav2vec2_checkpoint(checkpoint_path,
                                pytorch_dump_folder_path,
                                dict_path=None):
    """
    Port the weights of a fairseq checkpoint into a transformers model and save it.

    Args:
        checkpoint_path: Path of the fairseq checkpoint to load.
        pytorch_dump_folder_path: Destination handed to ``save_pretrained``.
        dict_path: Value forwarded to fairseq as the ``"data"`` argument
            override (``None`` by default).
    """
    # Target model is built from a default configuration; its weights are
    # overwritten below.
    target_model = Wav2Vec2ForMaskedLM(Wav2Vec2Config())

    overrides = {"data": dict_path}
    ensemble, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
        [checkpoint_path], arg_overrides=overrides)

    # Only the first model of the loaded ensemble is converted, in eval mode.
    source_model = ensemble[0].eval()

    recursively_load_weights(source_model, target_model)

    target_model.save_pretrained(pytorch_dump_folder_path)
    def test_inference_masked_lm_normal(self):
        """Greedy-decode one sample with the base 960h checkpoint and compare it to the reference text."""
        checkpoint = "facebook/wav2vec2-base-960h"

        asr_model = Wav2Vec2ForMaskedLM.from_pretrained(checkpoint)
        asr_model.to(torch_device)
        asr_tokenizer = Wav2Vec2Tokenizer.from_pretrained(checkpoint, do_lower_case=True)

        speech = self._load_datasamples(1)

        encoded = asr_tokenizer(speech, return_tensors="pt")
        input_values = encoded.input_values.to(torch_device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = asr_model(input_values).logits

        # Pick the highest-scoring token id along the last dimension, then decode.
        token_ids = torch.argmax(logits, dim=-1)
        transcriptions = asr_tokenizer.batch_decode(token_ids)

        expected = ["a man said to the universe sir i exist"]
        self.assertListEqual(transcriptions, expected)
    def test_inference_masked_lm_normal_batched(self):
        """Greedy-decode a padded batch of two samples with the base 960h checkpoint."""
        checkpoint = "facebook/wav2vec2-base-960h"

        asr_model = Wav2Vec2ForMaskedLM.from_pretrained(checkpoint)
        asr_model.to(torch_device)
        asr_tokenizer = Wav2Vec2Tokenizer.from_pretrained(checkpoint, do_lower_case=True)

        speech = self._load_datasamples(2)

        # Padding and truncation are enabled so the samples can be batched
        # into a single tensor.
        encoded = asr_tokenizer(speech, return_tensors="pt", padding=True, truncation=True)
        input_values = encoded.input_values.to(torch_device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = asr_model(input_values).logits

        # Pick the highest-scoring token id along the last dimension, then decode.
        token_ids = torch.argmax(logits, dim=-1)
        transcriptions = asr_tokenizer.batch_decode(token_ids)

        expected = [
            "a man said to the universe sir i exist",
            "sweat covered brion's body trickling into the tight lowing cloth that was the only garment he wore",
        ]
        self.assertListEqual(transcriptions, expected)
    def test_inference_masked_lm_robust_batched(self):
        """Greedy-decode a padded batch of four samples with the large lv60-self checkpoint."""
        checkpoint = "facebook/wav2vec2-large-960h-lv60-self"

        asr_model = Wav2Vec2ForMaskedLM.from_pretrained(checkpoint)
        asr_model = asr_model.to(torch_device)
        asr_tokenizer = Wav2Vec2Tokenizer.from_pretrained(checkpoint, do_lower_case=True)

        speech = self._load_datasamples(4)

        # Padding and truncation are enabled so the samples can be batched
        # into a single tensor.
        encoded = asr_tokenizer(speech, return_tensors="pt", padding=True, truncation=True)
        input_values = encoded.input_values.to(torch_device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = asr_model(input_values).logits

        # Pick the highest-scoring token id along the last dimension, then decode.
        token_ids = torch.argmax(logits, dim=-1)
        transcriptions = asr_tokenizer.batch_decode(token_ids)

        expected = [
            "a man said to the universe sir i exist",
            "sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore",
            "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about",
            "his instant panic was followed by a small sharp blow high on his chest",
        ]
        self.assertListEqual(transcriptions, expected)
Example #5
0
# Feature-extraction modules: each one is imported only when its name is
# listed in feature_sets.
if 'speechmetrics_features' in feature_sets:
	import speechmetrics_features
if 'standard_features' in feature_sets:
	import standard_features
if 'surfboard_features' in feature_sets:
	import surfboard_features

# Transcription back-ends: imported only when listed in default_audio_transcribers.
if 'azure' in default_audio_transcribers:
	import azure.cognitiveservices.speech as speechsdk
if 'wav2vec' in default_audio_transcribers:
	import os
	import pandas as pd
	import soundfile as sf
	import torch
	import glob
	from pathlib import Path
	from transformers import Wav2Vec2ForMaskedLM, Wav2Vec2Tokenizer
	# The tokenizer and the model are both loaded from the same pretrained checkpoint.
	tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
	wav_model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-base-960h")
else:
	# Empty-string placeholders keep both names defined when wav2vec is not selected.
	tokenizer = ''
	wav_model = ''
# Load the HuBERT transcription processor and CTC model only when 'hubert' is a
# selected transcriber and 'hubert_features' is not among the feature sets.
# NOTE(review): presumably the hubert_features path provides its own HuBERT
# objects when enabled, which is why it is excluded here - confirm.
if 'hubert' in default_audio_transcribers and 'hubert_features' not in feature_sets:
	import torch
	# HubertModel and HubertConfig are imported but not used in the visible part of this block.
	from transformers import HubertModel, HubertConfig
	from transformers import Wav2Vec2Processor, HubertForCTC
	import soundfile as sf

	# Hubert transcript: the processor and the model come from the same pretrained checkpoint.
	hubert_processor_ = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
	hubert_model_ = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")

else:
	# Empty-string placeholder keeps the name defined when HuBERT is not loaded here.
	hubert_processor_=''