Ejemplos en Python de Speech2TextForConditionalGeneration.from_pretrained (transformers.Speech2TextForConditionalGeneration.from_pretrained)

Ejemplo n.º 1

0

Mostrar archivo

Archivo: test_pipelines_automatic_speech_recognition.py Proyecto: maximedb/transformers

    def test_simple_s2t(self):
        """Check the ASR pipeline on a waveform, a file path and raw file bytes.

        Model, tokenizer and feature extractor all come from the
        ``facebook/s2t-small-mustc-en-it-st`` checkpoint; every pipeline call
        is compared against a fixed expected ``{"text": ...}`` dict.
        """
        checkpoint = "facebook/s2t-small-mustc-en-it-st"
        asr = AutomaticSpeechRecognitionPipeline(
            model=Speech2TextForConditionalGeneration.from_pretrained(checkpoint),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
            feature_extractor=AutoFeatureExtractor.from_pretrained(checkpoint),
        )

        # Synthetic input: a 0..999 float32 ramp repeated 34 times.
        ramp = np.arange(1000, dtype=np.float32)
        self.assertEqual(asr(np.tile(ramp, 34)), {"text": "(Applausi)"})

        # Row 40 of the dummy validation split once it is sorted by "id".
        dummy_split = load_dataset(
            "hf-internal-testing/librispeech_asr_dummy",
            "clean",
            split="validation",
        )
        audio_path = dummy_split.sort("id")[40]["file"]
        expected = {"text": "Un uomo disse all'universo: \"Signore, io esisto."}

        # First the pipeline receives the path itself ...
        self.assertEqual(asr(audio_path), expected)

        # ... then the undecoded bytes read from that same path.
        with open(audio_path, "rb") as audio_file:
            raw_bytes = audio_file.read()
        self.assertEqual(asr(raw_bytes), expected)

Ejemplo n.º 2

0

Mostrar archivo

    def test_generation_librispeech_batched(self):
        """Generate transcripts for a padded batch of four samples and compare to fixed references."""
        model = Speech2TextForConditionalGeneration.from_pretrained(
            "facebook/s2t-small-librispeech-asr"
        )
        model.to(torch_device)
        processor = self.default_processor

        # padding=True lets the four samples share one tensor; the attention
        # mask returned alongside is forwarded to generate().
        batch = processor(self._load_datasamples(4), return_tensors="pt", padding=True)

        generated_ids = model.generate(
            batch.input_features.to(torch_device),
            attention_mask=batch.attention_mask.to(torch_device),
        )
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)

        expected_transcripts = [
            "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel",
            "nor is mister cultar's manner less interesting than his matter",
            "he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind",
            "he has grave doubts whether sir frederick leyton's work is really greek after all and can discover in it but little of rocky ithaca",
        ]
        self.assertListEqual(decoded, expected_transcripts)

Ejemplo n.º 3

0

Mostrar archivo

    def test_generation_librispeech_batched(self):
        """Batched generation check: four samples in, four reference transcripts expected out."""
        model = Speech2TextForConditionalGeneration.from_pretrained(
            "facebook/s2t-small-librispeech-asr"
        )
        model.to(torch_device)
        processor = self.default_processor

        samples = self._load_datasamples(4)
        encoded = processor(samples, return_tensors="pt", padding=True)

        # Move both tensors to the test device before generating.
        features_on_device = encoded.input_features.to(torch_device)
        mask_on_device = encoded.attention_mask.to(torch_device)

        token_ids = model.generate(features_on_device, attention_mask=mask_on_device)
        transcripts = processor.batch_decode(token_ids, skip_special_tokens=True)

        reference = [
            "a man said to the universe sir i exist",
            "sweat covered brion's body trickling into the titleing cloth that was the only garment he wore",
            "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about",
            "his instant of panic was followed by a small sharp blow high on his chest",
        ]
        self.assertListEqual(transcripts, reference)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: model.py Proyecto: avigupta2612/Voice-recoder-app

def s2t_predictions(audio_file):
    """Transcribe an audio file with the ``facebook/s2t-small-librispeech-asr`` checkpoint.

    Args:
        audio_file: Passed unchanged to ``s2t_audio_to_array``; the accepted
            type is whatever that helper supports (not visible from here).

    Returns:
        The result of ``processor.batch_decode`` on the generated token ids,
        decoded with ``skip_special_tokens=True``.
    """
    checkpoint = "facebook/s2t-small-librispeech-asr"
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    audio_array = s2t_audio_to_array(audio_file)

    # NOTE(review): model and processor are re-created on every call; consider
    # caching them at the call site if this function is invoked repeatedly.
    model = Speech2TextForConditionalGeneration.from_pretrained(
        checkpoint).to(device).eval()
    processor = Speech2TextProcessor.from_pretrained(
        checkpoint, do_upper_case=True)

    # assumes s2t_audio_to_array yields 16 kHz audio — TODO confirm
    features = processor(audio_array, sampling_rate=16000, return_tensors="pt")
    input_features = features.input_features.to(device)
    attention_mask = features.attention_mask.to(device)

    # The speech features are the model's main input, not token ids, so they
    # are passed positionally; the attention mask (previously computed but
    # never used) is forwarded so padded frames are ignored.
    gen_tokens = model.generate(input_features, attention_mask=attention_mask)
    return processor.batch_decode(gen_tokens, skip_special_tokens=True)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_modeling_speech_to_text.py Proyecto: code-nic/transformers-help-loss

    def test_generation_librispeech(self):
        """Generate a transcript for a single sample and compare it to the fixed reference."""
        model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
        model.to(torch_device)
        processor = self.default_processor

        sample = self._load_datasamples(1)
        encoded = processor(sample, return_tensors="pt")

        # Only the input features are used here; no attention mask is passed.
        token_ids = model.generate(encoded.input_features.to(torch_device))
        transcript = processor.batch_decode(token_ids, skip_special_tokens=True)

        self.assertListEqual(transcript, ["a man said to the universe sir i exist"])

Ejemplo n.º 6

0

Mostrar archivo

Archivo: test_modeling_speech_to_text.py Proyecto: Kevin-Zhao-Github/oLMpics

    def test_generation_librispeech(self):
        """Single-sample generation check against one reference transcript."""
        model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
        model.to(torch_device)
        processor = self.default_processor

        speech = self._load_datasamples(1)
        features = processor(speech, return_tensors="pt").input_features
        features = features.to(torch_device)

        decoded = processor.batch_decode(model.generate(features), skip_special_tokens=True)

        reference = ["mister quilter is the apostle of the middle classes and we are glad to welcome his gospel"]
        self.assertListEqual(decoded, reference)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: eval.py Proyecto: KodeWorker/transformer_demo

def translate(data,
              sampling_rate,
              pretrained_model_name="facebook/s2t-small-librispeech-asr"):
    """Run a Speech2Text checkpoint on raw audio and return the decoded text.

    Args:
        data: Audio samples handed directly to the processor.
            # assumes a 1-D float waveform — TODO confirm against callers
        sampling_rate: Sampling rate of ``data``, forwarded to the processor.
        pretrained_model_name: Checkpoint used for the model, the feature
            extractor and the tokenizer.

    Returns:
        The result of ``processor.batch_decode`` on the generated ids.
        Special tokens are not skipped during decoding.
    """
    model = Speech2TextForConditionalGeneration.from_pretrained(
        pretrained_model_name)

    feature_extractor = Speech2TextFeatureExtractor.from_pretrained(
        pretrained_model_name)
    tokenizer = Speech2TextTokenizer.from_pretrained(pretrained_model_name)
    processor = Speech2TextProcessor(feature_extractor=feature_extractor,
                                     tokenizer=tokenizer)

    inputs = processor(data, sampling_rate=sampling_rate, return_tensors="pt")

    # The speech features are the model's main input rather than token ids,
    # so they are passed positionally instead of via the ``input_ids``
    # keyword, which is reserved for text token ids.
    generated_ids = model.generate(inputs["input_features"],
                                   attention_mask=inputs["attention_mask"])

    return processor.batch_decode(generated_ids)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_pipelines_automatic_speech_recognition.py Proyecto: code-nic/transformers-help-loss

    def test_simple_s2t(self):
        """Check the ASR pipeline on an all-zero waveform, a file path and raw file bytes.

        Model, tokenizer and feature extractor are all loaded from the
        ``facebook/s2t-small-mustc-en-it-st`` checkpoint.
        """
        import numpy as np
        from datasets import load_dataset

        checkpoint = "facebook/s2t-small-mustc-en-it-st"
        asr = AutomaticSpeechRecognitionPipeline(
            model=Speech2TextForConditionalGeneration.from_pretrained(checkpoint),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
            feature_extractor=AutoFeatureExtractor.from_pretrained(checkpoint),
        )

        # 34000 zero-valued samples still yield a fixed expected sentence.
        silence = np.zeros((34000, ))
        expected_for_silence = {
            "text":
            "E questo è il motivo per cui non ci siamo mai incontrati."
        }
        self.assertEqual(asr(silence), expected_for_silence)

        # First row of the dummy validation split (no sorting applied).
        dummy_split = load_dataset(
            "patrickvonplaten/librispeech_asr_dummy",
            "clean",
            split="validation",
        )
        audio_path = dummy_split[0]["file"]
        expected = {"text": "Un uomo disse all'universo: \"Signore, io esisto."}

        # Path input.
        self.assertEqual(asr(audio_path), expected)

        # Raw-bytes input read from that same path.
        with open(audio_path, "rb") as audio_file:
            raw_bytes = audio_file.read()
        self.assertEqual(asr(raw_bytes), expected)

Ejemplo n.º 9

0

Mostrar archivo

import numpy as np
from scipy.io.wavfile import read
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration, Speech2TextFeatureExtractor, Speech2TextTokenizer

if __name__ == "__main__":
    # Demo script: read a WAV file, standardize it, and print the text
    # generated by the facebook/s2t-small-librispeech-asr checkpoint.
    checkpoint = "facebook/s2t-small-librispeech-asr"

    audio_path = "../data/review#1.wav"
    samplerate, data = read(audio_path)
    # NOTE(review): presumably the file is mono; a multi-channel WAV would give
    # a 2-D array here — confirm before reusing this on other recordings.

    # Standardize the samples to zero mean and unit standard deviation.
    data = (data - np.mean(data)) / np.std(data)
    print(samplerate, len(data))

    model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)

    feature_extractor = Speech2TextFeatureExtractor.from_pretrained(checkpoint)
    tokenizer = Speech2TextTokenizer.from_pretrained(checkpoint)
    processor = Speech2TextProcessor(feature_extractor=feature_extractor,
                                     tokenizer=tokenizer)

    inputs = processor(data, sampling_rate=samplerate, return_tensors="pt")

    # The speech features are the model's main input rather than token ids,
    # so they are passed positionally instead of via ``input_ids``.
    generated_ids = model.generate(inputs["input_features"],
                                   attention_mask=inputs["attention_mask"])

    transcription = processor.batch_decode(generated_ids)

    print(transcription)