# Example #1
    def test_simple_s2t(self):
        """Smoke-test the ASR pipeline with an S2T en->it checkpoint.

        Exercises three input forms — a synthetic numpy waveform, a file
        path from a dummy dataset, and the raw bytes of that file — and
        checks the exact transcript each produces.
        """
        checkpoint = "facebook/s2t-small-mustc-en-it-st"
        model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        # Synthetic input: a 0..999 float32 ramp repeated 34 times.
        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
        self.assertEqual(asr(waveform), {"text": "(Applausi)"})

        # The same utterance is expected from both path and bytes input.
        expected = {
            "text": "Un uomo disse all'universo: \"Signore, io esisto."
        }

        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
                          "clean",
                          split="validation").sort("id")
        filename = ds[40]["file"]

        # Input as a file path.
        self.assertEqual(asr(filename), expected)

        # Input as raw bytes read from the same file.
        with open(filename, "rb") as f:
            data = f.read()
        self.assertEqual(asr(data), expected)
# Example #2
    def test_simple_wav2vec2(self):
        """Smoke-test the ASR pipeline with a wav2vec2 CTC checkpoint.

        Exercises three input forms — a synthetic numpy waveform, a file
        path from a dummy dataset, and the raw bytes of that file — and
        checks the exact transcript each produces.
        """
        checkpoint = "facebook/wav2vec2-base-960h"
        model = Wav2Vec2ForCTC.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        # Synthetic input: a 0..999 float32 ramp repeated 34 times;
        # CTC decodes it to the empty string.
        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
        self.assertEqual(asr(waveform), {"text": ""})

        # The same utterance is expected from both path and bytes input.
        expected = {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"}

        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
                          "clean",
                          split="validation").sort("id")
        filename = ds[40]["file"]

        # Input as a file path.
        self.assertEqual(asr(filename), expected)

        # Input as raw bytes read from the same file.
        with open(filename, "rb") as f:
            data = f.read()
        self.assertEqual(asr(data), expected)
    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        """Build an ASR pipeline plus two silent example waveforms.

        Skips the test when no tokenizer is available: these models have
        no fast-tokenizer class, but the slow-tokenizer runs still cover
        them and are small enough to keep.
        """
        if tokenizer is None:
            self.skipTest("No tokenizer available")
            return

        recognizer = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        # Two all-zero waveforms of different lengths as raw-array inputs.
        samples = [np.zeros((34000, )), np.zeros((14000, ))]
        return recognizer, samples
    def test_simple_s2t(self):
        """Smoke-test the ASR pipeline with an S2T en->it checkpoint.

        Exercises three input forms — a zeroed numpy waveform, a file path
        from a dummy dataset, and the raw bytes of that file — and checks
        the exact transcript each produces.

        NOTE(review): this duplicates the name of an earlier
        ``test_simple_s2t`` — if both end up in the same test class, the
        later definition silently shadows the earlier one; confirm and
        rename if so.
        """
        import numpy as np
        from datasets import load_dataset

        checkpoint = "facebook/s2t-small-mustc-en-it-st"
        model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)

        asr = AutomaticSpeechRecognitionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor)

        waveform = np.zeros((34000, ))

        output = asr(waveform)
        # Bug fix: the expected transcript contained mojibake "รจ"
        # (UTF-8 "è" mis-decoded through a Thai codepage); restored the
        # intended Italian "è" so the assertion checks the real output.
        self.assertEqual(output, {
            "text":
            "E questo è il motivo per cui non ci siamo mai incontrati."
        })

        # The same utterance is expected from both path and bytes input.
        expected = {
            "text": "Un uomo disse all'universo: \"Signore, io esisto."
        }

        ds = load_dataset("patrickvonplaten/librispeech_asr_dummy",
                          "clean",
                          split="validation")
        filename = ds[0]["file"]

        # Input as a file path.
        self.assertEqual(asr(filename), expected)

        # Input as raw bytes read from the same file.
        with open(filename, "rb") as f:
            data = f.read()
        self.assertEqual(asr(data), expected)