Ejemplos de EncoderDecoderASR en Python

Lenguaje de programación: Python

Namespace/Package Name: speechbrain.pretrained

Clase / Tipo: EncoderDecoderASR

Ejemplos en hotexamples.com: 6

Python EncoderDecoderASR - 6 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de speechbrain.pretrained.EncoderDecoderASR extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

from_hparams (6)

Métodos usados con frecuencia

from_hparams (6)

Ejemplo n.º 1

Mostrar archivo

    def __init__(self, model_id: str):
        """Load the SpeechBrain ASR model identified by ``model_id``.

        The model family is looked up with ``get_type``; only the
        ``ENCODERASR`` and ``ENCODERDECODERASR`` families are accepted.

        Args:
            model_id: Identifier passed as ``source`` to ``from_hparams``.

        Raises:
            ValueError: If the detected model type is neither of the two
                supported ASR families.
        """
        kind = get_type(model_id)

        # Reject unsupported families up front so the loading code below
        # only has to distinguish the two valid cases.
        if (
            kind is not ModelType.ENCODERASR
            and kind is not ModelType.ENCODERDECODERASR
        ):
            raise ValueError(
                f"{kind.value} is invalid for automatic-speech-recognition"
            )

        if kind is ModelType.ENCODERASR:
            self.model = EncoderASR.from_hparams(source=model_id)
        else:
            self.model = EncoderDecoderASR.from_hparams(source=model_id)

            # A beam size of 1 is used to reduce latency.
            decoder = self.model.mods.decoder
            decoder.beam_size = 1

        # The pipeline needs `self.sampling_rate` to be defined so that the
        # input audio is read correctly; take it from the model's hparams.
        self.sampling_rate = self.model.hparams.sample_rate

Ejemplo n.º 2

Mostrar archivo

Archivo: speech_to_text.py Proyecto: ariabee/applecore

    def getTranscription(self):
        """Transcribe the audio file at ``self.path_to_wav``.

        A pretrained ``EncoderDecoderASR`` model is loaded on every call
        (its files are stored under ``./pretrained_ASR``) and used to
        transcribe the file.

        Returns:
            The transcription converted to lowercase.
        """
        recognizer = EncoderDecoderASR.from_hparams(
            savedir="./pretrained_ASR",
            source="speechbrain/asr-crdnn-rnnlm-librispeech",
        )
        raw_text = recognizer.transcribe_file(self.path_to_wav)
        lowered = raw_text.lower()
        return lowered


#with sr.Microphone() as source:
#    stt = SpeechToText()
#    r = sr.Recognizer()
#    audio = r.listen(source, timeout=5)
#    print("audio")
#    print(audio)
#    name = r.recognize_google(audio)
#    print("google works, why can't you?")
#    print(name)
#    stt.saveAudio(audio)
#    name = stt.getTranscription()
#    print(name)

Ejemplo n.º 3

Mostrar archivo

Archivo: scoring.py Proyecto: gcambara/speacher

    def call_huggingface(self, df):
        """Score every row of ``df`` with a pretrained ASR model and record its WER.

        The rows are optionally sorted by ``n_frames``, wrapped in an
        ``AudioDataset``/``DataLoader``, transcribed batch by batch, and the
        word error rate (as a percentage) of each prediction against its
        reference transcript is written to a new ``wer`` column.

        Args:
            df: DataFrame holding the manifest. The code reads the
                ``n_frames`` column (only when sorting is requested) and the
                ``tgt_text`` column, and looks rows up by the integer sample
                IDs yielded by the dataloader.

        Returns:
            The (possibly re-sorted and re-indexed) DataFrame with a ``wer``
            column; rows never visited keep ``NaN``.

        Raises:
            AssertionError: If ``self.model_url`` is empty, or if a reference
                transcript from the dataloader differs from ``tgt_text`` of
                the row with the same sample ID.
            NotImplementedError: If ``self.scoring_sorting`` or
                ``self.hf_username`` has an unsupported value.
        """
        assert self.model_url != '', "Error! A model URL is needed for HuggingFace scoring, but --asr_download_model is empty"
        if self.tokenizer_url == '':
            print(
                f"Setting empty --tokenizer_url field identically to --asr_download_model: {self.model_url}"
            )
            self.tokenizer_url = self.model_url

        # Optional length-based ordering; the index is reset so that row
        # labels stay 0..n-1 after sorting.
        if self.scoring_sorting == 'ascending':
            df = df.sort_values(by=['n_frames']).reset_index(drop=True)
        elif self.scoring_sorting == 'descending':
            df = df.sort_values(by=['n_frames'],
                                ascending=False).reset_index(drop=True)
        elif self.scoring_sorting == '':
            pass
        else:
            raise NotImplementedError

        print(f"Preparing dataloader for manifest {self.manifest}...")
        dataset = AudioDataset(df)
        dataloader = DataLoader(dataset,
                                batch_size=self.batch_size,
                                collate_fn=dataset.collater,
                                num_workers=self.num_workers,
                                pin_memory=True)

        if self.hf_username == 'facebook':
            print(f"Downloading tokenizer: {self.tokenizer_url}")
            tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(
                self.tokenizer_url)

            print(f"Downloading model: {self.model_url}")
            model = Wav2Vec2ForCTC.from_pretrained(self.model_url)
        elif self.hf_username == 'speechbrain':
            if torch.cuda.is_available():
                run_opts = {"device": "cuda"}
            else:
                run_opts = {"device": "cpu"}
            print(f"Downloading model: {self.model_url}")
            model = EncoderDecoderASR.from_hparams(source=self.model_url,
                                                   run_opts=run_opts,
                                                   savedir=os.path.join(
                                                       'pretrained_models',
                                                       self.hf_modelname))
        else:
            raise NotImplementedError

        model.eval()

        print("Scoring dataset...")
        df['wer'] = np.nan

        # Scoring is inference only: disable autograd so that no computation
        # graph is built (and kept alive) for each forward pass.
        with torch.no_grad():
            for batch in tqdm(dataloader):
                indexes, waveforms, transcripts, wav_lens = batch

                # NOTE(review): squeeze() without a dim drops *every*
                # size-1 axis; with a batch of one sample this presumably
                # removes the batch axis as well -- confirm against the
                # collater's output shape.
                if self.hf_username == 'facebook':
                    output_logits = model(waveforms.squeeze()).logits
                    predicted_ids = torch.argmax(output_logits, dim=-1)
                    pred_transcripts = tokenizer.batch_decode(predicted_ids)
                elif self.hf_username == 'speechbrain':
                    waveforms = waveforms.squeeze()
                    #waveforms = model.audio_normalizer(waveforms, self.sampling_rate)
                    # transcribe_batch returns a tuple; element 0 holds the
                    # predicted transcripts used below.
                    pred_transcripts = model.transcribe_batch(waveforms,
                                                              wav_lens)[0]

                for index, ref in enumerate(transcripts):
                    row_label = int(indexes[index])
                    pred = pred_transcripts[index]
                    measures = jiwer.compute_measures(ref, pred)
                    wer = measures['wer'] * 100.0
                    # Sanity check that the dataloader's sample ID still
                    # points at the same utterance in the DataFrame.
                    assert (
                        ref == df.loc[row_label, 'tgt_text']
                    ), "The reference text indicated by the sample ID in the transcripts file does not match with the one stored in the dataset!"
                    df.at[row_label, 'wer'] = wer

        return df

Ejemplo n.º 4

Mostrar archivo

Archivo: asr.py Proyecto: eloyhernandezlua/MentalHealthProjectAI

# !pip install librosa

import time
from time import perf_counter
import numpy as np
import matplotlib.pyplot as plt
import librosa
from pydub import AudioSegment
import os
from google.colab import files
import moviepy.editor
from transformers import pipeline

from speechbrain.pretrained import EncoderDecoderASR

# Load the pretrained recognizer once at import time; its files are stored
# under the given `savedir`.
asr_model2 = EncoderDecoderASR.from_hparams(
    savedir="pretrained_models/asr-crdnn-rnnlm-librispeech",
    source="speechbrain/asr-crdnn-rnnlm-librispeech",
)

def transcribe_audio(fileList = []):
  """Prepare a set of audio files for transcription.

  NOTE(review): this snippet appears to be truncated -- the loop body below
  only initialises per-file variables, and nothing is returned in the
  visible code.

  Args:
    fileList: Audio file names to process. When it equals the empty list,
      the user is prompted to upload files through ``files.upload()``
      instead.
  """
  # NOTE(review): `fileList` uses a mutable default; the visible code only
  # compares it and never mutates it -- confirm for the rest of the body.
  listOfText = []
  if fileList == [] :
    # No files supplied: ask for an upload and use the uploaded names.
    uploaded = files.upload()
    listOfAudios = list(uploaded.keys())
  else:
    listOfAudios = fileList
  
  # Timestamp taken before processing starts (its use is not visible here).
  a = perf_counter()
  for file in listOfAudios:
    duration = librosa.get_duration(filename=file)
    # Window bounds for the file: start at 0, end at duration * 1000 capped
    # at 30000 -- presumably milliseconds for a 30 s window; TODO confirm.
    t1 = 0
    t2 = duration * 1000 if duration < 30 else 30000
    textTemp = ""

Ejemplo n.º 5

Mostrar archivo

def load_asr_model():
    """Load and return the pretrained ``EncoderDecoderASR`` model.

    The model files are stored under ``pretrained_model/``.
    """
    return EncoderDecoderASR.from_hparams(
        savedir="pretrained_model/",
        source="speechbrain/asr-transformer-transformerlm-librispeech",
    )

Ejemplo n.º 6

Mostrar archivo

def asr_model():
    """Load model for the CTC segmentation test."""
    model_source = "speechbrain/asr-transformer-transformerlm-librispeech"
    return EncoderDecoderASR.from_hparams(source=model_source)