Exemple #1
0
def ShiBie_ZiRanYuYan():
    """Recognise free-form speech from 'yuyin.wav' with the Vosk model in 'model'.

    Returns:
        str: the text of the last completed utterance (or, if no utterance
        completed while streaming, the flushed final result), with spaces,
        double quotes and newlines removed.
    """
    wf = wave.open('yuyin.wav', "rb")
    try:
        model = Model("model")
        rec = KaldiRecognizer(model, wf.getframerate())
        wenben = ""
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # A full utterance was decoded; keep its text.
                res = json.loads(rec.Result())
                wenben = res['text']
        if wenben == "":
            # Nothing completed during streaming -> flush the decoder.
            res = json.loads(rec.FinalResult())
            wenben = res['text']
    finally:
        wf.close()  # fix: the wave file was never closed

    # Fix: one C-level translate pass instead of a replace() per character.
    return wenben.translate(str.maketrans('', '', ' "\n'))
Exemple #2
0
def speech_to_text(args):
    """Transcribe every .wav under args.data with a Vosk model.

    For each file, 4000-frame chunks are streamed through the recognizer and
    the final JSON result is written to '<args.hypothesis>/<stem>.txt'.
    """
    # NOTE(review): the existence check uses 'models/<name>' but Model() is
    # given args.model directly — confirm which path layout is intended.
    if not os.path.exists(os.path.join('models', args.model)):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack to 'models' folder.")
        exit(1)

    # Load the model once instead of once per file.
    model = Model(args.model)

    for filepath in glob.iglob(os.path.join(os.getcwd(), args.data, '*.wav')):
        print(filepath)

        # Fix: open the file being iterated, not the directory argument.
        wf = wave.open(filepath, "rb")
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print("Audio file must be WAV format mono PCM.")
            exit(1)

        rec = KaldiRecognizer(model, wf.getframerate())

        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                print(rec.Result())
            else:
                print(rec.PartialResult())
        wf.close()

        # Fix: FinalResult() was called twice (print + write); the second
        # call returns a fresh, empty result — call it once and reuse.
        final = rec.FinalResult()
        print(final)

        # Fix: splitting the absolute path on '.' broke whenever any parent
        # directory contained a dot; use the file's own stem instead.
        stem = os.path.splitext(os.path.basename(filepath))[0]
        hypothesis_path = os.path.join(args.hypothesis, stem + '.txt')
        with open(hypothesis_path, 'w') as hypothesis:
            hypothesis.write(final)
Exemple #3
0
def ShiBie_ZiFu():
    """Recognise a short command from 'yuyin.wav' against a fixed vocabulary.

    The candidate character set depends on the module-level `PanDuan` flag:
    empty -> full command vocabulary, otherwise a yes/no confirmation set.

    Returns:
        str: the recognised text (may be empty).
    """
    if PanDuan == "":
        ZiFuJi = "继 续 检 搜 索 全 部 无 损 听 歌 播 放 音 乐 停 止 诗 词 单 曲 专 辑 循 环 顺 序 随 相 声 评 书 讲 坛 朗 读 关 机 复 制 上 下 一 个 从 头 添 加 收 藏 中 文 日 语 英 更 新 升 级 清 空 谁 多 少 什 么 唱 名 叫 他 的"
    else:
        ZiFuJi = "对 是 嗯 没 错"
    wenben = ""
    model = Model("model")
    rec = KaldiRecognizer(model, 16000, ZiFuJi)
    # Fix: context manager so the wav file is always closed.
    with open("yuyin.wav", "rb") as WaveWenJian:
        # Skip the RIFF header; assumes a canonical 44-byte header — TODO confirm.
        WaveWenJian.read(44)
        while True:
            data = WaveWenJian.read(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                res = json.loads(rec.Result())
                wenben = res['text']
                print("识别结果是: " + res['text'])

    # Flush the decoder; use the final result only if nothing completed earlier.
    res = json.loads(rec.FinalResult())
    if wenben == "":
        wenben = res['text']
    print("最终结果是: " + wenben)

    return wenben
def recognizer_process(queue_audio, queue_text):
    """
    as result: place into queue_text <- (text, True|False)  where:
        text - a str with recognizer result, to json.loads()

    Drains `queue_audio`, feeds the bytes to the module-level Vosk `model`
    (8 kHz recognizer) and pushes completed results to `queue_text`.  Stops
    after 60 seconds without receiving audio, flushing a final result if
    only partial results were produced up to that point.
    """
    print('Worker started')
    rec = KaldiRecognizer(model, 8000)
    last_received = datetime.datetime.now()
    partial = True
    while True:
        queue_bytes = b''
        while not queue_audio.empty():
            last_received = datetime.datetime.now()
            queue_bytes += queue_audio.get()
        # Fix: don't feed an empty buffer to the recognizer on every idle
        # wake-up — only decode when audio actually arrived.
        if queue_bytes and rec.AcceptWaveform(queue_bytes):
            res = rec.Result()
            partial = False
            queue_text.put(res)

        # Shut down after 60 s of silence from the producer.
        if datetime.datetime.now() - datetime.timedelta(
                seconds=60) > last_received:
            if partial:
                queue_text.put(rec.FinalResult())
            print(f'Worker stopped ')
            time.sleep(1)
            return
        time.sleep(1)
def listen(model: Model,
           spk_model: SpkModel = None,
           speech_chunk_sec: float = 0.5,
           buffer_sec: float = 1):
    """Open the default microphone and log Vosk recognition output forever.

    Completed utterances are logged as full results; otherwise the current
    partial result is logged on every chunk.
    """
    with ExitStack() as stack:
        rate = model.SampleFrequency()
        # Build the recognizer with the speaker model when one was supplied.
        if spk_model:
            rec = KaldiRecognizer(model, spk_model, rate)
        else:
            rec = KaldiRecognizer(model, rate)
        audio = stack.enter_context(_pyaudio())
        stream = stack.enter_context(
            _pyaudio_open_stream(audio,
                                 format=paInt16,
                                 channels=1,
                                 rate=rate,
                                 input=True,
                                 frames_per_buffer=int(rate * buffer_sec)))
        chunk_frames = int(rate * speech_chunk_sec)
        while True:
            block = stream.read(chunk_frames)
            raw = rec.Result() if rec.AcceptWaveform(block) else rec.PartialResult()
            logging.info(json.loads(raw))
 def _command_check(self):
     """Listen on the microphone and dispatch recognised voice commands.

     Runs until self._command_interrupt_check() returns True, feeding 4000
     byte chunks to a fresh KaldiRecognizer and passing every non-empty
     transcript to self._handle_command().
     """
     print('+ Switch to command mode')
     self._cmd_start_t = time.time()
     speech_recognizer = KaldiRecognizer(self._vosk_model, 16000)
     p = pyaudio.PyAudio()
     stream = p.open(format=pyaudio.paInt16,
                     channels=1,
                     rate=16000,
                     input=True,
                     frames_per_buffer=8000)
     stream.start_stream()
     self._cmd_start_f()
     while not self._command_interrupt_check():
         data = stream.read(4000)
         if len(data) == 0:
             break
         if speech_recognizer.AcceptWaveform(data):
             jdata = json.loads(speech_recognizer.Result())
             cmd = jdata.get("text")
             # Fix: guard before formatting — len(cmd) raised TypeError
             # whenever the result JSON carried no "text" entry (cmd None).
             if cmd:
                 print("CMD:", cmd, end=f"\n-----{'-'*len(cmd)}\n")
                 self._handle_command(cmd)
     stream.stop_stream()
     stream.close()
     p.terminate()  # fix: release the PyAudio instance as well
     self._cmd_stop_f()
Exemple #7
0
def audio_speech_recognition(model_path, audio_path):
    """
    Recognizes text from audio file

    :param model_path: str, path to the Vosk model directory
    :param audio_path: str, path to the wav file; assumed to be 16 kHz mono
        PCM with a 44-byte header — TODO confirm with the caller
    :return: str, the concatenated recognised text
    """
    result = ""
    with open(audio_path, 'rb') as wf:
        model = Model(model_path)
        rec = KaldiRecognizer(model, 16000)

        wf.read(44)  # skip the WAV header bytes

        while True:
            data = wf.read(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # A completed utterance: print it and append to the transcript.
                res = json.loads(rec.Result())
                print(res['text'])
                result = result + res['text']
        # (the recognizer's final flush is intentionally not used here)
        print(result)
        return result
Exemple #8
0
def audio_to_txt(file_name):
    """Convert an audio/video file to text via ffmpeg + Vosk.

    ffmpeg transcodes `file_name` to a temporary 'out.wav', which is then
    streamed through a KaldiRecognizer built for the file's sample rate.

    :param file_name: path of the media file to transcribe
    :return: str, the concatenated transcript
    """
    import subprocess

    # Fix: argument list instead of an interpolated shell string, so file
    # names with spaces/shell metacharacters cannot break or inject commands.
    subprocess.run(['ffmpeg', '-i', file_name, 'out.wav'], check=True)

    model = Model("model")

    wf = wave.open('out.wav', "rb")
    try:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
        ) != "NONE":
            print("Audio file must be WAV format mono PCM.")
            exit(1)

        # Fix: the first recognizer (hard-coded 16000 Hz) was built and then
        # immediately discarded; build a single one at the file's real rate.
        rec = KaldiRecognizer(model, wf.getframerate())
        transcript = ''
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                json_dict = json.loads(rec.Result())
                transcript += json_dict['text']
    finally:
        wf.close()
        os.remove('out.wav')  # fix: os.remove instead of shelling out to rm
    return transcript
Exemple #9
0
def speech_to_text(file):
    """Transcribe `file` (mono PCM wav) with the Vosk model in 'model'.

    :param file: path to the wav file
    :return: (timestamp, text) — parallel lists holding the start time and
             the transcription of every recognised utterance
    """
    SetLogLevel(0)

    timestamp = []
    text = []

    def _collect(res):
        # Keep the utterance only when vosk produced word-level timings.
        if 'result' in res:
            timestamp.append(res['result'][0]['start'])
            text.append(res['text'])

    # open the audio file
    with wave.open(file, "rb") as wav_file:
        # build the recognizer for this file's sample rate
        model = Model("model")
        rec = KaldiRecognizer(model, wav_file.getframerate())

        # stream the file in 4000-frame blocks
        data = wav_file.readframes(4000)
        while len(data) != 0:
            if rec.AcceptWaveform(data):
                _collect(json.loads(rec.Result()))
            data = wav_file.readframes(4000)

    # Fix: the audio buffered in the decoder was silently dropped — flush it.
    _collect(json.loads(rec.FinalResult()))

    return timestamp, text
Exemple #10
0
def upload_voice_input(request):
    """Django view: save an uploaded voice sample, transcribe it with Vosk
    and register the transcript in the voice-command configuration.

    Returns an HttpResponse describing the outcome.
    """
    if request.method != "POST":
        # Fix: the view previously fell through and returned None (a 500)
        # for any non-POST request.
        return HttpResponse("POST a voice file to this endpoint")
    myFile = request.FILES.get("myfile", None)
    if not myFile:
        print("no files for upload!")
        return HttpResponse("no files for upload!")
    # Fix: context manager so the destination is closed even on write errors.
    with open(os.path.join("media/voice", myFile.name), 'wb+') as destination:
        for chunk in myFile.chunks():
            destination.write(chunk)

    rec = KaldiRecognizer(vosk_model, 16000)
    # NOTE(review): this transcribes the fixed file 'voicehome.wav', not the
    # file that was just uploaded — verify against the upload client.
    wf = wave.open(BASE_DIR + '/media/voice/voicehome.wav', "rb")
    try:
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                rec.Result()
    finally:
        wf.close()  # fix: the wave file was never closed

    data = json.loads(rec.FinalResult())
    voicetext = data['text']
    print(voicetext)

    # Append the transcript to the configured command list if it is new.
    selectitem = dragon_cf['voicerec'][voice_section]
    item_value_array = selectitem.split(',')
    if voicetext not in item_value_array:
        newvalue = selectitem + ',' + voicetext
        dragon_cf.set('voicerec', voice_section, newvalue)
        dragon_cf.write(open(voice_rec_config, 'w'))
        return HttpResponse("voice added added added")

    return HttpResponse("voice already exist")
 def process_file(self, file_name):
     """
     Run the Vosk model on the input file
     :param file_name: Input wav or mp3 file
     :return: List of dictionaries containing: confidence, start time, end time and the predicted word
     :raises Exception: if the file is not mono 16-bit PCM wav
     """
     logger.info(f'Recognising speech for {file_name}')
     wf = wave.open(file_name, "rb")
     try:
         # Check to see if the audio file can be read by the Vosk model
         if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
         ) != "NONE":
             raise Exception(f'Invalid file format for {file_name}')
         rec = KaldiRecognizer(self.model, wf.getframerate())
         results = []
         while True:
             data = wf.readframes(config.frame_to_read)
             # If the data we have read is empty then we are at the end of the file
             if len(data) == 0:
                 break
             if rec.AcceptWaveform(data):
                 result = json.loads(rec.Result())
                 # Result can contain an empty text string but no result list
                 if len(result['text']) > 0:
                     results.extend(result['result'])
         # Flush whatever audio is still buffered in the decoder.
         result = json.loads(rec.FinalResult())
         if len(result['text']) > 0:
             results.extend(result['result'])
     finally:
         wf.close()  # fix: the handle leaked, also on the format-error raise
     logger.info(f'Processed speech, captured {len(results)} results')
     return results
Exemple #12
0
def translate_file(filename="last5.wav"):
    """Transcribe `filename` (mono PCM wav) with the Vosk model in './model'.

    Prints intermediate and partial results while decoding.

    :param filename: wav file in the current folder
    :return: str, the final recognised text
    """
    SetLogLevel(0)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)
    filepath = "./" + filename
    wf = wave.open(filepath, "rb")
    try:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
        ) != "NONE":
            print("Audio file must be WAV format mono PCM.")
            exit(1)

        model = Model("./model")
        rec = KaldiRecognizer(model, wf.getframerate())

        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                print(rec.Result())
            else:
                print(rec.PartialResult())
        results = rec.FinalResult()
    finally:
        wf.close()  # fix: the wave file handle was never closed
    return json.loads(results)[
        "text"]  #["results"] for confidence of each word
    def meu_comando():
        """Listen on the microphone and return the recognised utterance as a
        cleaned-up string (word characters only, joined by single spaces).

        Returns None if the stream ends before anything is recognised.
        """
        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=8000)
        stream.start_stream()

        # Locate the speech-recognition model folder.
        model = Model("vosk-model-small-pt-0.3")
        rec = KaldiRecognizer(model, 16000)
        print("Fale algo")

        try:
            while True:
                data = stream.read(2000)
                if len(data) == 0:
                    break
                if rec.AcceptWaveform(data):
                    # Fix: parse the JSON result instead of splitting the raw
                    # string on the substring "text", which was fragile.
                    comando = json.loads(rec.Result())['text']
                    # Regular expression keeps only word characters; the list
                    # is then joined back into a clean string.
                    resultado = re.findall(r'\w+', comando)
                    return " ".join(resultado)
        finally:
            # Fix: always release the audio stream, also when the loop ends
            # without producing a result.
            stream.stop_stream()
            stream.close()
            p.terminate()
def use_offline_recognition():
    """
    Switch to offline speech recognition.
    :return: the recognised phrase (empty string on failure)
    """
    recognized_data = ""
    try:
        # Check that a model for the required language exists in the app folder.
        if not os.path.exists("models/vosk-model-small-" + assistant.speech_language + "-0.4"):
            print(colored("Please download the model from:\n"
                          "https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.",
                          "red"))
            exit(1)

        # Analyse the audio recorded from the microphone
        # (avoids the phrase being repeated).
        wave_audio_file = wave.open("microphone-results.wav", "rb")
        try:
            model = Model("models/vosk-model-small-" + assistant.speech_language + "-0.4")
            offline_recognizer = KaldiRecognizer(model, wave_audio_file.getframerate())
            data = wave_audio_file.readframes(wave_audio_file.getnframes())
        finally:
            wave_audio_file.close()  # fix: the wave file was never closed
        if len(data) > 0:
            if offline_recognizer.AcceptWaveform(data):
                recognized_data = offline_recognizer.Result()

                # Extract the recognised text from the JSON string so an
                # answer can be produced from it.
                recognized_data = json.loads(recognized_data)
                recognized_data = recognized_data["text"]
    except Exception:
        # Fix: the bare `except:` also swallowed the SystemExit raised by
        # exit(1) above (and KeyboardInterrupt); catch Exception only.
        traceback.print_exc()
        print(colored("Sorry, speech service is unavailable. Try again later", "red"))

    return recognized_data
Exemple #15
0
def get_transcript(wav_filename):
    """
    From a WAV filename, use vosk to generate a Transcript object
    See example code https://github.com/alphacep/vosk-api/blob/master/python/example/test_simple.py

    :param wav_filename: path to a mono 16-bit PCM wav file
    :return: Transcript with per-word items and the joined text
    :raises Exception: if the file is not mono 16-bit PCM
    """
    transcript_text = []
    transcript = Transcript()
    wav_file = wave.open(wav_filename, "rb")
    try:
        if wav_file.getnchannels() != 1 or wav_file.getsampwidth(
        ) != 2 or wav_file.getcomptype() != "NONE":
            # fix: removed the redundant print before the raise
            raise Exception("Audio file must be WAV format mono PCM.")
        model = Model(MODEL_DIR)
        rec = KaldiRecognizer(model, wav_file.getframerate())
        rec.SetWords(True)

        def _collect(json_result):
            # Append per-word items and the utterance text when present.
            if (exists(json_result, 'result')):
                for word in json_result['result']:
                    item = Item()
                    item.start_time = word['start']
                    item.end_time = word['end']
                    item.confidence = word['conf']
                    item.content = word['word']
                    transcript.items.append(item)
            # fix: skip empty utterances so join() doesn't add stray spaces
            if (exists(json_result, 'text')) and json_result['text'] != '':
                transcript_text.append(json_result['text'])

        while True:
            data = wav_file.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                _collect(json.loads(rec.Result()))
        # Fix: the audio buffered in the decoder was dropped — flush it.
        _collect(json.loads(rec.FinalResult()))
    finally:
        wav_file.close()  # fix: the wave file handle was never closed
    transcript.text = ' '.join(transcript_text)
    return transcript
Exemple #16
0
def wav2str(filename, sample_rate=16000, foldername="voskmodel"):
    """Transcribe `filename` and return the recognised words as a list.

    :param filename: path to a mono 16-bit PCM wav file
    :param sample_rate: recognizer sample rate in Hz
    :param foldername: name of the Vosk model folder
    :return: list[str] of recognised words, in order
    """
    model = Model(foldername)
    rec = KaldiRecognizer(model, sample_rate)

    wf = wave.open(filename, "rb")
    try:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
        ) != "NONE":
            print("Audio file must be WAV format mono PCM.")
            exit(1)

        results = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                results.append(rec.Result())
        results.append(rec.FinalResult())
    finally:
        wf.close()  # fix: the wave file handle was never closed

    # Collect every word from every utterance that has word-level output.
    # fix: removed the unused `subs` list and the index-based word loop.
    Strings = []
    for res in results:
        jres = json.loads(res)
        for word in jres.get('result', []):
            Strings.append(word['word'])
    return Strings
Exemple #17
0
    def _get_data_in_audio(self, audio_wav_path: str):
        """
        :param audio_wav_path:
                    -path to wav (must be mono 16-bit PCM)
        :return: list of recognised utterance texts (also stored in
                 self.raw_data), or None when the file has the wrong format
        """
        wf = wave.open(audio_wav_path, "rb")
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            # Fix: close the handle on the bad-format early return too.
            wf.close()
            return
        # list used to accumulate the per-utterance results
        result = list()
        # wf.getframerate() -> returns the file's sampling rate
        rec = KaldiRecognizer(self.model, wf.getframerate())
        while True:
            data = wf.readframes(1000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # get result in JSON
                jsonData = json.loads(rec.Result())
                result.append(jsonData['text'])
        jsonData = json.loads(rec.FinalResult())
        # append the final flush only when it actually decoded something
        if 'result' in jsonData:
            result.append(jsonData.get('text'))
        wf.close()
        self.raw_data = result
        return result
Exemple #18
0
def takeCommand():
    """Listen on the microphone and return the first recognised utterance,
    lower-cased.  Exits if the model folder 'model1' is missing."""
    if not os.path.exists("model1"):
        print(
            "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder.")
        exit(1)

    model = Model("model1")
    rec = KaldiRecognizer(model, 16000)

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
    stream.start_stream()

    print("Start SPEAKING:-")
    subprocess.call(['/usr/bin/canberra-gtk-play', '--id', 'bell'])

    try:
        while True:
            data = stream.read(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # Fix: json.loads instead of eval() — never eval data that
                # originated outside the program.
                curr = json.loads(rec.Result())['text']
                print(curr)
                return curr.lower()
    finally:
        # Fix: release the audio stream and PyAudio on every exit path
        # (they were never closed before).
        stream.stop_stream()
        stream.close()
        p.terminate()
Exemple #19
0
class VoskVoiceToTextCalculator(Calculator):
    """Calculator node that turns AudioData input into VoiceTextData.

    Output 0 carries completed utterances; output 1 carries partial results.
    """

    def __init__(self, name, s, options=None):
        from vosk import Model, KaldiRecognizer
        super().__init__(name, s, options)
        self.model = Model("model")
        self.rec = KaldiRecognizer(self.model, 16000)
        self.output_data = [None, None]

    def process(self):
        audio = self.get(0)
        if not isinstance(audio, AudioData):
            return False
        if self.rec.AcceptWaveform(audio.audio):
            self._emit_final(audio)
        else:
            self._emit_partial(audio)
        return True

    def _emit_final(self, audio):
        # Parse the completed utterance, tolerating malformed JSON.
        raw = self.rec.Result()
        try:
            parsed = json.loads(raw)
        except json.decoder.JSONDecodeError as e:
            print("Voice2Text: Failed to parse voice json:", e)
            print(raw)
            return
        text = parsed.get('text')
        if text:
            print("Voice2Text:", repr(text), parsed)
            self.set_output(0, VoiceTextData(text, audio.timestamp, info=parsed))

    def _emit_partial(self, audio):
        parsed = json.loads(self.rec.PartialResult())
        text = parsed.get('partial')
        if text:
            print("Voice2Text (partial): ", repr(text))
            self.set_output(1, VoiceTextData(text, audio.timestamp, info=parsed))
Exemple #20
0
def myChat():
    """Listen on the default microphone and return the first recognised
    utterance as clean text."""
    # We imported vosk up above.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=16000,
                    input=True,
                    frames_per_buffer=8000)
    stream.start_stream()
    model = Model("model")
    rec = KaldiRecognizer(model, 16000)
    try:
        while True:
            data = stream.read(2000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                # Fix: parse the JSON result instead of splitting the raw
                # string on "text", which returned `" : "…"}`-style junk.
                return json.loads(rec.Result())["text"]
    finally:
        # Fix: always release the stream, also when the loop ends without
        # producing a result.
        stream.stop_stream()
        stream.close()
        p.terminate()
    def recognize(self):
        """Convert self.file_folder (wav) to mono, run it through the Vosk
        model in 'Speech_Recognition/model' and return the transcript.

        :return: str, the space-separated recognised text
        """
        if not os.path.exists("Speech_Recognition/model"):
            print(
                "Please create speech model as 'model' in the current folder.")
            exit(1)
        # Vosk needs mono input; re-export the file with a single channel.
        sound = AudioSegment.from_wav(self.file_folder)
        sound = sound.set_channels(1)
        sound.export("path.wav", format="wav")
        wf = wave.open('path.wav', "rb")
        try:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
            ) != "NONE":
                print("Audio file must be WAV format mono PCM.")
                exit(1)

            model = Model("Speech_Recognition/model")
            rec = KaldiRecognizer(model, wf.getframerate())
            result = ''

            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                # fix: removed the dead `else: pass` branch
                if rec.AcceptWaveform(data):
                    x = json.loads(rec.Result())
                    result += x['text'] + ' '
            # append the decoder's final flush
            result += json.loads(rec.FinalResult())['text']
        finally:
            wf.close()  # fix: the wave file handle was never closed

        return result
class WakeWordDetector:
    """Wake-word detector: a thin wrapper around
    `vosk-api <https://github.com/alphacep/vosk-api>`_.  The default wake
    words are `'阿Q'` and `'R-Cute'`.

    To customise the wake words, see
    https://github.com/alphacep/vosk-api/blob/master/python/example/test_words.py
    """
    def __init__(
        self,
        sr=16000,
        lang='en',
        grammar='[ "a b c d e f g h i j k l m n o p q r s t u v w x y z key cute", "[unk]" ]'
    ):
        self.load(lang)
        self._det = KaldiRecognizer(util.cache[f'vosk.{lang}'], sr, grammar)

    def _detected(self, text):
        # Map the raw grammar output to the canonical wake-word names.
        return {'r q': '阿Q', 'r cute': 'R-Cute'}.get(text)

    def load(self, lang='en'):
        """load language model in advance"""
        key = f'vosk.{lang}'
        util.cache[key] = util.cache.get(key,
                                         Model(util.data_file(f'vosk/{lang}')))

    def detect(self, source, timeout=None):
        """Start detecting.

        :param source: the audio source
        :param timeout: maximum time to listen, in seconds; `None` (default)
            means no timeout — wait until a wake word is heard
        :type timeout: float, optional
        :return: the wake word that was detected, or `None` when the timeout
            expired (or detection was cancelled) first
        :rtype: str
        """
        self._cancel = False  # possible race condition?
        elapsed = 0.0
        self._det.FinalResult()  # clear buffer
        while True:
            segment = source.read()
            if self._det.AcceptWaveform(segment.raw_data):
                hit = self._detected(json.loads(self._det.Result())['text'])
            else:
                hit = self._detected(
                    json.loads(self._det.PartialResult())['partial'])
            if hit:
                return hit
            if self._cancel:
                return
                # raise RuntimeError('Hotword detection cancelled by another thread')
            if timeout:
                elapsed += segment.duration_seconds
                if elapsed > timeout:
                    return  # self._detected(self._det.FinalResult()['text'])

    def cancel(self):
        """Stop detecting."""
        self._cancel = True
 def next_sentence(self, process):
     """Yield formatted recognition results for audio streamed from
     `process.stdout`; the last yield carries the flushed final result."""
     recognizer = KaldiRecognizer(self.vosk_model, self.sample_rate)
     chunk = process.stdout.read(8000)
     while len(chunk) != 0:
         if recognizer.AcceptWaveform(chunk):
             yield self.format_result(recognizer.Result())
         chunk = process.stdout.read(8000)
     yield self.format_result(recognizer.FinalResult(), final=True)
Exemple #24
0
def main():
    """Command-line entry point: transcribe an audio/video file with Vosk.

    Usage: -f/--file_name <media file> [-m/--model_path <model dir>]
    ffmpeg decodes the input to 16 kHz mono s16le on the fly and the
    transcript is printed to stdout.
    """
    argv = sys.argv[1:]
    model_path = "./model"
    filename = ""

    try:
        opts, _ = getopt.getopt(argv, "f:m:", ["file_name =", "model_path ="])
    except getopt.GetoptError:
        # Fix: catch the specific getopt error instead of a bare except.
        print("Error with arguments")
        return

    for opt, arg in opts:
        if opt in ['-f', '--file_name']:
            filename = arg
        elif opt in ['-m', '--model_path']:
            model_path = arg

    print("FILE: ", filename, " MODEL: ", model_path)

    # Fix: fail early with a clear message when no input file was given,
    # instead of letting ffmpeg fail later on an empty path.
    if not filename:
        print("Error with arguments")
        return

    if not os.path.exists(model_path):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        return

    SetLogLevel(-1)
    sample_rate = 16000
    model = Model(model_path)
    rec = KaldiRecognizer(model, sample_rate)

    process = subprocess.Popen([
        'ffmpeg', '-loglevel', 'quiet', '-i', filename, '-ar',
        str(sample_rate), '-ac', '1', '-f', 's16le', '-'
    ],
                               stdout=subprocess.PIPE)

    result = ""
    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            data = json.loads(rec.Result())
            result += data['text']

    process.wait()  # fix: reap the ffmpeg child process
    # Flush the decoder's buffered tail.
    data = json.loads(rec.FinalResult())
    result += data['text']
    print("\n")
    print(result)
Exemple #25
0
 def StreamingRecognize(self, request_iterator, context):
     """gRPC streaming handler: the first message carries the config, the
     rest carry audio.  Yields a response for every completed result (and
     for partials when requested), plus one final flush at stream end."""
     spec = next(request_iterator).config.specification
     want_partial = spec.partial_results
     recognizer = KaldiRecognizer(self.model, spec.sample_rate_hertz)
     for message in request_iterator:
         if recognizer.AcceptWaveform(message.audio_content):
             yield self.get_response(recognizer.Result())
         elif want_partial:
             yield self.get_response(recognizer.PartialResult())
     yield self.get_response(recognizer.FinalResult())
class Recognizer:
    """Microphone speech recogniser built on Vosk + PyAudio."""

    def __init__(self, pathToModel):
        self.answer = "None"      # last recognised text
        self.modelFlag = False    # True once the Vosk model is loaded
        self.pyAudioFlag = False  # True while the input stream is running
        self.pathToModel = pathToModel

    def setupModel(self):
        """Load the Vosk model and start the microphone stream."""
        if not os.path.exists(self.pathToModel):
            print(
                "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
            )
            exit(1)
        self.model = Model(self.pathToModel)
        self.rec = KaldiRecognizer(self.model, 16000)
        self.modelFlag = True
        self.startPyaudio()

    def startPyaudio(self):
        """Open a 16 kHz mono input stream on the default microphone."""
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=pyaudio.paInt16,
                                  channels=1,
                                  rate=16000,
                                  input=True,
                                  frames_per_buffer=8000)
        self.stream.start_stream()
        self.pyAudioFlag = True

    def stopPyaudio(self):
        """Pause the input stream (it can be restarted later)."""
        self.stream.stop_stream()
        self.pyAudioFlag = False

    def runTimedRecognition(self):
        """Listen for up to ~1000 chunks and return the first full utterance.

        :return: the recognised text, or None when the time budget runs out
        """
        n = 1000
        while True:
            data = self.stream.read(4000)
            if len(data) == 0:
                break
            if self.rec.AcceptWaveform(data):
                result = self.rec.Result()
                print(result)
                # fix: Result() already returns str — str() wrapper removed
                myStr = json.loads(result)["text"]
                print(myStr)
                self.answer = myStr
                return myStr
            else:
                print(self.rec.PartialResult())
            if n == 0:
                break
            print(n)
            n -= 1
def reconize(model_path, process):
    """Generator: yield formatted Vosk results (with word timings enabled)
    for audio read from `process.stdout`, ending with the final flush."""
    recognizer = KaldiRecognizer(Model(model_path), sample_rate)
    recognizer.SetWords(True)

    chunk = process.stdout.read(8000)
    while len(chunk) != 0:
        if recognizer.AcceptWaveform(chunk):
            yield format_result(recognizer.Result())
        chunk = process.stdout.read(8000)

    yield format_result(recognizer.FinalResult())
Exemple #28
0
class Decoder:
    """Wraps a Vosk recognizer (8 kHz) for decoding wav files and raw
    8 kHz mono PCM streamed over a socket."""

    def __init__(self):
        model = Model("/home/pi/Documents/DOORS/modules/model")
        self.rec = KaldiRecognizer(model, 8000)

    def decode_file(self, aud_file):
        """Decode one wav file; returns the list of raw JSON result strings."""
        SetLogLevel(0)

        wf = wave.open(aud_file, "rb")
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
        ) != "NONE":
            print("Audio aud_file must be WAV format mono PCM.")
            exit(1)

        results = []

        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if self.rec.AcceptWaveform(data):
                results.append(self.rec.Result())
        wf.close()  # fix: the wave file handle was never closed

        for i in results:
            y = json.loads(i)
            print("---VOSK TEXT---", y["text"])
        print("results:", results)
        return results

    def decode_stream(self, socket, initData):
        """Repeatedly wrap raw PCM from `socket` in a temp wav and decode it."""
        fname = 'temp.wav'
        cur = 1

        def _write_wav(payload):
            # Fixes: setnchannels (was setchannels: AttributeError),
            # setsampwidth was called with no argument on the second path,
            # and obj.close without parentheses never closed the file.
            obj = wave.open(fname, 'wb')
            obj.setnchannels(1)  # mono
            obj.setsampwidth(2)
            obj.setframerate(8000)
            obj.writeframesraw(payload)
            obj.close()

        _write_wav(initData)
        results = self.decode_file(fname)
        # fix: "results " + cur raised TypeError (int + str); use an f-string
        print(f"results {cur}:{results}")

        while True:  # fix: `while true:` was a NameError
            _write_wav(socket.read(1024))
            results = self.decode_file(fname)
            print(f"results {cur}:{results}")
            cur += 1
            cur += 1
class VoskSpeechDetector(SpeechDetector):
    """Detect speech segments in a media file by streaming it through Vosk."""

    def __init__(self, model: Model, sample_rate):
        self._model = model
        self._sample_rate = sample_rate
        self._bytes_per_iter = 4096

        self._recognizer: Optional[KaldiRecognizer] = None

    def detect(self, file) -> Generator[Speech, None, None]:
        """Yield one Speech per recognised utterance found in `file`."""
        self._recognizer = KaldiRecognizer(self._model, self._sample_rate)

        for data in self._read_audio(file):
            if self._recognizer.AcceptWaveform(data):
                speech = self._parse_speech()
                if speech:
                    yield speech

        # Fix: flush the recognizer's buffered tail with FinalResult();
        # Result() here missed audio still held inside the decoder.
        speech = self._parse_speech(final=True)
        if speech:
            yield speech

    def _parse_speech(self, final: bool = False) -> Optional[Speech]:
        """Turn the recognizer's current (or final) result into a Speech."""
        raw = (self._recognizer.FinalResult() if final
               else self._recognizer.Result())
        result = json.loads(raw)

        if result['text'] == '':
            return None

        # First/last word timings delimit the speech segment.
        first_phrase = result['result'][0]
        last_phrase = result['result'][-1]

        begin_of_speech = timedelta(seconds=first_phrase['start'])
        end_of_speech = timedelta(seconds=last_phrase['end'])

        return Speech(result['text'], begin_of_speech, end_of_speech)

    def _read_audio(self, file):
        """Stream `file` as 16-bit mono PCM chunks decoded by ffmpeg."""
        command = [
            'ffmpeg', '-loglevel', 'quiet', '-i', file, '-ar',
            str(self._sample_rate), '-ac', '1', '-f', 's16le', '-'
        ]

        process = subprocess.Popen(command, stdout=subprocess.PIPE)

        while True:
            data = process.stdout.read(self._bytes_per_iter)

            if len(data) == 0:
                break

            yield data
Exemple #30
0
    def video2data(self, url):
        """Download the video at `url`, extract and transcribe its audio,
        and return (recognised text, video description).
        """
        current_dir = os.getcwd()
        os.chdir(self.path)
        ydl_opts = {
            'format':
            'bestaudio/best',
            'writeinfojson':
            'info',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'progress_hooks': [self._catch_filename],
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        time.sleep(20)
        video_description = self._downloaded_data()

        recognizer = KaldiRecognizer(Model(self.kaldi_path), 16000)

        # ffmpeg decodes the downloaded audio to 16 kHz mono s16le on the fly.
        decoder = subprocess.Popen([
            'ffmpeg', '-loglevel', 'quiet', '-i',
            os.path.join(self.path, self.filename), '-ar',
            str(16_000), '-ac', '1', '-f', 's16le', '-'
        ],
                                   stdout=subprocess.PIPE)

        full_text = ''
        chunk = decoder.stdout.read(4000)
        while len(chunk) != 0:
            if recognizer.AcceptWaveform(chunk):
                full_text += ' ' + json.loads(recognizer.Result())['text']
            chunk = decoder.stdout.read(4000)
        full_text += ' ' + json.loads(recognizer.FinalResult())['text']

        # Clean up the downloaded artefacts and restore the working directory.
        os.remove(os.path.join(self.path, self.description_file))
        os.remove(os.path.join(self.path, self.filename))

        os.chdir(current_dir)
        return full_text, video_description