Exemples Python de Pocketsphinx.hyp (pocketsphinx.Pocketsphinx.hyp)

Exemple #1

0

Afficher le fichier

Fichier : pocketsphinx.py Projet : milobella/device

class PocketSphinxWUW(WUWInterface):
    """Wake-up-word detector: a Pocketsphinx keyphrase decoder fed from a PyAudio input stream."""

    def __init__(self, keyword: str, kws_threshold: float):
        """Create the decoder and open the capture stream.

        :param keyword: phrase handed to Pocketsphinx as ``keyphrase``.
        :param kws_threshold: value handed to Pocketsphinx as ``kws_threshold``.
        """
        self._decoder = Pocketsphinx(keyphrase=keyword,
                                     lm=False,
                                     kws_threshold=kws_threshold)
        self._sound = pyaudio.PyAudio()
        try:
            # Mono, 16-bit input read in chunks of _FRAME_LENGTH frames.
            self._audio_stream = self._sound.open(rate=_SAMPLE_RATE,
                                                  channels=1,
                                                  format=pyaudio.paInt16,
                                                  input=True,
                                                  frames_per_buffer=_FRAME_LENGTH)
        except Exception:
            # Do not leak the PyAudio instance when the stream cannot be opened.
            self._sound.terminate()
            raise

    def prepare(self) -> None:
        """Start the audio stream and begin a new decoder utterance."""
        print("starting utterance")
        self._audio_stream.start_stream()
        self._decoder.start_utt()
        print("started utterance")

    def process(self) -> bool:
        """Read one buffer from the stream and feed it to the decoder.

        :return: ``True`` when the decoder reports a hypothesis (the utterance
            is then ended and the stream stopped); ``False`` when nothing was
            read or no hypothesis is available yet.
        """
        buf = self._audio_stream.read(_FRAME_LENGTH)
        if not buf:
            return False
        self._decoder.process_raw(buf, False, False)

        # Query the decoder once and reuse the result for both the check and the print.
        hypothesis = self._decoder.hyp()
        if not hypothesis:
            return False

        print(hypothesis.hypstr)
        print("ending utterance")
        self._decoder.end_utt()
        self._audio_stream.stop_stream()
        print("ended utterance")
        return True

    def terminate(self) -> None:
        """Close the audio stream and terminate the PyAudio instance."""
        if self._audio_stream is not None:
            self._audio_stream.close()

        if self._sound is not None:
            self._sound.terminate()

Exemple #2

0

Afficher le fichier

                def detect():
                    """Listen on the default audio input until the keyphrase is spotted.

                    Builds a keyphrase-search decoder from the enclosing scope's
                    ``hmm``, ``dic``, ``keyphrase`` and ``kws_threshold``, then feeds
                    it audio buffers. On the first hypothesis it logs the detection
                    and sets ``wake_statuses[system]`` to ``'detected'``.
                    """
                    from pocketsphinx import Pocketsphinx, Ad
                    ad = Ad(None, 16000)  # default input
                    decoder = Pocketsphinx(lm=False,
                                           hmm=hmm,
                                           dic=dic,
                                           keyphrase=keyphrase,
                                           kws_threshold=kws_threshold)

                    buf = bytearray(2048)
                    with ad, decoder.start_utterance():
                        # The loop stops once readinto() reports a negative value.
                        while ad.readinto(buf) >= 0:
                            decoder.process_raw(buf, False, False)
                            if decoder.hyp():
                                with decoder.end_utterance():
                                    # Lazy logging arguments: formatting only happens
                                    # when the record is actually emitted.
                                    logging.info('Wake word detected for %s', system)
                                    wake_statuses[system] = 'detected'
                                    break

Exemple #3

0

Afficher le fichier

Fichier : stt_pocketsphinx.py Projet : brBart/rhasspy-assistant

            def decode():
                nonlocal decoder, decoded_phrase

                # Dynamically load decoder
                if decoder is None:
                    _LOGGER.debug('Loading decoder')
                    hass.states.async_set(OBJECT_POCKETSPHINX, STATE_LOADING, state_attrs)
                    decoder = Pocketsphinx(
                        hmm=acoustic_model,
                        lm=language_model,
                        dic=dictionary)
                    hass.states.async_set(OBJECT_POCKETSPHINX, STATE_DECODING, state_attrs)

                # Do actual decoding
                with decoder.start_utterance():
                    decoder.process_raw(recorded_data, False, True)  # full utterance
                    hyp = decoder.hyp()
                    if hyp:
                        with decoder.end_utterance():
                            decoded_phrase = hyp.hypstr

                decoded_event.set()

Exemple #4

0

Afficher le fichier

class HotwordRecognizer:
    """Hotword (wake word) recognizer, a thin wrapper around |pocketsphinx|.

    The default hotwords are `'阿Q'` and `'R-cute'`.

    To customize the hotwords, see https://blog.51cto.com/feature09/2300352

    .. |pocketsphinx| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python' target='blank'>pocketsphinx</a>

    .. |config| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python#default-config' target='blank'>pocketsphinx Default config</a>

    :param hotword: a hotword or a list of hotwords, defaults to `['阿Q', 'R-cute']`
    :type hotword: str / list, optional
    :param hmm: see |config|
    :type hmm: str, optional
    :param lm: see |config|
    :type lm: str, optional
    :param dic: see |config|
    :type dic: str, optional
    """
    def __init__(self, **kwargs):
        # signal.signal(signal.SIGINT, self.stop)
        self._no_search = False
        self._full_utt = False
        # Initialised here so cancel() is safe before the first recognize().
        self._cancel = False
        hotword = kwargs.pop('hotword', ['阿Q', 'R-cute'])
        self._hotwords = hotword if isinstance(hotword, list) else [hotword]

        model_path = get_model_path()
        opt = {
            'verbose': False,
            'hmm': os.path.join(model_path, 'en-us'),
            'lm': util.resource('sphinx/rcute.lm'),
            'dic': util.resource('sphinx/rcute.dic'),
        }
        # Remaining keyword arguments override the defaults above.
        opt.update(kwargs)
        self._rec = Pocketsphinx(**opt)

    def recognize(self, stream, timeout=None):
        """Run recognition until a hotword is heard, the timeout elapses or it is cancelled.

        :param stream: audio source; must provide ``raw_read()`` and, when a
            timeout is used, a ``frame_duration`` attribute (presumably the
            duration in seconds of one ``raw_read()`` chunk; confirm against the
            stream class)
        :param timeout: maximum recognition time in seconds, defaults to `None`,
            meaning no timeout: the call only returns once a hotword is recognized
        :type timeout: float, optional
        :return: the recognized hotword, or `None` if the timeout elapsed first
        :rtype: str
        :raises RuntimeError: if :meth:`cancel` is called while recognizing
        """
        self._cancel = False
        elapsed = 0.0
        in_speech = False
        with self._rec.start_utterance():
            while True:
                data = stream.raw_read()
                self._rec.process_raw(data, self._no_search, self._full_utt)
                if in_speech != self._rec.get_in_speech():
                    in_speech = not in_speech
                    # Only inspect the hypothesis on a speech -> silence transition.
                    if not in_speech and self._rec.hyp():
                        with self._rec.end_utterance():
                            hyp = self._rec.hypothesis()
                            if hyp in self._hotwords:
                                return hyp
                if self._cancel:
                    raise RuntimeError(
                        'Hotword detection cancelled by another thread')
                if timeout:
                    # Was `source.frame_duration`: `source` is undefined here and
                    # raised NameError whenever a timeout was given.
                    elapsed += stream.frame_duration  # len(data) / 32000
                    if elapsed > timeout:
                        return None

    def cancel(self):
        """Stop an ongoing :meth:`recognize` call."""
        self._cancel = True

Exemple #5

0

Afficher le fichier

Fichier : stt_pocketsphinx.py Projet : brBart/rhasspy-assistant

        def decode():
            nonlocal decoder, decoded_phrase, data, filename

            # Check if WAV is in the correct format.
            # Convert with sox if not.
            with io.BytesIO(data) as wav_data:
                with wave.open(wav_data, mode='rb') as wav_file:
                    rate, width, channels = wav_file.getframerate(), wav_file.getsampwidth(), wav_file.getnchannels()
                    _LOGGER.debug('rate=%s, width=%s, channels=%s.' % (rate, width, channels))

                    if (rate != 16000) or (width != 2) or (channels != 1):
                        # Convert to 16-bit 16Khz mono (required by pocketsphinx acoustic models)
                        _LOGGER.debug('Need to convert to 16-bit 16Khz mono.')
                        if shutil.which('sox') is None:
                            _LOGGER.error("'sox' command not found. Cannot convert WAV file to appropriate format. Expect poor performance.")
                        else:
                            temp_input_file = None
                            if filename is None:
                                # Need to write original WAV data out to a file for sox
                                temp_input_file = tempfile.NamedTemporaryFile(suffix='.wav', mode='wb+')
                                temp_input_file.write(data)
                                temp_input_file.seek(0)
                                filename = temp_input_file.name

                            # sox <IN> -r 16000 -e signed-integer -b 16 -c 1 <OUT>
                            with tempfile.NamedTemporaryFile(suffix='.wav', mode='wb+') as out_wav_file:
                                subprocess.check_call(['sox',
                                                       filename,
                                                       '-r', '16000',
                                                       '-e', 'signed-integer',
                                                       '-b', '16',
                                                       '-c', '1',
                                                       out_wav_file.name])

                                out_wav_file.seek(0)

                                # Use converted data
                                with wave.open(out_wav_file, 'rb') as wav_file:
                                    data = wav_file.readframes(wav_file.getnframes())

                            if temp_input_file is not None:
                                # Clean up temporary file
                                del temp_input_file

            # Dynamically load decoder
            if decoder is None:
                _LOGGER.debug('Loading decoder')
                hass.states.async_set(OBJECT_POCKETSPHINX, STATE_LOADING, state_attrs)
                decoder = Pocketsphinx(
                    hmm=acoustic_model,
                    lm=language_model,
                    dic=dictionary)
                hass.states.async_set(OBJECT_POCKETSPHINX, STATE_DECODING, state_attrs)

            # Process WAV data as a complete utterance (best performance)
            with decoder.start_utterance():
                decoder.process_raw(data, False, True)  # full utterance
                if decoder.hyp():
                    with decoder.end_utterance():
                        decoded_phrase = decoder.hyp().hypstr

            decoded_event.set()