예제 #1
0
                def detect():
                    """Listen on the default audio input until the keyphrase is spotted.

                    Builds a keyphrase-spotting decoder from the enclosing scope's
                    ``hmm``, ``dic``, ``keyphrase`` and ``kws_threshold`` values, feeds
                    it fixed-size chunks read from the audio device, and as soon as the
                    decoder reports a hypothesis marks ``wake_statuses[system]`` as
                    ``'detected'`` and stops. The loop also stops if the audio device
                    reports a negative read result.
                    """
                    from pocketsphinx import Pocketsphinx, Ad

                    mic = Ad(None, 16000)  # default input
                    # Language model is disabled; the decoder only spots the keyphrase.
                    spotter_config = dict(lm=False,
                                          hmm=hmm,
                                          dic=dic,
                                          keyphrase=keyphrase,
                                          kws_threshold=kws_threshold)
                    spotter = Pocketsphinx(**spotter_config)

                    # Reusable read buffer, refilled in place on every iteration.
                    chunk = bytearray(2048)

                    with mic, spotter.start_utterance():
                        while True:
                            if mic.readinto(chunk) < 0:
                                # Negative read result: stop listening.
                                break

                            spotter.process_raw(chunk, False, False)
                            if not spotter.hyp():
                                continue

                            # A hypothesis exists, i.e. the keyphrase was heard.
                            with spotter.end_utterance():
                                logging.info('Wake word detected for %s' % system)
                                wake_statuses[system] = 'detected'
                            break
예제 #2
0
            def decode():
                nonlocal decoder, decoded_phrase

                # Dynamically load decoder
                if decoder is None:
                    _LOGGER.debug('Loading decoder')
                    hass.states.async_set(OBJECT_POCKETSPHINX, STATE_LOADING, state_attrs)
                    decoder = Pocketsphinx(
                        hmm=acoustic_model,
                        lm=language_model,
                        dic=dictionary)
                    hass.states.async_set(OBJECT_POCKETSPHINX, STATE_DECODING, state_attrs)

                # Do actual decoding
                with decoder.start_utterance():
                    decoder.process_raw(recorded_data, False, True)  # full utterance
                    hyp = decoder.hyp()
                    if hyp:
                        with decoder.end_utterance():
                            decoded_phrase = hyp.hypstr

                decoded_event.set()
예제 #3
0
class HotwordRecognizer:
    """Hotword (wake word) recognizer, a thin wrapper around |pocketsphinx|.

    The default hotwords are `'阿Q'` and `'R-cute'`.

    To define custom hotwords, see https://blog.51cto.com/feature09/2300352

    .. |pocketsphinx| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python' target='blank'>pocketsphinx</a>

    .. |config| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python#default-config' target='blank'>pocketsphinx Default config</a>

    :param hotword: a hotword or a collection of hotwords, defaults to `['阿Q', 'R-cute']`
    :type hotword: str / list, optional
    :param hmm: see |config|
    :type hmm: str, optional
    :param lm: see |config|
    :type lm: str, optional
    :param dic: see |config|
    :type dic: str, optional
    """
    def __init__(self, **kwargs):
        # signal.signal(signal.SIGINT, self.stop)
        self._no_search = False
        self._full_utt = False
        # Initialised here so the attribute exists before the first recognize().
        self._cancel = False

        hotword = kwargs.pop('hotword', ['阿Q', 'R-cute'])
        # Accept a single hotword or any plain collection of them; a tuple or
        # set is expanded instead of being wrapped as one (unmatchable) entry.
        if isinstance(hotword, (list, tuple, set, frozenset)):
            self._hotwords = list(hotword)
        else:
            self._hotwords = [hotword]

        model_path = get_model_path()
        opt = {
            'verbose': False,
            'hmm': os.path.join(model_path, 'en-us'),
            'lm': util.resource('sphinx/rcute.lm'),
            'dic': util.resource('sphinx/rcute.dic'),
        }
        # Remaining keyword arguments override the defaults above.
        opt.update(kwargs)
        self._rec = Pocketsphinx(**opt)

    def recognize(self, stream, timeout=None):
        """Read audio from `stream` until one of the hotwords is recognized.

        :param stream: audio source; must provide `raw_read()` and, when a
            timeout is used, a `frame_duration` attribute giving the length of
            one read (presumably in seconds, matching `timeout`)
        :param timeout: maximum time to listen, defaults to `None`, meaning no
            timeout: the call only returns once a hotword is recognized
        :type timeout: float, optional
        :return: the recognized hotword, or `None` if the timeout elapsed first
        :rtype: str
        :raises RuntimeError: if :meth:`cancel` is called while recognizing
        """
        self._cancel = False
        elapsed = 0.0
        in_speech = False
        with self._rec.start_utterance():
            while True:
                data = stream.raw_read()
                self._rec.process_raw(data, self._no_search, self._full_utt)

                # Only look at the hypothesis on a speech -> silence transition.
                if in_speech != self._rec.get_in_speech():
                    in_speech = not in_speech
                    if not in_speech and self._rec.hyp():
                        with self._rec.end_utterance():
                            hyp = self._rec.hypothesis()
                            if hyp in self._hotwords:
                                return hyp

                if self._cancel:
                    raise RuntimeError(
                        'Hotword detection cancelled by another thread')

                if timeout:
                    # Account for the audio just consumed. The original author
                    # noted `len(data) / 32000` as an alternative estimate.
                    elapsed += stream.frame_duration
                    if elapsed > timeout:
                        return None

    def cancel(self):
        """Stop a recognition that is currently in progress."""
        self._cancel = True
예제 #4
0
        def decode():
            nonlocal decoder, decoded_phrase, data, filename

            # Check if WAV is in the correct format.
            # Convert with sox if not.
            with io.BytesIO(data) as wav_data:
                with wave.open(wav_data, mode='rb') as wav_file:
                    rate, width, channels = wav_file.getframerate(), wav_file.getsampwidth(), wav_file.getnchannels()
                    _LOGGER.debug('rate=%s, width=%s, channels=%s.' % (rate, width, channels))

                    if (rate != 16000) or (width != 2) or (channels != 1):
                        # Convert to 16-bit 16Khz mono (required by pocketsphinx acoustic models)
                        _LOGGER.debug('Need to convert to 16-bit 16Khz mono.')
                        if shutil.which('sox') is None:
                            _LOGGER.error("'sox' command not found. Cannot convert WAV file to appropriate format. Expect poor performance.")
                        else:
                            temp_input_file = None
                            if filename is None:
                                # Need to write original WAV data out to a file for sox
                                temp_input_file = tempfile.NamedTemporaryFile(suffix='.wav', mode='wb+')
                                temp_input_file.write(data)
                                temp_input_file.seek(0)
                                filename = temp_input_file.name

                            # sox <IN> -r 16000 -e signed-integer -b 16 -c 1 <OUT>
                            with tempfile.NamedTemporaryFile(suffix='.wav', mode='wb+') as out_wav_file:
                                subprocess.check_call(['sox',
                                                       filename,
                                                       '-r', '16000',
                                                       '-e', 'signed-integer',
                                                       '-b', '16',
                                                       '-c', '1',
                                                       out_wav_file.name])

                                out_wav_file.seek(0)

                                # Use converted data
                                with wave.open(out_wav_file, 'rb') as wav_file:
                                    data = wav_file.readframes(wav_file.getnframes())

                            if temp_input_file is not None:
                                # Clean up temporary file
                                del temp_input_file

            # Dynamically load decoder
            if decoder is None:
                _LOGGER.debug('Loading decoder')
                hass.states.async_set(OBJECT_POCKETSPHINX, STATE_LOADING, state_attrs)
                decoder = Pocketsphinx(
                    hmm=acoustic_model,
                    lm=language_model,
                    dic=dictionary)
                hass.states.async_set(OBJECT_POCKETSPHINX, STATE_DECODING, state_attrs)

            # Process WAV data as a complete utterance (best performance)
            with decoder.start_utterance():
                decoder.process_raw(data, False, True)  # full utterance
                if decoder.hyp():
                    with decoder.end_utterance():
                        decoded_phrase = decoder.hyp().hypstr

            decoded_event.set()