Example #1
    def setup(self):
        # PocketSphinx configuration
        ps_config = Decoder.default_config()

        # Set recognition model to US
        ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        ps_config.set_string(
            '-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

        # Specify recognition key phrase
        ps_config.set_string('-keyphrase', self._tconfig['phrase'])
        ps_config.set_float('-kws_threshold',
                            float(self._tconfig['threshold']))

        # Hide the VERY verbose logging information when not in debug
        if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:

            null_path = '/dev/null'
            if platform.system() == 'Windows':
                null_path = 'nul'

            ps_config.set_string('-logfn', null_path)

        # Process audio chunk by chunk. On keyword detected perform action and restart search
        self._detector = Decoder(ps_config)
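Example #1 only builds the detector; the chunk-by-chunk loop mentioned in its final comment is not shown. Below is a minimal, illustrative sketch of that loop (not part of the original example), assuming a 16 kHz, 16-bit mono PyAudio input stream; it mirrors the keyword-spotting pattern used by later examples in this listing.

# Illustrative only: drive a decoder built from the ps_config above in a
# keyword-spotting loop, restarting the search after each detection.
import pyaudio

pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=1, rate=16000,
                 input=True, frames_per_buffer=1024)

detector = Decoder(ps_config)
detector.start_utt()
while True:
    buf = stream.read(1024)
    detector.process_raw(buf, False, False)
    if detector.hyp() is not None:       # key phrase spotted
        print('Keyword detected:', detector.hyp().hypstr)
        detector.end_utt()               # restart the keyword search
        detector.start_utt()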
Example #2
 def build_decoder(self):
     config = Decoder.default_config()
     config.set_string(
         "-dict", os.path.join(self.MODEL_DIR, "cmudict-en-us.dict")
     )
     config.set_string(
         "-fdict", os.path.join(self.MODEL_DIR, "en-us/noisedict")
     )
     config.set_string(
         "-featparams", os.path.join(self.MODEL_DIR, "en-us/feat.params")
     )
     config.set_string(
         "-tmat", os.path.join(self.MODEL_DIR, "en-us/transition_matrices")
     )
     config.set_string("-hmm", os.path.join(self.MODEL_DIR, "en-us"))
     config.set_string("-lm", os.path.join(self.MODEL_DIR, "en-us.lm.bin"))
     config.set_string("-mdef", os.path.join(self.MODEL_DIR, "en-us/mdef"))
     config.set_string("-mean", os.path.join(self.MODEL_DIR, "en-us/means"))
     config.set_string(
         "-sendump", os.path.join(self.MODEL_DIR, "en-us/sendump")
     )
     config.set_string(
         "-var", os.path.join(self.MODEL_DIR, "en-us/variances")
     )
     null_path = "/dev/null"
     if sys.platform == "win32":
         null_path = "NUL"
     config.set_string("-logfn", null_path)
     return Decoder(config)
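A sketch of how the decoder returned by build_decoder() might be driven over a pre-recorded utterance. The file name is illustrative and assumed to hold raw 16 kHz, 16-bit mono PCM; `recognizer` stands for an instance of the class the method above belongs to.

decoder = recognizer.build_decoder()
decoder.start_utt()
with open('utterance.raw', 'rb') as f:       # illustrative file name
    while True:
        buf = f.read(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
decoder.end_utt()
hyp = decoder.hyp()
if hyp is not None:
    print(hyp.hypstr)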
Example #3
    def setup(self):
        # PocketSphinx configuration
        ps_config = Decoder.default_config()

        # Set recognition model from the configured language
        ps_config.set_string(
            '-hmm', os.path.join(get_model_path(), self._tconfig['language']))
        ps_config.set_string(
            '-dict', os.path.join(get_model_path(),
                                  self._tconfig['dictionary']))

        # Specify recognition key phrase
        #ps_config.set_string('-keyphrase', self._tconfig['phrase'])
        #ps_config.set_float('-kws_threshold', float(self._tconfig['threshold']))

        ### Multiple Hotwords
        #ps_config.set_string('-inmic', 'yes')
        ps_config.set_string('-kws', '/opt/AlexaPi/src/keyphrase.list')

        # Hide the VERY verbose logging information when not in debug
        if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
            ps_config.set_string('-logfn', '/dev/null')

        # Process audio chunk by chunk. On keyword detected perform action and restart search
        self._decoder = Decoder(ps_config)
Example #4
    def __init__(self, phrase, threshold, device_index=0):

        self._decoder = None
        self._pa = None
        self._device_no = device_index
        self._phrase = phrase
        self._threshold = float(threshold)

        # PocketSphinx configuration
        logging.info('Phrase: ' + phrase + ' Threshold: ' + str(threshold))
        ps_config = Decoder.default_config()

        # Set recognition model to US
        ps_config.set_string('-hmm',
                             os.path.join(get_model_path_keyword(), 'en-us'))
        ps_config.set_string(
            '-dict',
            os.path.join(get_model_path_keyword(), 'cmudict-en-us.dict'))
        # Specify recognition key phrase
        ps_config.set_string('-keyphrase', self._phrase)
        ps_config.set_float('-kws_threshold', self._threshold)
        ps_config.set_string('-logfn', '/dev/null')

        # Process audio chunk by chunk. On keyword detected perform action and restart search
        self._decoder = Decoder(ps_config)
        self._pa = pyaudio.PyAudio()
Example #5
def create_decoder():
    base = os.path.join(root(), 'pocketsphinx', 'zero_ru_cont_8k_v3')
    hmm = os.path.join(base, 'zero_ru.cd_semi_4000')  # - mobile?
    # hmm = os.path.join(base, 'zero_ru.cd_cont_4000')
    # hmm = os.path.join(base, 'zero_ru.cd_ptm_4000') - mobile?

    dict = os.path.join(base, 'ru.dic.orig')
    # dict = os.path.join(base, 'ru.dic')
    lm = os.path.join(base, 'ru.lm.orig')

    # kws = os.path.join(base, 'ru.dic.orig.keywords')
    kws = os.path.join(base, 'keywords.mini')

    decoder_config = Decoder.default_config()
    decoder_config.set_string('-hmm', hmm)

    decoder_config.set_string("-lm", lm)
    # decoder_config.set_string('-keyphrase', 'алекса')
    # decoder_config.set_float('-kws_threshold', 1e-20)
    # decoder_config.set_string('-kws', kws)

    decoder_config.set_string('-dict', dict)
    decoder_config.set_boolean('-remove_noise', False)
    decoder_config.set_float('-samprate', 8000)
    decoder_config.set_string('-logfn', os.devnull)

    decoder = Decoder(decoder_config)

    return decoder
Example #6
    def create_decoder():
        path = os.path.dirname(os.path.realpath(__file__))
        pocketsphinx_data = os.getenv('POCKETSPHINX_DATA',
                                      os.path.join(path, 'pocketsphinx'))
        hmm = os.getenv('POCKETSPHINX_HMM',
                        os.path.join(pocketsphinx_data, 'tdt_sc_8k'))
        dict = os.getenv('POCKETSPHINX_DIC',
                         os.path.join(pocketsphinx_data, 'keywords.dic'))
        kws = os.getenv('POCKETSPHINX_KWS',
                        os.path.join(pocketsphinx_data, 'keywords.kws'))
        lm = os.getenv('POCKETSPHINX_LM',
                       os.path.join(pocketsphinx_data, 'keywords.lm'))
        log = os.getenv('POCKETSPHINX_LOG',
                        os.path.join(pocketsphinx_data, 'log'))

        config = Decoder.default_config()
        config.set_string('-hmm', hmm)
        config.set_string('-lm', lm)
        config.set_string('-dict', dict)
        # config.set_string('-kws', kws)
        # config.set_int('-samprate', SAMPLE_RATE) # uncomment if rate is not 16000. use config.set_float() on ubuntu
        config.set_int('-nfft', 512)
        #config.set_float('-vad_threshold', 2.7)
        config.set_string('-logfn', log)

        return Decoder(config)
Example #7
def audio2phoneme(audio_file):
    wave_read = wave.open(audio_file, 'rb')
    length = wave_read.getnframes() / wave_read.getframerate()
    wave_read.close()

    # Decode streaming data.
    decoder = Decoder(config)

    buf = bytearray(1024)
    with open(audio_file, 'rb') as f:
        decoder.start_utt()
        while f.readinto(buf):
            decoder.process_raw(buf, False, False)
        decoder.end_utt()

    nframes = decoder.n_frames()

    phonemes = []
    offset = None
    for seg in decoder.seg():
        if offset is None:
            offset = seg.start_frame
        start_frame = seg.start_frame - offset
        end_frame = seg.end_frame - offset
        phonemes.append((seg.word, start_frame / nframes * length,
                         end_frame / nframes * length))

    return phonemes
Example #8
    def process_file(self, audiofile):
        """
        processes audio file and returns the text
        """
        with open(audiofile, 'rb') as audiofile:
            decoder = Decoder(self.config)
            decoder.start_utt()

            while True:
                buf = audiofile.read(1024)
                if buf:
                    decoder.process_raw(buf, False, False)
                else:
                    break
            decoder.end_utt()

            hyp = decoder.hyp()
            print "Hyp:", hyp

            if hyp != None:
                print "Hyp Score", (hyp.prob, hyp.best_score)
                average_score = 0
                seg_count = 0
                for seg in decoder.seg():
                    if seg.word != "<sil>":
                        seg_count += 1
                        average_score += seg.ascore
                        print(seg.word, seg.ascore, seg.lscore)

                print "hyp:", hyp.hypstr
                print average_score / seg_count
                return hyp.hypstr
        return None
Example #9
    def __init__(self, keyword, sensitivity):
        config = Decoder.default_config()
        config.set_string('-logfn', '/dev/null')
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-keyphrase', keyword if keyword != 'snowboy' else 'snow boy')
        config.set_float('-kws_threshold', 10 ** -sensitivity)

        self._decoder = Decoder(config)
        self._decoder.start_utt()
Example #10
def audio2phoneme(audio_file):
    wave_read = wave.open(audio_file, 'rb')
    length = wave_read.getnframes()/wave_read.getframerate()
    wave_read.close()

    # Decode streaming data.
    decoder = Decoder(config)

    buf = bytearray(1024)
    with open(audio_file, 'rb') as f:
        decoder.start_utt()
        while f.readinto(buf):
            decoder.process_raw(buf, False, False)
        decoder.end_utt()

    nframes = decoder.n_frames()


    phonemes = []
    offset = None
    for seg in decoder.seg():
        if offset is None:
            offset = seg.start_frame
        start_frame = seg.start_frame - offset
        end_frame = seg.end_frame - offset
        phonemes.append((
            seg.word, start_frame/nframes*length, end_frame/nframes*length))

    return phonemes
Example #11
    def __init__(self):
        # https://github.com/cmusphinx/pocketsphinx-python/blob/master/example.py
        config = Decoder.default_config()
        config.set_string('-logfn', '/dev/null')
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-lm', os.path.join(get_model_path(),
                                              'en-us.lm.bin'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))

        self._decoder = Decoder(config)
Example #12
 def configure(self):
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                            'hmm'))
     config.set_string('-dict', os.path.join(BASEDIR, 'model', self.lang,
                                             'mycroft-en-us.dict'))
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float('1e-45'))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     self.decoder = Decoder(config)
Example #13
 def configure(self):
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                            'hmm'))
     config.set_string('-dict', os.path.join(BASEDIR, 'model', self.lang,
                                             'mycroft-en-us.dict'))
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float('1e-45'))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     self.decoder = Decoder(config)
Example #14
    def setup_pocketsphinx(self) -> None:
        self.logger.info("Setting up PocketSphinx.")
        self.MODELDIR = "resources/model"

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(self.MODELDIR, 'es-es'))
        config.set_string('-lm', os.path.join(self.MODELDIR, 'es-es.lm'))
        config.set_string('-dict', os.path.join(self.MODELDIR, 'es.dict'))
        config.set_string('-logfn', '/dev/null')

        self.decoder = Decoder(config)

        self.prev_buf_is_speech = False
        self.decoder.start_utt()
        self.logger.info("Done setting up PocketSphinx.")
Example #15
    def _prepare_decoder(self):
        """Set decoder config"""
        # prepare config
        self._hotword = self._settings['speech']['hotword']
        # self._answer = self._settings['hotword']['answer']
        if not os.path.isdir("pocketsphinx-data"):
            raise HotWordError("Missing pocketsphinx-data folder. Please run `make hotword`")

        acoustic_model = os.path.join("pocketsphinx-data",
                                      self._settings['speech']['language'],
                                      'acoustic-model',
                                      )
        language_model = os.path.join("pocketsphinx-data",
                                      self._settings['speech']['language'],
                                      'language-model.lm.bin',
                                      )
        pocket_dict = os.path.join("pocketsphinx-data",
                                   self._settings['speech']['language'],
                                   'pronounciation-dictionary.dict',
                                   )
        self._config.set_string('-logfn', "/dev/null")
        self._config.set_string('-hmm', acoustic_model)
        self._config.set_string('-lm', language_model)
        self._config.set_string('-dict', pocket_dict)
        try:
            self._decoder = Decoder(self._config)
        except RuntimeError:
            self.logger.critical("Error getting audio decoder. Hotword not started")
            return False
        self._decoder.set_keyphrase('wakeup', self._hotword)
        self._decoder.set_search('wakeup')
Example #16
class LocalRecognizer(object):
    def __init__(self, sample_rate=16000, lang="en-us", key_phrase="mycroft"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.configure()

    def configure(self):
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                               'hmm'))
        config.set_string('-dict', os.path.join(BASEDIR, 'model', self.lang,
                                                'mycroft-en-us.dict'))
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float('1e-45'))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.decoder = Decoder(config)

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def contains(self, hypothesis):
        return hypothesis and self.key_phrase in hypothesis.hypstr.lower()
Example #17
class LocalRecognizer(object):
    def __init__(self, sample_rate=16000, lang="en-us", key_phrase="mycroft"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.configure()

    def configure(self):
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                               'hmm'))
        config.set_string('-dict', os.path.join(BASEDIR, 'model', self.lang,
                                                'mycroft-en-us.dict'))
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float('1e-45'))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.decoder = Decoder(config)

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def found_wake_word(self, hypothesis):
        return hypothesis and self.key_phrase in hypothesis.hypstr.lower()
Example #18
	def setup(self):
		# PocketSphinx configuration
		ps_config = Decoder.default_config()

		# Set recognition model to US
		ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
		ps_config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

		# Specify recognition key phrase
		ps_config.set_string('-keyphrase', self._tconfig['phrase'])
		ps_config.set_float('-kws_threshold', float(self._tconfig['threshold']))

		# Hide the VERY verbose logging information when not in debug
		if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
			ps_config.set_string('-logfn', '/dev/null')

		# Process audio chunk by chunk. On keyword detected perform action and restart search
		self._decoder = Decoder(ps_config)
Example #19
    def __init__(self, settings, action_queue, tts_queue, logger):
        NLUBase.__init__(self, settings, action_queue, None, tts_queue, logger)
        # Init private attributes
        self._rerun = True

        self._answer_sound_path = "sounds/answer.wav"
        self._config = Decoder.default_config()
        if not self._prepare_decoder():
            self._must_run = False
Example #20
    def start_recognizer(self):
        """Function to handle lm or grammar processing of audio."""
        config = Decoder.default_config()
        rospy.loginfo("Done initializing pocketsphinx")

        # Setting configuration of decoder using provided params
        config.set_string('-dict', self.dict)
        config.set_string('-lm', self.class_lm)
        config.set_string('-hmm', self.hmm)
        self.decoder = Decoder(config)

        # Start processing input audio
        self.decoder.start_utt()
        rospy.loginfo("Decoder started successfully")

        # Subscribe to audio topic
        rospy.Subscriber("recognizer/audio_ready", Bool, self.process_audio)
        rospy.spin()
Example #21
 def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000,
              lang="en-us"):
     self.lang = lang
     self.key_phrase = key_phrase
     self.sample_rate = sample_rate
     self.threshold = threshold
     self.phonemes = phonemes
     dict_name = self.create_dict(key_phrase, phonemes)
     self.decoder = Decoder(self.create_config(dict_name))
Example #22
    def create_decoder():
        from pocketsphinx.pocketsphinx import Decoder

        path = os.path.dirname(os.path.realpath(__file__))
        pocketsphinx_data = os.getenv('POCKETSPHINX_DATA', os.path.join(path, 'pocketsphinx-data'))
        hmm = os.getenv('POCKETSPHINX_HMM', os.path.join(pocketsphinx_data, 'hmm'))
        dict = os.getenv('POCKETSPHINX_DIC', os.path.join(pocketsphinx_data, 'dictionary.txt'))
        kws = os.getenv('POCKETSPHINX_KWS', os.path.join(pocketsphinx_data, 'keywords.txt'))

        config = Decoder.default_config()
        config.set_string('-hmm', hmm)
        config.set_string('-dict', dict)
        config.set_string('-kws', kws)
        # config.set_int('-samprate', SAMPLE_RATE) # uncomment if rate is not 16000. use config.set_float() on ubuntu
        config.set_int('-nfft', 512)
        config.set_float('-vad_threshold', 2.7)
        config.set_string('-logfn', os.devnull)

        return Decoder(config)
Example #23
    def __init__(self, keyword, sensitivity):
        """
        Constructor.

        :param keyword: keyword to be detected.
        :param sensitivity: detection sensitivity.
        """

        # Set the configuration.
        config = Decoder.default_config()
        config.set_string('-logfn', '/dev/null')
        # Set recognition model to US
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-keyphrase', keyword)
        config.set_float('-kws_threshold', sensitivity)
        self._decoder = Decoder(config)
        self._decoder.start_utt()
Example #24
 def __init__(self, kws_threshold = 1e-40):
     # configuration.
     base_dir = os.path.dirname(__file__)
     modeldir = "../../../pocketsphinx/model/en-us"
     config = _Decoder.default_config()
     config.set_string('-hmm', os.path.join(base_dir, modeldir, 'en-us'))
     config.set_string('-dict', os.path.join(base_dir, modeldir, 'cmudict-en-us.dict'))
     config.set_float('-kws_threshold', kws_threshold)
     self.config = config
     self.decoder = None
Example #25
    def __init__(self, device_index=0, model_path=None):

        self._decoder = None
        self._pa = None
        self._device_no = device_index
        self._model_path = model_path

        # PocketSphinx configuration
        logging.info('Grammar file:' + os.path.join(model_path, self.GRAMMAR))
        ps_config = Decoder.default_config()

        # Set recognition model to ...
        ps_config.set_string('-hmm', os.path.join(model_path, self.HMM))
        ps_config.set_string('-dict', os.path.join(model_path, self.DIC))
        ps_config.set_string('-jsgf', os.path.join(model_path, self.GRAMMAR))
        ps_config.set_string('-logfn', '/dev/null')

        # Process audio chunk by chunk. On keyword detected perform action and restart search
        self._decoder = Decoder(ps_config)
        self._pa = pyaudio.PyAudio()
Example #26
 def create_config(self, dict_name):
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                            'hmm'))
     config.set_string('-dict', dict_name)
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float(self.threshold))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     return config
Example #27
    def __init__(self, engine_type, keyword, sensitivity):
        """Initializer.

        :param engine_type: type of the engine.
        :param keyword: keyword being used for detection.
        :param sensitivity: sensitivity passed to the engine.
        """

        super().__init__(engine_type, keyword, sensitivity)
        # Set the configuration.
        config = Decoder.default_config()
        config.set_string('-logfn', '/dev/null')
        # Set recognition model to US
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-keyphrase', keyword)
        config.set_float('-kws_threshold', sensitivity)
        self._decoder = Decoder(config)
        self._decoder.start_utt()
Example #28
    def __init__(self):
        self.MODELDIR = 'speech/'
        self.wav_name = 'media/temp.wav'
        self.raw_name = 'media/temp.raw'

        config = Decoder.default_config()
        config.set_string('-hmm', self.MODELDIR + 'ru_ru/')
        config.set_string('-dict', self.MODELDIR + 'ru.dic')
        self.decoder = Decoder(config)

        jsgf = Jsgf(self.MODELDIR + 'gr.gram')
        rule = jsgf.get_rule('gr.rule')
        fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
        fsg.writefile('gr.fsg')

        self.decoder.set_fsg('gr', fsg)
        self.decoder.set_search('gr')

        self.rec = Recognizer()
        self.mic = Microphone()
Example #29
        def recognize_phonemes(segments_path, phonemes_result_path):

            # Create a decoder with certain model
            config = Decoder.default_config()
            config.set_string('-hmm', join(model_dir, decoder_hmm))
            config.set_string('-allphone', join(model_dir, decoder_allphone))
            config.set_string('-dict', join(model_dir, decoder_dict))
            config.set_float('-lw', decoder_lw)
            config.set_float('-pip', decoder_pip)
            config.set_float('-beam', decoder_beam)
            config.set_float('-pbeam', decoder_pbeam)
            config.set_boolean('-mmap', decoder_mmap)
            hyps = []
            segs = []
            self.decoder = Decoder(config)
            with open(segments_path, 'rb') as stream:
                in_speech_buffer = False
                self.decoder.start_utt()
                while True:
                    buf = stream.read(decoder_stream_buf_size)
                    if buf:
                        self.decoder.process_raw(buf, False, False)
                        if self.decoder.get_in_speech() != in_speech_buffer:
                            in_speech_buffer = self.decoder.get_in_speech()
                            if not in_speech_buffer:
                                hyp_result, segment = _get_decoder_results()
                                segs += segment

                                hyps.append(hyp_result)
                                self.decoder.start_utt()
                    else:
                        if in_speech_buffer:
                            hyp_result, segment = _get_decoder_results()
                            segs += segment

                            hyps.append(hyp_result)
                        break
            phonemes_dict = dict(hypotheses=hyps, segment_info=segs)
            phonemes_result = DecoderOutputSchema().dumps(phonemes_dict)
            with open(phonemes_result_path, 'w') as f:
                f.write(phonemes_result)
Example #30
 def __init__(self, gui):
     QThread.__init__(self, gui)
     if settings.sphinx_acoustic_model_dir == '':  # use default acoustic model
         acoustic_model_directory = path.join(get_model_path(), 'en-us')
     else:  # use custom acoustic model
         acoustic_model_directory = settings.sphinx_acoustic_model_dir
     config = Decoder.default_config()
     config.set_string('-hmm', acoustic_model_directory)  # acoustic model
     config.set_string(
         '-dict', settings.prepared_lexicon_file)  # lexicon pronunciation
     config.set_string(
         '-jsgf',
         settings.prepared_grammar_file)  # language model from grammar
     config.set_string(
         '-logfn',
         settings.outputFileName(sphinx_decoder_log_file_base_name,
                                 ext='log'))
     self.listen = False
     self.decoder = Decoder(config)
     self.audio = None
     self.device = None
Example #31
    def _create_decoder(config) -> Decoder:
        decoder_config = Decoder.default_config()
        decoder_config.set_string('-hmm', config.hmm)
        decoder_config.set_string('-dict', config.dict)
        decoder_config.set_boolean('-remove_noise', config.remove_noise)
        decoder_config.set_float('-samprate', config.sample_rate)
        decoder_config.set_string('-logfn', devnull)

        if config.lm is not None:
            decoder_config.set_string("-lm", config.lm)
        elif len(config.hotwords) == 1:
            decoder_config.set_string('-keyphrase', config.hotwords[0])
            decoder_config.set_float('-kws_threshold', config.threshold)
        else:
            import os
            from tempfile import gettempdir
            path = os.path.join(gettempdir(), 'keywords.mini')
            f = open(path, 'w')
            f.writelines(['{} /{}/\n'.format(w, config.threshold) for w in config.hotwords])
            f.flush()
            decoder_config.set_string('-kws', path)

        return Decoder(decoder_config)
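For reference, the temporary file written above follows the pocketsphinx '-kws' keyword-list format: one phrase per line with its detection threshold between slashes. With two hotwords and a threshold of 1e-20 (values illustrative), keywords.mini would contain:

hey computer /1e-20/
ok computer /1e-20/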
Example #32
    def get_decoder():
        from pocketsphinx.pocketsphinx import Decoder

        script_dir = os.path.dirname(os.path.realpath(__file__))
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(script_dir, 'model/hmm/en'))
        config.set_string('-dict',
                          os.path.join(script_dir, 'model/respeaker.dic'))
        config.set_string('-kws', os.path.join(script_dir,
                                               'model/keywords.txt'))
        # config.set_string('-keyphrase', 'respeaker')
        # config.set_float('-kws_threshold', 1e-43)
        config.set_int('-samprate', SAMPLE_RATE)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', os.devnull)
        try:
            decoder = Decoder(config)
        except Exception as e:
            print(
                "Maybe replace config.set_int('-samprate', SAMPLE_RATE) with config.set_float('-samprate', SAMPLE_RATE)"
            )
            raise e

        return decoder
Example #33
class LocalRecognizer(object):
    def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000,
                 lang="en-us"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        dict_name = self.create_dict(key_phrase, phonemes)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name):
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                               'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def found_wake_word(self, hypothesis):
        return hypothesis and self.key_phrase in hypothesis.hypstr.lower()
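A usage sketch for the LocalRecognizer above; the file name, phoneme string, and threshold are illustrative, and the recording is assumed to be raw 16 kHz, 16-bit mono PCM.

recognizer = LocalRecognizer(key_phrase='hey mycroft',
                             phonemes='HH EY . M AY K R AO F T',
                             threshold=1e-90)
with open('recording.raw', 'rb') as f:       # illustrative file name
    audio_bytes = f.read()
hyp = recognizer.transcribe(audio_bytes)
if recognizer.found_wake_word(hyp):
    print('wake word heard')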
Example #34
def init():
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-logfn', settings.POCKETSPHINX_LOG)
    config.set_string('-hmm',   settings.ACOUSTIC_MODEL)
    config.set_string('-lm',    settings.LANGUAGE_MODEL)
    config.set_string('-dict',  settings.POCKET_DICT)

    # Decode streaming data
    global decoder, p
    decoder = Decoder(config)
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()

    global r
    r = speech_recognition.Recognizer()
Example #35
        def load_models(pipe, config, models):
            """Internal worker method to load the language model

            Note:
                Some languages take a long time to load. English is by far
                the fastest language model to load.

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                config: The pocketsphinx decoder configuration to populate
                models (dict): The language and nltk models developed by the parent process

            Returns: (Decoder, nltk model)
                The STT decoder object and the nltk model

            """

            language_model = models["language_model"]
            nltk_model = models["nltk_model"]

            if False in [
                    language_model.is_valid_model(),
                    nltk_model.is_valid_model()
            ]:
                l_log.error("The language model %s is invalid!" %
                            str(language_model.name))
                send_error(pipe, "Failed loading language model!")
                return

            # Load the model configurations into pocketsphinx
            config.set_string('-hmm', str(language_model.hmm))
            config.set_string('-lm', str(language_model.lm))
            config.set_string('-dict', str(language_model.dict))
            decoder = Decoder(config)

            send_json(
                pipe,
                {"success": True})  # Send a success message to the client

            l_log.debug("Set the language model to %s" %
                        str(language_model.name))

            return decoder, nltk_model  # Return the new decoder and nltk model
Example #36
def init():
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-logfn', settings.POCKETSPHINX_LOG)
    config.set_string('-hmm',   settings.ACOUSTIC_MODEL)
    config.set_string('-lm',    settings.LANGUAGE_MODEL)
    config.set_string('-dict',  settings.POCKET_DICT)

    # Decode streaming data
    global decoder, p
    decoder = Decoder(config)
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()

    global r
    r = speech_recognition.Recognizer()
Example #37
def init():
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-logfn', os.path.join(settings.LOGS_DIR, 'passive-listen.log'))
    config.set_string('-hmm', os.path.join(settings.MODEL_DIR, 'en-US/acoustic-model'))
    config.set_string('-lm', os.path.join(settings.MODEL_DIR, 'en-US/language-model.lm.bin'))
    config.set_string('-dict', os.path.join(settings.MODEL_DIR, 'en-US/pronounciation-dictionary.dict'))
    
    # Decode streaming data
    global decoder, p
    decoder = Decoder(config)
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()
    
    global r
    r = speech_recognition.Recognizer()
Example #38
class PocketSphinxASREngine(ASREngine):
    """https://pypi.org/project/pocketsphinx/"""
    def __init__(self):
        # https://github.com/cmusphinx/pocketsphinx-python/blob/master/example.py
        config = Decoder.default_config()
        config.set_string('-logfn', '/dev/null')
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-lm', os.path.join(get_model_path(),
                                              'en-us.lm.bin'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))

        self._decoder = Decoder(config)

    def transcribe(self, path):
        pcm, sample_rate = soundfile.read(path)
        assert sample_rate == 16000
        pcm = (np.iinfo(np.int16).max * pcm).astype(np.int16).tobytes()

        self._decoder.start_utt()
        self._decoder.process_raw(pcm, no_search=False, full_utt=True)
        self._decoder.end_utt()

        words = []
        for seg in self._decoder.seg():
            word = seg.word

            # Remove special tokens.
            if word == '<sil>' or word == '<s>' or word == '</s>':
                continue

            word = ''.join([x for x in word if x.isalpha()])

            words.append(word)

        return ' '.join(words)

    def __str__(self):
        return 'PocketSphinx'
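Usage of the engine above is a one-liner; the file name is illustrative and must point to a 16 kHz mono file readable by soundfile (transcribe asserts the sample rate).

engine = PocketSphinxASREngine()
print(engine.transcribe('speech_16k_mono.wav'))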
Example #39
def init():
    # Be wary of an OSError due to a race condition
    if not os.path.exists(LOGS_DIR):
        os.makedirs(LOGS_DIR)
    
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-logfn', path.join(LOGS_DIR, 'passive-listen.log'))
    config.set_string('-hmm', path.join(MODEL_DIR, 'en-us\en-us'))
    config.set_string('-lm', path.join(MODEL_DIR, 'en-us\en-us.lm.dmp'))
    config.set_string('-dict', path.join(MODEL_DIR, 'en-us\cmudict-en-us.dict'))
    
    # Decode streaming data
    global decoder, p
    decoder = Decoder(config)
    decoder.set_keyphrase("wakeup", WAKE_UP_WORD)
    decoder.set_search("wakeup")
    
    p = pyaudio.PyAudio()
Example #40
def init():
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-logfn',
                      os.path.join(settings.LOGS_DIR, 'passive-listen.log'))
    config.set_string('-hmm',
                      os.path.join(settings.MODEL_DIR, 'en-US/acoustic-model'))
    config.set_string(
        '-lm', os.path.join(settings.MODEL_DIR, 'en-US/language-model.lm.bin'))
    config.set_string(
        '-dict',
        os.path.join(settings.MODEL_DIR,
                     'en-US/pronounciation-dictionary.dict'))

    # Decode streaming data
    global decoder, p
    decoder = Decoder(config)
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()

    global r
    r = speech_recognition.Recognizer()
Example #41
class PocketSphinxEngine(Engine):
    def __init__(self, keyword, sensitivity):
        config = Decoder.default_config()
        config.set_string('-logfn', '/dev/null')
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-keyphrase',
                          keyword if keyword != 'snowboy' else 'snow boy')
        config.set_float('-kws_threshold', 10**-sensitivity)

        self._decoder = Decoder(config)
        self._decoder.start_utt()

    def process(self, pcm):
        assert pcm.dtype == np.int16

        self._decoder.process_raw(pcm.tobytes(), False, False)

        detected = self._decoder.hyp()
        if detected:
            self._decoder.end_utt()
            self._decoder.start_utt()

        return detected

    def release(self):
        self._decoder.end_utt()

    def __str__(self):
        return 'PocketSphinx'
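A minimal sketch of feeding PocketSphinxEngine from a microphone; the keyword and sensitivity are illustrative values, and the stream is assumed to deliver 16 kHz, 16-bit mono samples.

import numpy as np
import pyaudio

engine = PocketSphinxEngine('computer', sensitivity=20)   # kws_threshold becomes 1e-20
pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=1, rate=16000,
                 input=True, frames_per_buffer=512)
try:
    while True:
        pcm = np.frombuffer(stream.read(512), dtype=np.int16)
        if engine.process(pcm):
            print('keyword detected')
finally:
    engine.release()
    stream.close()
    pa.terminate()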
Example #42
#!/usr/bin/python

import sys, os
from pocketsphinx.pocketsphinx import Decoder
import pyaudio

script_dir = os.path.dirname(os.path.realpath(__file__))

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string("-logfn", os.devnull)
config.set_string('-hmm', os.path.join(script_dir, 'model/hmm/en'))
config.set_string('-dict', os.path.join(script_dir, 'model/keywords_en.dic'))
if True:
    config.set_string('-kws', os.path.join(script_dir,
                                           'model/keywords_en.txt'))
else:
    config.set_string('-keyphrase', 'miss j')
    config.set_float('-kws_threshold', 1e-15)

# Process audio chunk by chunk. On keyword detected perform action and restart search
decoder = Decoder(config)
decoder.start_utt()

stream = None
if len(sys.argv) > 1:
    stream = open(sys.argv[1], "rb")
else:
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
Example #43
from __future__ import division
import os
import sys
import wave
sys.path.insert(0, '/opt/hansonrobotics/lib/python2.7/site-packages/')

from pocketsphinx.pocketsphinx import Decoder

MODELDIR = '/opt/hansonrobotics/share/pocketsphinx/model'

config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-allphone', os.path.join(MODELDIR, 'en-us/en-us-phone.lm.dmp'))
config.set_float('-lw', 2.0)
config.set_float('-beam', 1e-10)
config.set_float('-pbeam', 1e-10)

def audio2phoneme(audio_file):
    wave_read = wave.open(audio_file, 'rb')
    length = wave_read.getnframes()/wave_read.getframerate()
    wave_read.close()

    # Decode streaming data.
    decoder = Decoder(config)

    buf = bytearray(1024)
    with open(audio_file, 'rb') as f:
        decoder.start_utt()
        while f.readinto(buf):
            decoder.process_raw(buf, False, False)
        decoder.end_utt()
Example #44
 def __init__(self, key_phrase, threshold, sample_rate=16000, lang="en-us"):
     self.lang = lang
     self.key_phrase = key_phrase
     self.sample_rate = sample_rate
     self.threshold = threshold
     self.decoder = Decoder(self.create_config())
Example #45
  def speech_recog(self, model):

    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    #config.set_string('-kws', MODELDIR + model + '.txt')
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)

    decoder.start_utt()
    tstamp = time.time()
    recog_text = ''

    while len(recog_text) < 1:
      try:
        buf = self.stream_in.read(CHUNK_SIZE)
        logging.info("actual voice")
        decoder.process_raw(buf, False, False)
        if decoder.hyp().hypstr != '':
          recog_text += decoder.hyp().hypstr
          print "text: " + decoder.hyp().hypstr
          tstamp = time.time()
      except IOError as ex:
        if ex[1] != pyaudio.paInputOverflowed:
          raise
        buf = '\x00' * CHUNK_SIZE  # fill with silence
        logging.info("silence")
      except AttributeError:
        pass

    decoder.end_utt()

    logging.info("recog text: " + recog_text)
    return recog_text
Example #46
class NLUAudio(NLUBase):
    """Define NLUAudio component

    For now, hotword detection uses pocketsphinx with speech_recognition,
    and Nuance services handle the NLU
    """
    def __init__(self, settings, action_queue, tts_queue, logger):
        NLUBase.__init__(self, settings, action_queue, None, tts_queue, logger)
        # Init private attributes
        self._rerun = True

        self._answer_sound_path = "sounds/answer.wav"
        self._config = Decoder.default_config()
        if not self._prepare_decoder():
            self._must_run = False

    def _prepare_decoder(self):
        """Set decoder config"""
        # prepare config
        self._hotword = self._settings['speech']['hotword']
        # self._answer = self._settings['hotword']['answer']
        if not os.path.isdir("pocketsphinx-data"):
            raise HotWordError("Missing pocketsphinx-data folder. Please run `make hotword`")

        acoustic_model = os.path.join("pocketsphinx-data",
                                      self._settings['speech']['language'],
                                      'acoustic-model',
                                      )
        language_model = os.path.join("pocketsphinx-data",
                                      self._settings['speech']['language'],
                                      'language-model.lm.bin',
                                      )
        pocket_dict = os.path.join("pocketsphinx-data",
                                   self._settings['speech']['language'],
                                   'pronounciation-dictionary.dict',
                                   )
        self._config.set_string('-logfn', "/dev/null")
        self._config.set_string('-hmm', acoustic_model)
        self._config.set_string('-lm', language_model)
        self._config.set_string('-dict', pocket_dict)
        try:
            self._decoder = Decoder(self._config)
        except RuntimeError:
            self.logger.critical("Error getting audio decoder. Hotword not started")
            return False
        self._decoder.set_keyphrase('wakeup', self._hotword)
        self._decoder.set_search('wakeup')

    def stop(self):
        """Stop process"""
        self._rerun = False
        NLUBase.stop(self)

    def _answering(self):
        """Play the hotword confirmation sound"""
        f_ans = wave.open(self._answer_sound_path, "rb")
        stream = self._paudio.open(format=self._paudio.get_format_from_width(f_ans.getsampwidth()),
                                   channels=f_ans.getnchannels(),
                                   rate=f_ans.getframerate(),
                                   output=True)
        data = f_ans.readframes(1024)
        while len(data) > 0:
            stream.write(data)
            data = f_ans.readframes(1024)
        f_ans.close()

    def run(self):
        """Listen for NLU"""
        self._rerun = True
        self._must_run = True
        self.logger.debug("starting to listen for hotword %s", self._hotword)
        while self._rerun:
            self._rerun = False
            try:
                self._paudio = pyaudio.PyAudio()
                stream = self._paudio.open(format=pyaudio.paInt16, channels=1, rate=16000,
                                           input=True, frames_per_buffer=1024)
            except OSError:
                self.logger.warning("No audio device found, cannot listen for NLU")
                self.logger.warning("Disabling NLU audio")
                self._must_run = False
                self._rerun = False
                return
            stream.start_stream()
            self._paudio.get_default_input_device_info()

            self._decoder.start_utt()
            while self._must_run:
                buf = stream.read(1024)
                self._decoder.process_raw(buf, False, False)
                if not self.tts_queue.empty():
                    # If tts_queue is not empty, the Droid is currently
                    # speaking, so we don't want it to listen to itself.
                    # TODO: replace this with proper speaker/echo cancellation
                    continue
                if self._decoder.hyp() and self._decoder.hyp().hypstr == self._hotword:
                    self.logger.debug("Hotword detected")
                    # self.tts_queue.put(gtt(self._answer))
                    # self.tts_queue.put(gtt("mmm"))
                    self._answering()
                    ret = nlu_audio(self._settings, self.logger)

                    # GOT ACTIONS
                    interpretations = ret.get("nlu_interpretation_results", {}).\
                        get("payload", {}).get("interpretations", {})
                    # TODO: what about if len(interpretations) > 1 ??
                    for interpretation in interpretations:
                        intent = interpretation.get("action", {}).get("intent", {})
                        self.logger.info("Intent: {}".format(intent.get("value")))
                        self.logger.info("Confidence: {}".format(intent.get("confidence")))
                        # TODO log arguments
                        if intent.get("value") == "NO_MATCH":
                            # I don't understand :/
                            self._misunderstand(0, True, True)
                        elif intent.get("confidence") < 0.8:
                            # I'm not sure I understand :/
                            self._misunderstand(intent.get("confidence"), True, True)
                        else:
                            # Check intent name
                            if len(intent.get("value").split("__")) != 2:
                                self.logger.critical("BAD Intent name: "
                                                     "{}".format(intent.get("value")))
                                self._misunderstand(0, True, True)
                            # Run function with parameters
                            action, method = intent.get("value").split("__")
                            # Run action
                            # TODO add parameters from NLU response
                            self._run_action(action, method, {}, False, True, True)
                    # TODO run nlu audio detection
                    self._rerun = True
                    break
            self._decoder.end_utt()
Example #47
	cl = getattr(im, config['platform']['device'].capitalize() + 'Platform')
	platform = cl(config)
except ImportError:
	from alexapi.device_platforms.desktop import DesktopPlatform
	platform = DesktopPlatform(config)

# Setup
recorded = False
servers = ["127.0.0.1:11211"]
mc = Client(servers, debug=1)
path = os.path.realpath(__file__).rstrip(os.path.basename(__file__))
resources_path = os.path.join(path, 'resources', '')
tmp_path = os.path.join(tempfile.mkdtemp(prefix='AlexaPi-runtime-'), '')

# PocketSphinx configuration
ps_config = Decoder.default_config()

# Set recognition model to US
ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
ps_config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

# Specify recognition key phrase
ps_config.set_string('-keyphrase', config['sphinx']['trigger_phrase'])
ps_config.set_float('-kws_threshold', 1e-5)

# Hide the VERY verbose logging information
if not debug:
	ps_config.set_string('-logfn', '/dev/null')

# Process audio chunk by chunk. On keyword detected perform action and restart search
decoder = Decoder(ps_config)
Example #48
def main():
    abspath = os.path.dirname(os.path.abspath(__file__))
    abspath = os.path.join(abspath, '..')

    model_dir = os.path.join(abspath, 'model')

    hmm = os.path.join(model_dir, HMM)
    lm = os.path.join(model_dir, LM)
    dic = os.path.join(model_dir, DIC)

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dic)
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=BUFFER)
    stream.start_stream()
    in_speech_bf = True
    decoder.start_utt()
    while True:
        buf = stream.read(BUFFER)
        if buf:
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech():
                sys.stdout.write('.')
                sys.stdout.flush()
            if decoder.get_in_speech() == in_speech_bf:
                continue

            in_speech_bf = decoder.get_in_speech()
            if in_speech_bf:
                continue

            decoder.end_utt()
            try:
                if decoder.hyp().hypstr != '':
                    print('You said:', decoder.hyp().hypstr)
            except AttributeError:
                pass
            decoder.start_utt()
        else:
            break
    decoder.end_utt()
    print('An error occurred:', decoder.hyp().hypstr)
Example #49
    def speech_recog(self, model):
        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
        config.set_int('-ds', 2)
        config.set_int('-topn', 3)
        config.set_int('-maxwpf', 5)
        #config.set_string('-kws', MODELDIR + model + '.txt')
        config.set_string('-lm', MODELDIR + model + '.lm')
        config.set_string('-dict', MODELDIR + model + '.dict')
        decoder = Decoder(config)

        decoder.start_utt()
        recog_text = ''

        with self.stream_in as stream:
            audio_generator = stream.generator()
            for content in audio_generator:
                decoder.process_raw(content, False, False)
                if decoder.hyp() and decoder.hyp().hypstr != '':
                    recog_text += decoder.hyp().hypstr
                    if len(recog_text) > 1:
                        decoder.end_utt()
                        logging.info("recog text: %s", recog_text)
                        return recog_text
        return recog_text
Example #50
#!/usr/bin/python

import sys, os
from pocketsphinx.pocketsphinx import Decoder
import pyaudio


script_dir = os.path.dirname(os.path.realpath(__file__))


# Create a decoder with certain model
config = Decoder.default_config()
config.set_string("-logfn", os.devnull)
config.set_string('-hmm', os.path.join(script_dir, 'model/hmm/en'))
config.set_string('-dict', os.path.join(script_dir, 'model/keywords_en.dic'))
if True:
    config.set_string('-kws', os.path.join(script_dir, 'model/keywords_en.txt'))
else:
    config.set_string('-keyphrase', 'miss j')
    config.set_float('-kws_threshold', 1e-15)

# Process audio chunk by chunk. On keyword detected perform action and restart search
decoder = Decoder(config)
decoder.start_utt()


stream = None
if len(sys.argv) > 1:
    stream = open(sys.argv[1], "rb")
else:
    p = pyaudio.PyAudio()
Example #51
def main():
    environment: str = os.getenv("ENVIRONMENT", "dev")
    config: Dict = load_config(environment)
    initialize_logger(level=config["logging"]["level"],
                      filename=config["logging"]["filename"])
    redis_host = config["redis"]["host"]
    redis_port = config["redis"]["port"]
    logger.debug(f"Connecting to redis at {redis_host}:{redis_port}")
    redis_client: Redis = Redis(host=redis_host, port=redis_port, db=0)

    logger.debug("Initializing PyAudio interface")
    audio = pyaudio.PyAudio()
    microphone_index = get_microphone_index(audio,
                                            config["microphone"]["name"])
    logger.debug(
        f"Using microphone device '{config['microphone']['name']}' (card index {microphone_index})"
    )
    logger.debug(
        f"Intializing pocketsphinx Decoder using model dir {MODELDIR}")
    decoder_config: DecoderConfig = Decoder.default_config()
    decoder_config.set_string("-hmm", os.path.join(MODELDIR, "en-us/en-us"))
    decoder_config.set_string("-lm",
                              os.path.join(MODELDIR, "en-us/en-us.lm.bin"))
    decoder_config.set_string(
        "-dict", os.path.join(MODELDIR, "en-us/cmudict-en-us.dict"))
    decoder = Decoder(decoder_config)

    logger.debug("Opening audio stream")
    stream = audio.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=44100,
                        input=True,
                        frames_per_buffer=2048,
                        input_device_index=microphone_index)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()

    try:
        logger.debug("Starting decoder loop")
        while cycle([True]):
            buf = stream.read(2048)
            if buf:
                logger.debug("Decoding raw audio")
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_bf:
                    logger.debug("GOT HERE")
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        transcription = decoder.hyp().hypstr
                        logger.debug(f"Result: {transcription}")
                        redis_client.publish("subsystem.listener.recording",
                                             transcription)
                        decoder.start_utt()
            else:
                logger.debug("Buffer closed. Ending")
                break
        decoder.end_utt()
    except Exception:
        logger.exception("Something bad happened")
    finally:
        redis_client.close()
Example #52
class PocketsphinxTrigger(BaseTrigger):


	type = triggers.TYPES.VOICE

	def __init__(self, config, trigger_callback):
		super(PocketsphinxTrigger, self).__init__(config, trigger_callback, 'pocketsphinx')

		self._enabled_lock = threading.Event()
		self._disabled_sync_lock = threading.Event()
		self._decoder = None

	def setup(self):
		# PocketSphinx configuration
		ps_config = Decoder.default_config()

		# Set recognition model from the configured language
		ps_config.set_string('-hmm', os.path.join(get_model_path(), self._tconfig['language']))
		ps_config.set_string('-dict', os.path.join(get_model_path(), self._tconfig['dictionary']))

		# Specify recognition key phrase
		#ps_config.set_string('-keyphrase', self._tconfig['phrase'])
		#ps_config.set_float('-kws_threshold', float(self._tconfig['threshold']))

		### Multiple Hotwords
		#ps_config.set_string('-inmic', 'yes')
		ps_config.set_string('-kws', '/opt/AlexaPi/src/keyphrase.list')


		# Hide the VERY verbose logging information when not in debug
		if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
			ps_config.set_string('-logfn', '/dev/null')

		# Process audio chunk by chunk. On keyword detected perform action and restart search
		self._decoder = Decoder(ps_config)

	def run(self):
		thread = threading.Thread(target=self.thread, args=())
		thread.setDaemon(True)
		thread.start()

	def thread(self):
		while True:
			self._enabled_lock.wait()

			# Enable reading microphone raw data
			inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, self._config['sound']['input_device'])
			inp.setchannels(1)
			inp.setrate(16000)
			inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
			inp.setperiodsize(1024)

			self._decoder.start_utt()

			triggered = False
			#assistantTriggered = False
			voice_command = ""

			while not triggered:

				if not self._enabled_lock.isSet():
					break

				# Read from microphone
				_, buf = inp.read()

				# Detect if keyword/trigger word was said
				self._decoder.process_raw(buf, False, False)

				triggered = self._decoder.hyp() is not None

			# To avoid overflows close the microphone connection
			inp.close()

			self._decoder.end_utt()

			self._disabled_sync_lock.set()

			if triggered:
				### Assistant Starts Here
				try:
					voice_command = self._decoder.hyp().hypstr
				except:
					voice_command = ""
				self._trigger_callback(self, voice_command)
				###

	def enable(self):
		self._enabled_lock.set()
		self._disabled_sync_lock.clear()

	def disable(self):
		self._enabled_lock.clear()
		self._disabled_sync_lock.wait()