def __init__(self, phrase, threshold, device_index=0):
    """Prepare a PocketSphinx key-phrase detector and a PyAudio handle.

    :param phrase: key phrase to spot in the audio stream.
    :param threshold: kws detection threshold (coerced to float).
    :param device_index: PyAudio input device number.
    """
    self._decoder = None
    self._pa = None
    self._device_no = device_index
    self._phrase = phrase
    self._threshold = float(threshold)

    logging.info('Phrase: ' + phrase + ' Threshold: ' + str(threshold))

    # PocketSphinx configuration: US-English acoustic model + dictionary
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(get_model_path_keyword(), 'en-us'))
    cfg.set_string('-dict',
                   os.path.join(get_model_path_keyword(), 'cmudict-en-us.dict'))
    # The key phrase to spot and how strict the match must be
    cfg.set_string('-keyphrase', self._phrase)
    cfg.set_float('-kws_threshold', self._threshold)
    cfg.set_string('-logfn', '/dev/null')

    # Process audio chunk by chunk; on keyword detected perform action and restart search
    self._decoder = Decoder(cfg)
    self._pa = pyaudio.PyAudio()
def speech_recog(self, model):
    """Decode audio from ``self.stream_in`` with the given language model.

    :param model: model base name; ``MODELDIR + model + '.lm'/'.dict'`` must exist.
    :returns: the recognized text (may be '' if the stream ends silently).
    """
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    # Decoder tuning knobs: frame downsampling, top-N densities, max words/frame
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    #config.set_string('-kws', MODELDIR + model + '.txt')
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)
    decoder.start_utt()
    recog_text = ''
    with self.stream_in as stream:
        audio_generator = stream.generator()
        for content in audio_generator:
            decoder.process_raw(content, False, False)
            if decoder.hyp() and decoder.hyp().hypstr != '':
                recog_text += decoder.hyp().hypstr
            if len(recog_text) > 1:
                decoder.end_utt()
                logging.info("recog text: %s", recog_text)
                return recog_text
    # Bug fix: previously the utterance was never closed when the audio
    # generator was exhausted without producing a result.
    decoder.end_utt()
    return recog_text
def setup(self):
    """Configure PocketSphinx for multi-hotword spotting and build the decoder."""
    cfg = Decoder.default_config()
    # Acoustic model and dictionary paths come from the transcriber config
    cfg.set_string(
        '-hmm', os.path.join(get_model_path(), self._tconfig['language']))
    cfg.set_string(
        '-dict', os.path.join(get_model_path(), self._tconfig['dictionary']))
    # Multiple hotwords are spotted via a keyword list file rather than a
    # single -keyphrase/-kws_threshold pair.
    cfg.set_string('-kws', '/opt/AlexaPi/src/keyphrase.list')
    # Hide the VERY verbose logging information when not in debug
    if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
        cfg.set_string('-logfn', '/dev/null')
    # Process audio chunk by chunk. On keyword detected perform action and restart search
    self._decoder = Decoder(cfg)
def build_decoder(self):
    """Construct a Decoder wired to the bundled en-us model files.

    :returns: a ready pocketsphinx ``Decoder``.
    """
    # All options are simple string settings, so collect them and apply in one pass.
    options = {
        "-dict": os.path.join(self.MODEL_DIR, "cmudict-en-us.dict"),
        "-fdict": os.path.join(self.MODEL_DIR, "en-us/noisedict"),
        "-featparams": os.path.join(self.MODEL_DIR, "en-us/feat.params"),
        "-tmat": os.path.join(self.MODEL_DIR, "en-us/transition_matrices"),
        "-hmm": os.path.join(self.MODEL_DIR, "en-us"),
        "-lm": os.path.join(self.MODEL_DIR, "en-us.lm.bin"),
        "-mdef": os.path.join(self.MODEL_DIR, "en-us/mdef"),
        "-mean": os.path.join(self.MODEL_DIR, "en-us/means"),
        "-sendump": os.path.join(self.MODEL_DIR, "en-us/sendump"),
        "-var": os.path.join(self.MODEL_DIR, "en-us/variances"),
        # Discard sphinx logging; Windows has no /dev/null.
        "-logfn": "NUL" if sys.platform == "win32" else "/dev/null",
    }
    config = Decoder.default_config()
    for key, value in options.items():
        config.set_string(key, value)
    return Decoder(config)
def create_decoder():
    """Build a Russian-model PocketSphinx decoder (8 kHz input, noise removal off).

    :returns: a configured pocketsphinx ``Decoder``.
    """
    base = os.path.join(root(), 'pocketsphinx', 'zero_ru_cont_8k_v3')
    hmm = os.path.join(base, 'zero_ru.cd_semi_4000')
    # Renamed from `dict`: do not shadow the builtin.
    dict_path = os.path.join(base, 'ru.dic.orig')
    lm = os.path.join(base, 'ru.lm.orig')

    decoder_config = Decoder.default_config()
    decoder_config.set_string('-hmm', hmm)
    decoder_config.set_string("-lm", lm)
    decoder_config.set_string('-dict', dict_path)
    decoder_config.set_boolean('-remove_noise', False)
    # The zero_ru model is an 8 kHz model, so the decoder must match.
    decoder_config.set_float('-samprate', 8000)
    decoder_config.set_string('-logfn', os.devnull)
    return Decoder(decoder_config)
def speech_recog(self, model):
    """Decode audio from ``self.stream_in`` with the given language model.

    :param model: model base name; ``MODELDIR + model + '.lm'/'.dict'`` must exist.
    :returns: the recognized text (may be '' if the stream ends silently).
    """
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    # Decoder tuning knobs: frame downsampling, top-N densities, max words/frame
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    #config.set_string('-kws', MODELDIR + model + '.txt')
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)
    decoder.start_utt()
    recog_text = ''
    with self.stream_in as stream:
        audio_generator = stream.generator()
        for content in audio_generator:
            decoder.process_raw(content, False, False)
            if decoder.hyp() and decoder.hyp().hypstr != '':
                recog_text += decoder.hyp().hypstr
            if len(recog_text) > 1:
                decoder.end_utt()
                logging.info("recog text: %s", recog_text)
                return recog_text
    # Bug fix: previously the utterance was never closed when the audio
    # generator was exhausted without producing a result.
    decoder.end_utt()
    return recog_text
def create_decoder():
    """Create a PocketSphinx decoder configured via POCKETSPHINX_* env vars.

    Each path falls back to a file next to this module under ``pocketsphinx/``.

    :returns: a configured pocketsphinx ``Decoder``.
    """
    path = os.path.dirname(os.path.realpath(__file__))
    pocketsphinx_data = os.getenv('POCKETSPHINX_DATA',
                                  os.path.join(path, 'pocketsphinx'))
    hmm = os.getenv('POCKETSPHINX_HMM',
                    os.path.join(pocketsphinx_data, 'tdt_sc_8k'))
    # Renamed from `dict`: do not shadow the builtin.
    dict_path = os.getenv('POCKETSPHINX_DIC',
                          os.path.join(pocketsphinx_data, 'keywords.dic'))
    lm = os.getenv('POCKETSPHINX_LM',
                   os.path.join(pocketsphinx_data, 'keywords.lm'))
    log = os.getenv('POCKETSPHINX_LOG',
                    os.path.join(pocketsphinx_data, 'log'))

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dict_path)
    # config.set_int('-samprate', SAMPLE_RATE)  # uncomment if rate is not 16000. use config.set_float() on ubuntu
    config.set_int('-nfft', 512)
    #config.set_float('-vad_threshold', 2.7)
    config.set_string('-logfn', log)
    return Decoder(config)
def setup(self):
    """Configure PocketSphinx key-phrase spotting and build the detector."""
    cfg = Decoder.default_config()
    # US-English acoustic model and pronunciation dictionary
    cfg.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    cfg.set_string('-dict',
                   os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    # Wake phrase and detection threshold come from the trigger configuration
    cfg.set_string('-keyphrase', self._tconfig['phrase'])
    cfg.set_float('-kws_threshold', float(self._tconfig['threshold']))
    # Hide the VERY verbose logging information when not in debug
    if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
        sink = 'nul' if platform.system() == 'Windows' else '/dev/null'
        cfg.set_string('-logfn', sink)
    # Process audio chunk by chunk. On keyword detected perform action and restart search
    self._detector = Decoder(cfg)
def __init__(self, settings, action_queue, tts_queue, logger):
    """Initialize the NLU engine and eagerly prepare its PocketSphinx decoder."""
    NLUBase.__init__(self, settings, action_queue, None, tts_queue, logger)
    # Private state: re-run flag and the sound played when an answer is ready
    self._answer_sound_path = "sounds/answer.wav"
    self._rerun = True
    self._config = Decoder.default_config()
    # If the decoder cannot be prepared there is nothing useful to run.
    if not self._prepare_decoder():
        self._must_run = False
def __init__(self, kws_threshold=1e-40):
    """Prepare (but do not build) a keyword-spotting decoder configuration.

    :param kws_threshold: pocketsphinx kws detection threshold.
    """
    base_dir = os.path.dirname(__file__)
    model_dir = os.path.join(base_dir, "../../../pocketsphinx/model/en-us")
    cfg = _Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(model_dir, 'en-us'))
    cfg.set_string('-dict', os.path.join(model_dir, 'cmudict-en-us.dict'))
    cfg.set_float('-kws_threshold', kws_threshold)
    self.config = cfg
    # The Decoder itself is created later from self.config.
    self.decoder = None
def __init__(self, keyword, sensitivity):
    """Start a PocketSphinx key-phrase detector for *keyword*.

    :param keyword: word/phrase to spot.
    :param sensitivity: detection sensitivity; threshold is ``10 ** -sensitivity``.
    """
    cfg = Decoder.default_config()
    cfg.set_string('-logfn', '/dev/null')  # silence sphinx's own logging
    cfg.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    cfg.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    # 'snowboy' is mapped to the two-word phrase 'snow boy' — presumably for
    # dictionary coverage (TODO confirm).
    phrase = 'snow boy' if keyword == 'snowboy' else keyword
    cfg.set_string('-keyphrase', phrase)
    cfg.set_float('-kws_threshold', 10 ** -sensitivity)
    self._decoder = Decoder(cfg)
    self._decoder.start_utt()
def __init__(self):
    """Build a full US-English language-model decoder from the default models."""
    # https://github.com/cmusphinx/pocketsphinx-python/blob/master/example.py
    model_path = get_model_path()
    cfg = Decoder.default_config()
    cfg.set_string('-logfn', '/dev/null')  # silence sphinx's own logging
    cfg.set_string('-hmm', os.path.join(model_path, 'en-us'))
    cfg.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
    cfg.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
    self._decoder = Decoder(cfg)
def start_listening(self):
    '''Starts streaming. Pauses until self.resume has been called.

    Continuously reads microphone audio into the decoder; each completed
    utterance's hypothesis is appended to ``self.all_speech_data``.
    '''
    import time  # local import: only needed for the pause-polling sleep

    config = Decoder.default_config()
    config.set_string('-hmm', path.join(self.model_dir, self.hmm))
    config.set_string('-lm', path.join(self.model_dir, self.lm))
    config.set_string('-dict', path.join(self.model_dir, self.dictionary))
    config.set_string('-logfn', self.logfn)
    # This takes a while
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    print(self.input_source_index)
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True,
                    input_device_index=self.input_source_index,
                    frames_per_buffer=1024)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()
    self.wait_to_resume_lock.acquire()
    while self.is_running:
        while self.paused:
            # Bug fix: the bare `pass` loop busy-waited at 100% CPU; sleep
            # briefly between polls instead.
            time.sleep(0.01)
        buf = stream.read(1024, exception_on_overflow=False)
        if buf:
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    # Speech segment just ended: harvest the hypothesis.
                    decoder.end_utt()
                    hyp = decoder.hyp()
                    # Bug fix: hyp() returns None when nothing was recognized,
                    # which previously raised AttributeError.
                    if hyp is not None and hyp.hypstr != "":
                        self.all_speech_data.append(hyp.hypstr)
                    decoder.start_utt()
        else:
            break
    decoder.end_utt()
def create_config(self, dict_name):
    """Return a key-phrase-spotting Decoder config for the given dictionary.

    :param dict_name: path of the pronunciation dictionary to use.
    :returns: a pocketsphinx config object (not yet a Decoder).
    """
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang, 'hmm'))
    cfg.set_string('-dict', dict_name)
    cfg.set_string('-keyphrase', self.key_phrase)
    cfg.set_float('-kws_threshold', float(self.threshold))
    cfg.set_float('-samprate', self.sample_rate)
    cfg.set_int('-nfft', 2048)
    cfg.set_string('-logfn', '/dev/null')  # discard sphinx logging
    return cfg
def configure(self):
    """Create the PocketSphinx decoder used for key-phrase spotting."""
    model_root = os.path.join(BASEDIR, 'model', self.lang)
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(model_root, 'hmm'))
    cfg.set_string('-dict', os.path.join(model_root, 'mycroft-en-us.dict'))
    cfg.set_string('-keyphrase', self.key_phrase)
    cfg.set_float('-kws_threshold', float('1e-45'))
    cfg.set_float('-samprate', self.sample_rate)
    cfg.set_int('-nfft', 2048)
    cfg.set_string('-logfn', '/dev/null')  # discard sphinx logging
    self.decoder = Decoder(cfg)
def configure(self):
    """Create the PocketSphinx decoder used for key-phrase spotting."""
    model_root = os.path.join(BASEDIR, 'model', self.lang)
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(model_root, 'hmm'))
    cfg.set_string('-dict', os.path.join(model_root, 'mycroft-en-us.dict'))
    cfg.set_string('-keyphrase', self.key_phrase)
    cfg.set_float('-kws_threshold', float('1e-45'))
    cfg.set_float('-samprate', self.sample_rate)
    cfg.set_int('-nfft', 2048)
    cfg.set_string('-logfn', '/dev/null')  # discard sphinx logging
    self.decoder = Decoder(cfg)
def main():
    """Stream microphone audio into PocketSphinx and print each recognized utterance."""
    abspath = os.path.dirname(os.path.abspath(__file__))
    abspath = os.path.join(abspath, '..')
    model_dir = os.path.join(abspath, 'model')
    hmm = os.path.join(model_dir, HMM)
    lm = os.path.join(model_dir, LM)
    dic = os.path.join(model_dir, DIC)

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dic)
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True,
                    frames_per_buffer=BUFFER)
    stream.start_stream()

    in_speech_bf = True
    decoder.start_utt()
    while True:
        buf = stream.read(BUFFER)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
        # Progress dots while speech is being detected
        if decoder.get_in_speech():
            sys.stdout.write('.')
            sys.stdout.flush()
        if decoder.get_in_speech() == in_speech_bf:
            continue
        in_speech_bf = decoder.get_in_speech()
        if in_speech_bf:
            continue
        # Speech segment just ended: report the hypothesis and restart.
        decoder.end_utt()
        hyp = decoder.hyp()
        # Bug fix: hyp() returns None when nothing was recognized; the old
        # try/except AttributeError is replaced by an explicit check.
        if hyp is not None and hyp.hypstr != '':
            print('You said:', hyp.hypstr)
        decoder.start_utt()
    decoder.end_utt()
    hyp = decoder.hyp()
    # Bug fix: guard against a None hypothesis before reporting.
    print('An Error occured:', hyp.hypstr if hyp is not None else '')
def setup_pocketsphinx(self) -> None:
    """Initialize the Spanish PocketSphinx decoder and open the first utterance."""
    self.logger.info("Setting up PocketSphinx.")
    self.MODELDIR = "resources/model"
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(self.MODELDIR, 'es-es'))
    cfg.set_string('-lm', os.path.join(self.MODELDIR, 'es-es.lm'))
    cfg.set_string('-dict', os.path.join(self.MODELDIR, 'es.dict'))
    cfg.set_string('-logfn', '/dev/null')  # discard sphinx logging
    self.decoder = Decoder(cfg)
    # Tracks speech/silence transitions between processed buffers
    self.prev_buf_is_speech = False
    self.decoder.start_utt()
    self.logger.info("Done setting up PocketSphinx.")
def main():
    """Stream microphone audio into PocketSphinx and print each recognized utterance."""
    abspath = os.path.dirname(os.path.abspath(__file__))
    abspath = os.path.join(abspath, '..')
    model_dir = os.path.join(abspath, 'model')
    hmm = os.path.join(model_dir, HMM)
    lm = os.path.join(model_dir, LM)
    dic = os.path.join(model_dir, DIC)

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dic)
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True,
                    frames_per_buffer=BUFFER)
    stream.start_stream()

    in_speech_bf = True
    decoder.start_utt()
    while True:
        buf = stream.read(BUFFER)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
        # Progress dots while speech is being detected
        if decoder.get_in_speech():
            sys.stdout.write('.')
            sys.stdout.flush()
        if decoder.get_in_speech() == in_speech_bf:
            continue
        in_speech_bf = decoder.get_in_speech()
        if in_speech_bf:
            continue
        # Speech segment just ended: report the hypothesis and restart.
        decoder.end_utt()
        hyp = decoder.hyp()
        # Bug fix: hyp() returns None when nothing was recognized; the old
        # try/except AttributeError is replaced by an explicit check.
        if hyp is not None and hyp.hypstr != '':
            print('You said:', hyp.hypstr)
        decoder.start_utt()
    decoder.end_utt()
    hyp = decoder.hyp()
    # Bug fix: guard against a None hypothesis before reporting.
    print('An Error occured:', hyp.hypstr if hyp is not None else '')
def init():
    """Create the wake-word decoder, PyAudio handle, and recognizer as module globals."""
    global decoder, p, r
    cfg = Decoder.default_config()
    cfg.set_string('-logfn',
                   os.path.join(settings.LOGS_DIR, 'passive-listen.log'))
    cfg.set_string('-hmm',
                   os.path.join(settings.MODEL_DIR, 'en-US/acoustic-model'))
    cfg.set_string('-lm',
                   os.path.join(settings.MODEL_DIR, 'en-US/language-model.lm.bin'))
    cfg.set_string('-dict',
                   os.path.join(settings.MODEL_DIR,
                                'en-US/pronounciation-dictionary.dict'))
    decoder = Decoder(cfg)
    # Register the wake word as a named key-phrase search and activate it
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
def init():
    """Create the wake-word decoder, PyAudio handle, and recognizer as module globals."""
    global decoder, p, r
    cfg = Decoder.default_config()
    cfg.set_string('-logfn', settings.POCKETSPHINX_LOG)
    cfg.set_string('-hmm', settings.ACOUSTIC_MODEL)
    cfg.set_string('-lm', settings.LANGUAGE_MODEL)
    cfg.set_string('-dict', settings.POCKET_DICT)
    decoder = Decoder(cfg)
    # Register the wake word as a named key-phrase search and activate it
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
def init():
    """Create the wake-word decoder, PyAudio handle, and recognizer as module globals."""
    global decoder, p, r
    cfg = Decoder.default_config()
    cfg.set_string('-logfn', settings.POCKETSPHINX_LOG)
    cfg.set_string('-hmm', settings.ACOUSTIC_MODEL)
    cfg.set_string('-lm', settings.LANGUAGE_MODEL)
    cfg.set_string('-dict', settings.POCKET_DICT)
    decoder = Decoder(cfg)
    # Register the wake word as a named key-phrase search and activate it
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
def start_recognizer(self):
    """Function to handle lm or grammar processing of audio."""
    cfg = Decoder.default_config()
    rospy.loginfo("Done initializing pocketsphinx")
    # Configure the decoder from the parameters gathered earlier
    cfg.set_string('-dict', self.dict)
    cfg.set_string('-lm', self.class_lm)
    cfg.set_string('-hmm', self.hmm)
    self.decoder = Decoder(cfg)
    # Start processing input audio
    self.decoder.start_utt()
    rospy.loginfo("Decoder started successfully")
    # Hand control to ROS: decode whenever new audio is announced as ready
    rospy.Subscriber("recognizer/audio_ready", Bool, self.process_audio)
    rospy.spin()
def setup(self):
    """Configure PocketSphinx key-phrase spotting and build the decoder."""
    cfg = Decoder.default_config()
    # US-English acoustic model and pronunciation dictionary
    cfg.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    cfg.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    # Wake phrase and detection threshold come from the trigger configuration
    cfg.set_string('-keyphrase', self._tconfig['phrase'])
    cfg.set_float('-kws_threshold', float(self._tconfig['threshold']))
    # Hide the VERY verbose logging information when not in debug
    if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
        cfg.set_string('-logfn', '/dev/null')
    # Process audio chunk by chunk. On keyword detected perform action and restart search
    self._decoder = Decoder(cfg)
def __init__(self, keyword, sensitivity):
    """Constructor.

    :param keyword: keyword to be detected.
    :param sensitivity: detection sensitivity.
    """
    cfg = Decoder.default_config()
    cfg.set_string('-logfn', '/dev/null')  # silence sphinx's own logging
    # Set recognition model to US
    cfg.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    cfg.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    cfg.set_string('-keyphrase', keyword)
    cfg.set_float('-kws_threshold', sensitivity)
    self._decoder = Decoder(cfg)
    self._decoder.start_utt()
def init():
    """Create the wake-word decoder and the PyAudio interface (module globals)."""
    # exist_ok avoids the OSError race the old exists()+makedirs() check had.
    os.makedirs(LOGS_DIR, exist_ok=True)

    config = Decoder.default_config()
    config.set_string('-logfn', path.join(LOGS_DIR, 'passive-listen.log'))
    # Bug fix: build model paths with path.join components instead of string
    # literals containing backslashes ('en-us\en-us'), which are non-portable
    # and contain ambiguous escape sequences ('\e', '\c').
    config.set_string('-hmm', path.join(MODEL_DIR, 'en-us', 'en-us'))
    config.set_string('-lm', path.join(MODEL_DIR, 'en-us', 'en-us.lm.dmp'))
    config.set_string('-dict', path.join(MODEL_DIR, 'en-us', 'cmudict-en-us.dict'))

    # Decode streaming data
    global decoder, p
    decoder = Decoder(config)
    decoder.set_keyphrase("wakeup", WAKE_UP_WORD)
    decoder.set_search("wakeup")
    p = pyaudio.PyAudio()
def create_decoder():
    """Create a keyword-spotting PocketSphinx decoder from env-var paths.

    Each path falls back to a file under ``pocketsphinx-data/`` next to this module.

    :returns: a configured pocketsphinx ``Decoder``.
    """
    from pocketsphinx.pocketsphinx import Decoder
    path = os.path.dirname(os.path.realpath(__file__))
    pocketsphinx_data = os.getenv('POCKETSPHINX_DATA',
                                  os.path.join(path, 'pocketsphinx-data'))
    hmm = os.getenv('POCKETSPHINX_HMM', os.path.join(pocketsphinx_data, 'hmm'))
    # Renamed from `dict`: do not shadow the builtin.
    dict_path = os.getenv('POCKETSPHINX_DIC',
                          os.path.join(pocketsphinx_data, 'dictionary.txt'))
    kws = os.getenv('POCKETSPHINX_KWS',
                    os.path.join(pocketsphinx_data, 'keywords.txt'))

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-dict', dict_path)
    config.set_string('-kws', kws)
    # config.set_int('-samprate', SAMPLE_RATE)  # uncomment if rate is not 16000. use config.set_float() on ubuntu
    config.set_int('-nfft', 512)
    config.set_float('-vad_threshold', 2.7)
    config.set_string('-logfn', os.devnull)
    return Decoder(config)
def __init__(self, engine_type, keyword, sensitivity):
    """Initializer.

    :param engine_type: type of the engine.
    :param keyword: keyword being used for detection.
    :param sensitivity: sensitivity passed to the engine.
    """
    super().__init__(engine_type, keyword, sensitivity)
    cfg = Decoder.default_config()
    cfg.set_string('-logfn', '/dev/null')  # silence sphinx's own logging
    # Set recognition model to US
    cfg.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    cfg.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    cfg.set_string('-keyphrase', keyword)
    cfg.set_float('-kws_threshold', sensitivity)
    self._decoder = Decoder(cfg)
    self._decoder.start_utt()
def __init__(self, device_index=0, model_path=None):
    """Build a JSGF-grammar PocketSphinx decoder from files under *model_path*.

    :param device_index: PyAudio input device number.
    :param model_path: directory containing the HMM, dictionary, and grammar files.
    """
    self._decoder = None
    self._pa = None
    self._device_no = device_index
    self._model_path = model_path

    logging.info('Grammar file:' + os.path.join(model_path, self.GRAMMAR))
    cfg = Decoder.default_config()
    # Acoustic model, dictionary, and JSGF grammar all live under model_path
    cfg.set_string('-hmm', os.path.join(model_path, self.HMM))
    cfg.set_string('-dict', os.path.join(model_path, self.DIC))
    cfg.set_string('-jsgf', os.path.join(model_path, self.GRAMMAR))
    cfg.set_string('-logfn', '/dev/null')

    # Process audio chunk by chunk. On keyword detected perform action and restart search
    self._decoder = Decoder(cfg)
    self._pa = pyaudio.PyAudio()
def __init__(self):
    """Set up a Russian grammar-based decoder plus speech_recognition helpers."""
    self.MODELDIR = 'speech/'
    self.wav_name = 'media/temp.wav'
    self.raw_name = 'media/temp.raw'

    cfg = Decoder.default_config()
    cfg.set_string('-hmm', self.MODELDIR + 'ru_ru/')
    cfg.set_string('-dict', self.MODELDIR + 'ru.dic')
    self.decoder = Decoder(cfg)

    # Compile the JSGF rule into an FSG search and make it the active search
    jsgf = Jsgf(self.MODELDIR + 'gr.gram')
    rule = jsgf.get_rule('gr.rule')
    fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
    fsg.writefile('gr.fsg')
    self.decoder.set_fsg('gr', fsg)
    self.decoder.set_search('gr')

    self.rec = Recognizer()
    self.mic = Microphone()
def recognize_phonemes(segments_path, phonemes_result_path):
    """Run all-phone decoding over raw audio and dump the results as JSON.

    :param segments_path: path to the raw audio file to decode.
    :param phonemes_result_path: path the DecoderOutputSchema JSON is written to.
    """
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', join(model_dir, decoder_hmm))
    config.set_string('-allphone', join(model_dir, decoder_allphone))
    config.set_string('-dict', join(model_dir, decoder_dict))
    config.set_float('-lw', decoder_lw)
    config.set_float('-pip', decoder_pip)
    config.set_float('-beam', decoder_beam)
    config.set_float('-pbeam', decoder_pbeam)
    config.set_boolean('-mmap', decoder_mmap)

    hyps = []
    segs = []
    # Bug fix: this is a plain function with no `self` parameter, yet the
    # original assigned and used `self.decoder` (a NameError at runtime).
    # The decoder is a local here. NOTE(review): _get_decoder_results() is
    # defined elsewhere — confirm how it obtains the decoder instance.
    decoder = Decoder(config)
    with open(segments_path, 'rb') as stream:
        in_speech_buffer = False
        decoder.start_utt()
        while True:
            buf = stream.read(decoder_stream_buf_size)
            if buf:
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_buffer:
                    in_speech_buffer = decoder.get_in_speech()
                    if not in_speech_buffer:
                        # Speech segment ended: harvest results, restart utterance
                        hyp_result, segment = _get_decoder_results()
                        segs += segment
                        hyps.append(hyp_result)
                        decoder.start_utt()
            else:
                # End of file: flush a trailing speech segment, if any
                if in_speech_buffer:
                    hyp_result, segment = _get_decoder_results()
                    segs += segment
                    hyps.append(hyp_result)
                break

    phonemes_dict = dict(hypotheses=hyps, segment_info=segs)
    phonemes_result = DecoderOutputSchema().dumps(phonemes_dict)
    with open(phonemes_result_path, 'w') as f:
        f.write(phonemes_result)
def __init__(self, gui):
    """Create the decoder thread using the default or a user-supplied acoustic model."""
    QThread.__init__(self, gui)
    if settings.sphinx_acoustic_model_dir == '':
        # use default acoustic model
        acoustic_model_directory = path.join(get_model_path(), 'en-us')
    else:
        # use custom acoustic model
        acoustic_model_directory = settings.sphinx_acoustic_model_dir

    cfg = Decoder.default_config()
    cfg.set_string('-hmm', acoustic_model_directory)  # acoustic model
    cfg.set_string('-dict', settings.prepared_lexicon_file)  # lexicon pronunciation
    cfg.set_string('-jsgf', settings.prepared_grammar_file)  # language model from grammar
    cfg.set_string(
        '-logfn',
        settings.outputFileName(sphinx_decoder_log_file_base_name, ext='log'))

    self.listen = False
    self.decoder = Decoder(cfg)
    self.audio = None
    self.device = None
def speech_recog(self, model):
    """Blocking recognition loop over self.stream_in; returns the first
    recognized text. NOTE: this is Python 2 code (print statement,
    ``ex[1]`` exception indexing) and will not run under Python 3."""
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    # Decoder tuning: frame downsampling, top-N densities, max words per frame
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    #config.set_string('-kws', MODELDIR + model + '.txt')
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)
    decoder.start_utt()
    tstamp = time.time()
    recog_text = ''
    # Keep reading audio until at least one character has been recognized.
    while len(recog_text) < 1:
        try:
            buf = self.stream_in.read(CHUNK_SIZE)
            logging.info("actual voice")
            decoder.process_raw(buf, False, False)
            if decoder.hyp().hypstr != '':
                recog_text += decoder.hyp().hypstr
                print "text: " + decoder.hyp().hypstr
                tstamp = time.time()
        except IOError as ex:
            # Python 2-style exception indexing; re-raise anything that is
            # not a recoverable input overflow.
            if ex[1] != pyaudio.paInputOverflowed:
                raise
            buf = '\x00' * CHUNK_SIZE  #white noise
            logging.info("white noise")
        except AttributeError:
            # decoder.hyp() returned None (no hypothesis yet)
            pass
    decoder.end_utt()
    logging.info("recog text: " + recog_text)
    return recog_text
def speech_recog(self, model):
    """Blocking recognition loop over self.stream_in; returns the first
    recognized text. NOTE: this is Python 2 code (print statement,
    ``ex[1]`` exception indexing) and will not run under Python 3."""
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    # Decoder tuning: frame downsampling, top-N densities, max words per frame
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    #config.set_string('-kws', MODELDIR + model + '.txt')
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)
    decoder.start_utt()
    tstamp = time.time()
    recog_text = ''
    # Keep reading audio until at least one character has been recognized.
    while len(recog_text) < 1:
        try:
            buf = self.stream_in.read(CHUNK_SIZE)
            logging.info("actual voice")
            decoder.process_raw(buf, False, False)
            if decoder.hyp().hypstr != '':
                recog_text += decoder.hyp().hypstr
                print "text: " + decoder.hyp().hypstr
                tstamp = time.time()
        except IOError as ex:
            # Python 2-style exception indexing; re-raise anything that is
            # not a recoverable input overflow.
            if ex[1] != pyaudio.paInputOverflowed:
                raise
            buf = '\x00' * CHUNK_SIZE  #white noise
            logging.info("white noise")
        except AttributeError:
            # decoder.hyp() returned None (no hypothesis yet)
            pass
    decoder.end_utt()
    logging.info("recog text: " + recog_text)
    return recog_text
def init():
    """Create the wake-word decoder, PyAudio handle, and recognizer as module globals."""
    global decoder, p, r
    cfg = Decoder.default_config()
    cfg.set_string('-logfn',
                   os.path.join(settings.LOGS_DIR, 'passive-listen.log'))
    cfg.set_string('-hmm',
                   os.path.join(settings.MODEL_DIR, 'en-US/acoustic-model'))
    cfg.set_string('-lm',
                   os.path.join(settings.MODEL_DIR, 'en-US/language-model.lm.bin'))
    cfg.set_string('-dict',
                   os.path.join(settings.MODEL_DIR,
                                'en-US/pronounciation-dictionary.dict'))
    decoder = Decoder(cfg)
    # Register the wake word as a named key-phrase search and activate it
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
def _create_decoder(config) -> Decoder:
    """Build a Decoder for LM decoding or hotword spotting.

    Mode selection: a language model if ``config.lm`` is set, a single
    ``-keyphrase`` for exactly one hotword, otherwise a generated ``-kws``
    keyword file (one line per hotword with the shared threshold).

    :param config: object with hmm/dict/remove_noise/sample_rate/lm/hotwords/threshold.
    :returns: a configured pocketsphinx ``Decoder``.
    """
    decoder_config = Decoder.default_config()
    decoder_config.set_string('-hmm', config.hmm)
    decoder_config.set_string('-dict', config.dict)
    decoder_config.set_boolean('-remove_noise', config.remove_noise)
    decoder_config.set_float('-samprate', config.sample_rate)
    decoder_config.set_string('-logfn', devnull)
    if config.lm is not None:
        decoder_config.set_string("-lm", config.lm)
    elif len(config.hotwords) == 1:
        decoder_config.set_string('-keyphrase', config.hotwords[0])
        decoder_config.set_float('-kws_threshold', config.threshold)
    else:
        import os
        from tempfile import gettempdir
        path = os.path.join(gettempdir(), 'keywords.mini')
        # Bug fix: the file handle was opened and flushed but never closed;
        # a context manager closes (and flushes) it deterministically.
        with open(path, 'w') as f:
            f.writelines('{} /{}/\n'.format(w, config.threshold)
                         for w in config.hotwords)
        decoder_config.set_string('-kws', path)
    return Decoder(decoder_config)
def get_decoder():
    """Build a keyword-spotting decoder for the ReSpeaker model files.

    :returns: a configured pocketsphinx ``Decoder``.
    """
    from pocketsphinx.pocketsphinx import Decoder
    script_dir = os.path.dirname(os.path.realpath(__file__))

    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(script_dir, 'model/hmm/en'))
    cfg.set_string('-dict', os.path.join(script_dir, 'model/respeaker.dic'))
    cfg.set_string('-kws', os.path.join(script_dir, 'model/keywords.txt'))
    cfg.set_int('-samprate', SAMPLE_RATE)
    cfg.set_int('-nfft', 2048)
    cfg.set_string('-logfn', os.devnull)
    try:
        return Decoder(cfg)
    except Exception as e:
        # Some builds want -samprate set as a float rather than an int.
        print(
            "Maybe replace config.set_int('-samprate', SAMPLE_RATE) with config.set_float('-samprate', SAMPLE_RATE)"
        )
        raise e
#!/usr/bin/python import sys, os from pocketsphinx.pocketsphinx import Decoder import pyaudio script_dir = os.path.dirname(os.path.realpath(__file__)) # Create a decoder with certain model config = Decoder.default_config() config.set_string("-logfn", os.devnull) config.set_string('-hmm', os.path.join(script_dir, 'model/hmm/en')) config.set_string('-dict', os.path.join(script_dir, 'model/keywords_en.dic')) if True: config.set_string('-kws', os.path.join(script_dir, 'model/keywords_en.txt')) else: config.set_string('-keyphrase', 'miss j') config.set_float('-kws_threshold', 1e-15) # Process audio chunk by chunk. On keyword detected perform action and restart search decoder = Decoder(config) decoder.start_utt() stream = None if len(sys.argv) > 1: stream = open(sys.argv[1], "rb") else: p = pyaudio.PyAudio()
#!/usr/bin/python import sys, os from pocketsphinx.pocketsphinx import Decoder import pyaudio script_dir = os.path.dirname(os.path.realpath(__file__)) # Create a decoder with certain model config = Decoder.default_config() config.set_string("-logfn", os.devnull) config.set_string('-hmm', os.path.join(script_dir, 'model/hmm/en')) config.set_string('-dict', os.path.join(script_dir, 'model/keywords_en.dic')) if True: config.set_string('-kws', os.path.join(script_dir, 'model/keywords_en.txt')) else: config.set_string('-keyphrase', 'miss j') config.set_float('-kws_threshold', 1e-15) # Process audio chunk by chunk. On keyword detected perform action and restart search decoder = Decoder(config) decoder.start_utt() stream = None if len(sys.argv) > 1: stream = open(sys.argv[1], "rb") else: p = pyaudio.PyAudio() stream = p.open(format=pyaudio.paInt16, channels=1,
def __init__(self, config):
    """Copy *config* into a fresh Decoder configuration.

    :param config: mapping of option name -> string value; each key gets a
        '-' prefix (e.g. 'hmm' becomes '-hmm').
    """
    self.config = Decoder.default_config()
    for name in config:
        self.config.set_string("-" + name, config[name])
def main():
    """Wake-word listener: stream microphone audio into PocketSphinx and
    publish each finished utterance's transcription on Redis."""
    environment: str = os.getenv("ENVIRONMENT", "dev")
    config: Dict = load_config(environment)
    initialize_logger(level=config["logging"]["level"],
                      filename=config["logging"]["filename"])

    redis_host = config["redis"]["host"]
    redis_port = config["redis"]["port"]
    logger.debug(f"Connecting to redis at {redis_host}:{redis_port}")
    redis_client: Redis = Redis(host=redis_host, port=redis_port, db=0)

    logger.debug("Initializing PyAudio interface")
    audio = pyaudio.PyAudio()
    microphone_index = get_microphone_index(audio, config["microphone"]["name"])
    logger.debug(
        f"Using microphone device '{config['microphone']['name']}' (card index {microphone_index})"
    )

    logger.debug(f"Intializing pocketsphinx Decoder using model dir {MODELDIR}")
    decoder_config: DecoderConfig = Decoder.default_config()
    decoder_config.set_string("-hmm", os.path.join(MODELDIR, "en-us/en-us"))
    decoder_config.set_string("-lm", os.path.join(MODELDIR, "en-us/en-us.lm.bin"))
    decoder_config.set_string(
        "-dict", os.path.join(MODELDIR, "en-us/cmudict-en-us.dict"))
    decoder = Decoder(decoder_config)

    logger.debug("Opening audio stream")
    # NOTE(review): the stream is opened at 44.1 kHz while the en-us model's
    # expected rate is not shown here — confirm the sample rates match.
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=44100,
                        input=True, frames_per_buffer=2048,
                        input_device_index=microphone_index)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()
    try:
        logger.debug("Starting decoder loop")
        # Bug fix: `while cycle([True])` built a fresh itertools.cycle object
        # on every iteration just to obtain a truthy value; `while True` is
        # equivalent and allocation-free.
        while True:
            buf = stream.read(2048)
            if not buf:
                logger.debug("Buffer closed. Ending")
                break
            logger.debug("Decoding raw audio")
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech() != in_speech_bf:
                logger.debug("GOT HERE")
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    # Utterance finished: publish the hypothesis and restart.
                    decoder.end_utt()
                    hyp = decoder.hyp()
                    # Bug fix: hyp() is None when nothing was recognized.
                    transcription = hyp.hypstr if hyp is not None else ""
                    logger.debug(f"Result: {transcription}")
                    redis_client.publish("subsystem.listener.recording",
                                         transcription)
                    decoder.start_utt()
        decoder.end_utt()
    except Exception:
        logger.exception("Something bad happened")
    finally:
        # Robustness: release the audio resources as well as the Redis client.
        stream.stop_stream()
        stream.close()
        audio.terminate()
        redis_client.close()
def __worker(self, pipe, l_log):
    """The core of the STT program, this is the multiprocessed part

    Note:
        Multiprocessing will require a pipe between the parent and child
        subprocess. Since this is the case, the worker subprocess cannot
        access non-shared variables

    Arguments:
        pipe (tuple): The (read, write) ends of the pipe shared with the
            parent process
        l_log (:obj: Logger): Logger usable from within this subprocess
    """
    l_log.debug("STT worker started")

    audio_processor = AudioProcessor()  # Create a new audio processing object
    # Remember that we can't load the text processor nltk model until the
    # nltk model is set from the client language
    text_processor = TextProcessor()
    # Create a new pocketsphinx decoder with the default configuration,
    # which is English
    config = Decoder.default_config()
    decoder = None
    nltk_model = None
    mutex_flags = {"keyphrases": {"use": False}}
    shutdown_flags = {"shutdown": False, "decoder": None}

    def send_json(pipe, to_send):
        """Internal worker method to send a json through the parent socket

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            to_send (:obj: dict): A dictionary to be sent to the parent socket
        """
        try:
            # Send the message passed by argument back to the parent process
            ret = self.__send_buffered(pipe, to_send)
            if not ret[0]:
                l_log.error(
                    "Failed to send buffered message to the parent process! (err: %s)"
                    % ret[1])
        except Exception as err:
            l_log.error("Failed to send json! (err: %s)" % str(err))

    def send_error(pipe, error):
        """Internal worker method to send a json error through the parent socket

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            error (str): The string error message to send
        """
        send_json(pipe, {"error": error})

    def load_models(pipe, config, models):
        """Internal worker method to load the language model

        Note:
            Some languages take a long time to load. English is by far the
            fastest language to be loaded as a model.

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            config (:obj: Config): The pocketsphinx decoder configuration
            models (dict): The language and nltk models developed by the
                parent process

        Returns:
            (tuple) The STT decoder object and the nltk model, or None on
            failure
        """
        language_model = models["language_model"]
        nltk_model = models["nltk_model"]
        if False in [language_model.is_valid_model(),
                     nltk_model.is_valid_model()]:
            l_log.error("The language model %s is invalid!"
                        % str(language_model.name))
            send_error(pipe, "Failed loading language model!")
            return None

        # Load the model configurations into pocketsphinx
        config.set_string('-hmm', str(language_model.hmm))
        config.set_string('-lm', str(language_model.lm))
        config.set_string('-dict', str(language_model.dict))
        decoder = Decoder(config)

        send_json(pipe, {"success": True})  # Send a success message to the client
        l_log.debug("Set the language model to %s" % str(language_model.name))

        return decoder, nltk_model  # Return the new decoder and nltk model

    def process_text(pipe, text, is_final, args):
        """Internal worker method to process the Speech To Text phrase

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            text (str): The spoken text to further process
            is_final (bool): If the text being processed is the final text,
                else it's a partial result
            args (dict): Any other flags specifically required for a final
                or partial speech result
        """
        generate_keyphrases = mutex_flags["keyphrases"]["use"]
        keyphrases = []
        if generate_keyphrases:
            # Generate keyphrases from the given text
            text_processor.generate_keyphrases(text)
            keyphrases_list = text_processor.get_keyphrases()
            for keyphrase in keyphrases_list:
                keyphrases.append({
                    "score": keyphrase[0],
                    "keyphrase": keyphrase[1]
                })
        else:
            # Don't do any processing and just pass the text into the keyphrases
            keyphrases = text

        # Generate the json to be sent back to the client
        hypothesis_results = args
        hypothesis_results["keyphrases"] = generate_keyphrases
        if is_final:
            hypothesis_results["hypothesis"] = keyphrases
        else:
            hypothesis_results["partial_hypothesis"] = keyphrases

        # Debug-log instead of printing to stdout (original used print())
        l_log.debug("Hypothesis results: %s" % str(hypothesis_results))

        # Send the results back to the client
        send_json(pipe, hypothesis_results)

    def start_audio(pipe, decoder, args):
        """Internal worker method to start the audio processing chunk sequence

        Note:
            This must be called before the process_audio method or the STT
            engine will not process the audio chunks

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            decoder (Decoder): The pocketsphinx decoder to control the STT engine
            args (dict): All of the available arguments passed by the parent process
        """
        if decoder is None:
            l_log.error("Language model is not loaded")
            send_error(pipe, "Language model not loaded!")
            send_json(pipe, {"decoder": False})
            return

        l_log.debug("Starting the audio processing...")
        decoder.start_utt()  # Start the pocketsphinx listener

        # Tell the client that the decoder has successfully been loaded
        send_json(pipe, {"decoder": True})

    def process_audio(pipe, decoder, args):
        """Internal worker method to process an audio chunk

        Note:
            The audio chunk is expected to be in base64 format

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            decoder (Decoder): The pocketsphinx decoder to control the STT engine
            args (dict): All of the available arguments passed by the parent process
        """
        if decoder is None:
            l_log.error("Language model is not loaded")
            send_error(pipe, "Language model not loaded!")
            return

        l_log.debug("Processing audio chunk!")
        audio_chunk = args["audio"]  # Retrieve the audio data
        # Process the base64 wrapped audio data
        processed_wav = audio_processor.process_chunk(audio_chunk)

        l_log.debug("Recognizing speech...")
        # Process the audio chunk through the STT engine
        decoder.process_raw(processed_wav, False, False)
        hypothesis = decoder.hyp()  # Get pocketsphinx's hypothesis

        # Send back the results of the decoding
        if hypothesis is None:
            l_log.debug("Silence detected")
            send_json(pipe, {
                "partial_silence": True,
                "partial_hypothesis": None
            })
        else:
            hypothesis_results = {
                "partial_silence": len(hypothesis.hypstr) == 0,
            }
            l_log.debug("Partial speech detected: %s" % str(hypothesis.hypstr))
            process_text(pipe, hypothesis.hypstr, False, hypothesis_results)
        l_log.debug("Done decoding speech from audio chunk!")

    def stop_audio(pipe, decoder, args):
        """Internal worker method to stop the audio processing chunk sequence

        Note:
            This must be called after the process_audio method or the STT
            engine will continue to listen for audio chunks

        Arguments:
            pipe (:obj: socket): The response pipe to send to the parent process
            decoder (Decoder): The pocketsphinx decoder to control the STT engine
            args (dict): All of the available arguments passed by the parent process
        """
        if decoder is None:
            l_log.error("Language model is not loaded")
            send_error(pipe, "Language model not loaded!")
            # BUG FIX: original called send_json({"decoder": False}) without
            # the pipe argument, raising a TypeError at runtime.
            send_json(pipe, {"decoder": False})
            return

        l_log.debug("Stopping the audio processing...")
        decoder.end_utt()  # Stop the pocketsphinx listener
        l_log.debug("Done recognizing speech!")

        hypothesis = decoder.hyp()  # Get pocketsphinx's hypothesis
        logmath = decoder.get_logmath()

        # Send back the results of the decoding
        if hypothesis is None:
            l_log.debug("Silence detected")
            send_json(pipe, {"silence": True, "hypothesis": None})
        else:
            hypothesis_results = {
                "silence": len(hypothesis.hypstr) == 0,
                "score": hypothesis.best_score,
                "confidence": logmath.exp(hypothesis.prob)
            }
            l_log.debug("Speech detected: %s" % str(hypothesis.hypstr))
            process_text(pipe, hypothesis.hypstr, True, hypothesis_results)

    def shutdown_thread(self, l_log):
        """Worker method to handle the checking of a shutdown call

        Note:
            To reduce overhead, this thread will only be called every 100
            milliseconds
        """
        while not shutdown_flags["shutdown"]:
            try:
                if self._shutdown_event.is_set():
                    l_log.debug("Shutting down worker thread!")
                    shutdown_flags["shutdown"] = True  # Exit the main loop
                    if shutdown_flags["decoder"] is not None:
                        try:
                            shutdown_flags["decoder"].end_utt()
                        except Exception:
                            l_log.debug(
                                "STT decoder object returned a non-zero status")
                    else:
                        l_log.warning("The decoder object is already None!")
                    break
                sleep(0.1)
            except Exception as err:
                l_log.error("Failed shutting down worker thread! (err: %s)"
                            % str(err))

    shutdown_t = Thread(target=shutdown_thread, args=(self, l_log))
    # `daemon` attribute replaces the deprecated setDaemon() call.
    shutdown_t.daemon = True
    shutdown_t.start()

    p_out, p_in = pipe
    while not shutdown_flags["shutdown"]:
        try:
            try:
                # Wait for a command from the parent process
                command = self.__get_buffered(p_out)
                if "set_models" in command["exec"]:
                    # BUG FIX: load_models returns None on failure; the
                    # original unconditionally unpacked two values, raising
                    # a TypeError that was swallowed by the outer except.
                    models = load_models(p_out, config, command["args"])
                    if models is not None:
                        decoder, nltk_model = models
                        # Set the text processor nltk model
                        text_processor.set_nltk_model(nltk_model)
                        shutdown_flags["decoder"] = decoder
                elif "start_audio" in command["exec"]:
                    start_audio(p_out, decoder, command["args"])
                elif "process_audio" in command["exec"]:
                    process_audio(p_out, decoder, command["args"])
                elif "stop_audio" in command["exec"]:
                    stop_audio(p_out, decoder, command["args"])
                elif "set_keyphrases" in command["exec"]:
                    mutex_flags["keyphrases"] = command["args"]
                else:
                    l_log.error("Invalid command %s" % str(command))
                    # BUG FIX: original passed the `socket` module instead
                    # of the parent pipe.
                    send_error(p_out, "Invalid command!")
            except (EOFError, IOError):
                # Pipe temporarily empty/interrupted; keep polling.
                continue
        except Exception as err:
            l_log.error(
                "Failed receiving command from subprocess (id: %d) (err: %s)"
                % (current_process().pid, str(err)))