def get_text_from_audio(audio_input_name: str, working_directory: str = WORKING_DIRECTORY):
    """Transcribe an audio file with pocketsphinx.

    Args:
        audio_input_name: file name of the audio inside ``working_directory``.
        working_directory: directory that contains the audio file.

    Returns:
        list: the recognized words; when nothing was recognized, the
        str message ``"Audio file doesn't contain words"`` instead.
    """
    # Build a decoder around the bundled US-English model files.
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(SPEECH_MODEL_PATH, 'en-us'))
    cfg.set_string('-lm', os.path.join(SPEECH_MODEL_PATH, 'en-us.lm.bin'))
    cfg.set_string('-dict', os.path.join(SPEECH_MODEL_PATH, 'cmudict-en-us.dict'))
    recognizer = Decoder(cfg)

    # Stream the raw audio into the decoder in 1 KiB chunks.
    recognizer.start_utt()
    with open(os.path.join(working_directory, audio_input_name), 'rb') as stream:
        while True:
            chunk = stream.read(1024)
            if not chunk:
                break
            recognizer.process_raw(chunk, False, False)
    recognizer.end_utt()

    words = [segment.word for segment in recognizer.seg()]
    if words:
        return words
    return 'Audio file doesn\'t contain words'
def recog_wav(MODELDIR, wavfile):
    """Decode an audio file with pocketsphinx and print the recognized words.

    Also prints how long the decode took (benchmarking aid).

    Args:
        MODELDIR: directory holding the 'en-us' HMM, 'en-us.lm.bin'
            language model and 'cmudict-en-us.dict'.
        wavfile: path of the raw audio file to decode.
    """
    #print(MODELDIR)
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    config.set_string('-lm', os.path.join(MODELDIR, 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join(MODELDIR, 'cmudict-en-us.dict'))

    # Decode streaming data.
    decoder = Decoder(config)
    start = time.time()
    decoder.start_utt()
    # Fix: context manager — the original opened the stream and never
    # closed it, leaking the file handle.
    with open(wavfile, "rb") as wav_stream:
        while True:
            buffer = wav_stream.read(1024)
            if buffer:
                decoder.process_raw(buffer, False, False)
            else:
                break
    decoder.end_utt()
    duration = time.time() - start
    print("Duration: " + str(duration))  # Benchmarking
    for seg in decoder.seg():
        print(seg.word)
def recognize(wav_file):
    """Run speech recognition on a given file.

    Args:
        wav_file: path of the WAV file to decode.

    Returns:
        The decoder hypothesis from get_hyp().
    """
    #BASE_PATH = os.path.dirname(os.path.realpath(__file__))
    #HMDIR = os.path.join(BASE_PATH, "hmm")
    #LMDIR = os.path.join(BASE_PATH, "lm/en-us.lm.bin")
    #DICTD = os.path.join(BASE_PATH, "dict/en_in.dic")
    config = DefaultConfig()
    config.set_string('-hmm', "hmm/")
    # Fix: the original used literal backslashes ('lm\en-us.lm.bin'),
    # which only resolved on Windows; os.path.join is portable.
    config.set_string('-lm', os.path.join('lm', 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join('dict', 'en_in.dic'))

    speech_rec = Decoder(config)
    print("Decoder Initialized")
    # NOTE(review): decode_raw() is handed a wave.Wave_read object rather
    # than a plain binary file; pocketsphinx expects a file-like with
    # read() — confirm this is intended.
    audio = wave.open(wav_file, 'rb')
    print("AudioFile Loaded")
    try:
        speech_rec.decode_raw(audio)
        print("Audio file decoded")
    finally:
        audio.close()  # fix: the original leaked the open wave file
    result = speech_rec.get_hyp()
    print("Result Ready\n")
    return result
def onStart(self):
    """Initialize the pocketsphinx decoder for the active language.

    Downloads the language model first when it is not available locally.
    """
    super().onStart()

    if not self.checkLanguage():
        self.downloadLanguage()

    # Fix: the original wrapped this call in `try: ... except: raise`,
    # which is a no-op wrapper — it was removed.
    pocketSphinxPath = self.getPocketSphinxPath()

    # Hoist the pieces that were repeated in all three set_string calls.
    modelPath = f'{pocketSphinxPath}/model'
    langCode = self.LanguageManager.activeLanguageAndCountryCode.lower()

    self._config = Decoder.default_config()
    self._config.set_string('-hmm', f'{modelPath}/{langCode}')
    self._config.set_string('-lm', f'{modelPath}/{langCode}.lm.bin')
    self._config.set_string('-dict', f'{modelPath}/cmudict-{langCode}.dict')
    self._decoder = Decoder(self._config)
def __init__(self, key_phrase, dict_file, hmm_folder, threshold=1e-90, chunk_size=-1):
    """Configure a pocketsphinx keyphrase (wake-word) decoder.

    Args:
        key_phrase: phrase to spot in the audio stream.
        dict_file: pronunciation dictionary containing the phrase.
        hmm_folder: acoustic model directory.
        threshold: -kws_threshold sensitivity (lower = stricter).
        chunk_size: samples per read; -1 means "read everything available".

    Raises:
        RuntimeError: when pocketsphinx rejects the configuration.
    """
    from pocketsphinx import Decoder
    config = Decoder.default_config()
    config.set_string('-hmm', hmm_folder)
    config.set_string('-dict', dict_file)
    config.set_string('-keyphrase', key_phrase)
    config.set_float('-kws_threshold', float(threshold))
    config.set_float('-samprate', 16000)
    config.set_int('-nfft', 2048)
    config.set_string('-logfn', '/dev/null')  # silence pocketsphinx logging
    self.key_phrase = key_phrase
    # NOTE(review): `pr` is a module-level object supplying sample geometry
    # — confirm it is always imported before this runs.
    self.buffer = b'\0' * pr.sample_depth * pr.buffer_samples
    self.pr = pr
    self.read_size = -1 if chunk_size == -1 else pr.sample_depth * chunk_size
    try:
        self.decoder = Decoder(config)
    except RuntimeError as e:
        options = dict(key_phrase=key_phrase, dict_file=dict_file,
                       hmm_folder=hmm_folder, threshold=threshold)
        # Fix: chain the original exception so the root cause from
        # pocketsphinx is not lost from the traceback.
        raise RuntimeError('Invalid Pocketsphinx options: ' + str(options)) from e
def __init__(self):
    """Set up the decoder, microphone, Pyvona TTS and Google STT recognizer.

    Reads the Pyvona credentials from configs/pyvona.txt and the Google
    STT key from configs/GoogleSTT.txt.
    """
    JarvisIOHandler.__init__(self)

    # System-wide pocketsphinx US-English model files.
    hmm = '/usr/local/share/pocketsphinx/model/en-us/en-us'
    dic = '/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'
    lm = '/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin'

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dic)
    config.set_string('-logfn', '/dev/null')  # silence pocketsphinx logging
    self.decoder = Decoder(config)

    self.microphone = pyaudio.PyAudio()

    # Fix: context managers — the originals were closed manually and
    # leaked on a read error.
    with open('configs/pyvona.txt') as pyvona_config:
        pvcfg = pyvona_config.readlines()
    # Line 1: access key, line 2: secret key.
    self.voice = pyvona.create_voice(pvcfg[0].strip(), pvcfg[1].strip())
    self.voice.region = 'us-west'
    self.voice.voice_name = 'Brian'
    self.voice.sentence_break = 200

    with open('configs/GoogleSTT.txt') as googleSTT_config:
        self.key = googleSTT_config.readlines()[0].strip()

    # Calibrate the recognizer against current ambient noise once.
    self.recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        self.recognizer.adjust_for_ambient_noise(source)
def pocket():
    # Transcribe one hard-coded WAV sample twice: first through a manually
    # configured Decoder fed by speech_recognition, then through the
    # high-level Pocketsphinx helper.
    # NOTE(review): mixes Python-2 print statements with a Python-3
    # print() call — as written this only parses under Python 2.
    ps = Pocketsphinx()
    # Model files live next to this script.
    language_directory = os.path.dirname(os.path.realpath(__file__))
    print language_directory
    acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
    language_model_file = os.path.join(language_directory, "language-model.lm.bin")
    phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
    config = Decoder.default_config()
    config.set_string("-hmm", acoustic_parameters_directory) # set the path of the hidden Markov model (HMM) parameter files
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)
    decoder = Decoder(config)
    with sr.AudioFile(s_dir + "/a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav") as source:
        audio_data = r.record(source)
    # NOTE(review): process_raw is given an sr.AudioData object, not raw
    # bytes — confirm this is what pocketsphinx expects here.
    decoder.start_utt()
    decoder.process_raw(audio_data, False, True)
    decoder.end_utt()
    print decoder.hyp()
    # Second pass: the convenience API decodes the same file directly.
    ps.decode(
        audio_file=os.path.join(s_dir, 'a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav'),
        buffer_size=2048,
        no_search=False,
        full_utt=False)
    print(ps.hypothesis()) # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
#pocket()
def record(listen_time):
    # Record `listen_time` seconds from the microphone, bail out if the
    # volume never crosses the threshold, otherwise save the take to
    # livewav.wav and return pocketsphinx's transcription of it.
    # (Python 2 code: print statements, integer division in the range().)
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"
    p = pyaudio.PyAudio()
    if THRESHOLD == None:
        # Calibrate the silence threshold from ambient noise.
        THRESHOLD = fetchThreshold()
    print THRESHOLD
    stream = p.open(format=FORMAT, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
    print "* recording"
    frames = []
    detected = False
    # RATE / CHUNK iterations per second of listening.
    for i in range(0, RATE / CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True
    if not detected:
        # NOTE(review): this early return leaves the stream open and
        # p un-terminated — confirm whether the leak is acceptable here.
        print "nothing detected"
        return("")
    print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()
    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"
    # Decode the file we just wrote with the module-level model paths.
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')
    speechRec = Decoder(config)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    #result = speechRec.get_hyp()
    return(speechRec.hyp().hypstr)
def create_decoder():
    """Return a pocketsphinx Decoder configured with the default US-English model."""
    model_dir = get_model_path()
    cfg = Decoder.default_config()
    # All four options point at the stock model; logging goes to the null device.
    for option, value in (
        ("-hmm", os.path.join(model_dir, "en-us")),
        ("-lm", os.path.join(model_dir, "en-us.lm.bin")),
        ("-dict", os.path.join(model_dir, "cmudict-en-us.dict")),
        ("-logfn", os.devnull),
    ):
        cfg.set_string(option, value)
    return Decoder(cfg)
def prepareDecoder(self, pGramma):
    """Entry point where the sphinx decoder is initialized or its grammar updated.

    On the first call a decoder is built from ``pGramma``; afterwards only
    the grammar of the existing decoder is swapped.
    """
    if self.decoder is not None:
        self.updateGrammar(self.decoder, pGramma)
    else:
        self.config = self.createConfig(pGramma)
        self.decoder = Decoder(self.config)
def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000, lang="en-us"):
    """Build a keyphrase decoder from a phrase and its phoneme spelling."""
    self.lang = lang
    self.key_phrase = key_phrase
    self.sample_rate = sample_rate
    self.threshold = threshold
    self.phonemes = phonemes
    # Write a one-entry pronunciation dictionary for the phrase, then
    # configure the decoder around it.
    self.decoder = Decoder(self.create_config(self.create_dict(key_phrase, phonemes)))
def listen(MODE):
    # Listen on the microphone until a recognized word matches a drink
    # request (MODE == 0) or a confirmation (MODE == 1), then return it.
    # Uses speech-state transitions to segment utterances.
    CORPUS = 6278  # basename of the custom .lm.bin/.dic pair
    model_path = get_model_path()
    home_path = "/home/the0s/Desktop/HCR_Python"
    print(model_path)
    print(home_path)
    # NOTE(review): DATADIR is never used below — confirm it can go.
    DATADIR = "/usr/local/lib/python2.7/dist-packages/pocketsphinx/data"
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_path, 'hub4wsj_sc_8k'))
    config.set_string('-lm', os.path.join(home_path, str(CORPUS) + '.lm.bin'))
    config.set_string('-dict', os.path.join(home_path, str(CORPUS) + '.dic'))
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
    stream.start_stream()
    in_speech_bf = False
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        if buf:
            decoder.process_raw(buf, False, False)
            # React only when the speech/silence state flips.
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    # Utterance just ended: inspect the hypothesis.
                    decoder.end_utt()
                    if decoder.hyp() is not None:
                        buf = [s for s in decoder.hyp().hypstr.split()]
                        print(buf)
                        if len(buf) > 0:
                            if MODE == 0: #DrinkRequest
                                for item in buf:
                                    if checkRequest(item) != "NONE":
                                        output = checkRequest(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output
                            if MODE == 1: #DrinkConfirm
                                for item in buf:
                                    if checkConfirm(item) != "NONE":
                                        output = checkConfirm(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output
                    # No match yet: re-arm for the next utterance.
                    decoder.start_utt()
        else:
            break
    decoder.end_utt()
def __init__(self):
    # Keyword-spotting listener: picks a USB input device, then loops
    # forever printing every keyphrase hit from keyphrases.thresh.
    MODELDIR = get_model_path()
    CURR_DIR = os.path.dirname(os.path.realpath(__file__))
    KEYPHRASE_THRESH_DIR = CURR_DIR + '/keyphrases.thresh'
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    config.set_string('-dict', \
        os.path.join(MODELDIR, 'cmudict-en-us.dict'))
    config.set_string('-kws', KEYPHRASE_THRESH_DIR)
    #config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)
    p = pyaudio.PyAudio()
    host_info = p.get_host_api_info_by_index(0)
    # Fall back to device 3 when no USB input device is found.
    device_index = 3
    for i in range(host_info.get('deviceCount')):
        device_info = p.get_device_info_by_host_api_device_index(0, i)
        #print('\n\n\n\n'+str(i)+device_info.get('name') + " : " + str(device_info.get('maxInputChannels')))
        if 'USB' in device_info.get('name'):
            device_index = i
            break
    ''' fire /1e18/ '''
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=44100,
        input=True,
        frames_per_buffer=1024,
        input_device_index=device_index)
    stream.start_stream()
    in_speech_bf = True
    decoder.start_utt()
    print("Starting to listen")
    # NOTE(review): this loop has no break/return, so everything after it
    # (the prints and the stream/p cleanup) is unreachable — confirm.
    while True:
        buf = stream.read(1024, exception_on_overflow = False)
        decoder.process_raw(buf, False, False)
        if decoder.hyp() != None:
            print("\nDetected: " + decoder.hyp().hypstr + "\n")
            # Restart the utterance so spotting continues.
            decoder.end_utt()
            #print "Detected Move Forward, restarting search"
            decoder.start_utt()
    print("Am not listening any more")
    stream.stop_stream()
    stream.close()
    p.terminate()
def passiverecord(THRESHOLD=None):
    # Passively record up to LISTEN_TIME seconds, stopping early when the
    # rolling volume average drops well below the threshold, then decode
    # the saved take and return the hypothesis string.
    # (Python 2 code: print statements, integer division in the range().)
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 2
    WAVE_OUTPUT_FILENAME = "passive.wav"
    p = pyaudio.PyAudio()
    if THRESHOLD == None:
        # Calibrate the silence threshold from ambient noise.
        THRESHOLD = fetchThreshold()
    print THRESHOLD
    stream = p.open(format=FORMAT, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
    # print "* recording"
    frames = []
    # Rolling window of the last 30 chunk scores, seeded above threshold
    # so we don't stop before any audio arrives.
    lastN = [THRESHOLD * 1.2 for i in range(30)]
    for i in range(0, RATE / CHUNK * LISTEN_TIME):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        lastN.pop(0)
        lastN.append(score)
        average = sum(lastN) / float(len(lastN))
        # print average,THRESHOLD * 0.8
        if average < THRESHOLD * 0.8:
            # Sustained quiet: stop recording early.
            break
    # print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()
    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir + "/passive.wav"
    # decoded=decodepassive()
    # NOTE(review): this keyword-argument Decoder constructor is the old
    # pocketsphinx API — confirm the installed version supports it.
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return (result[0])
def onStart(self):
    """Initialize the pocketsphinx decoder for the active language.

    Downloads the language model first when it is not available locally.
    """
    super().onStart()

    if not self.checkLanguage():
        self.downloadLanguage()

    # Hoist the model directory and language code that were repeated
    # verbatim in all three set_string calls below.
    # NOTE(review): the path hard-codes python3.7 — this breaks under any
    # other venv interpreter version; confirm whether it should be derived.
    modelPath = f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model'
    langCode = self.LanguageManager.activeLanguageAndCountryCode.lower()

    self._config = Decoder.default_config()
    self._config.set_string('-hmm', f'{modelPath}/{langCode}')
    self._config.set_string('-lm', f'{modelPath}/{langCode}.lm.bin')
    self._config.set_string('-dict', f'{modelPath}/cmudict-{langCode}.dict')
    self._decoder = Decoder(self._config)
def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
    """Set up a pocketsphinx keyphrase decoder for the configured hotword."""
    super().__init__(key_phrase, config, lang)
    # Hotword module imports (kept local: pocketsphinx is an optional dep).
    from pocketsphinx import Decoder

    # Pull the module parameters out of the hotword configuration.
    self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
    self.num_phonemes = len(self.phonemes.split())
    self.threshold = self.config.get("threshold", 1e-90)
    # NOTE(review): the default 1600 looks like a typo for 16000 — confirm.
    self.sample_rate = self.listener_config.get("sample_rate", 1600)

    # Write the keyphrase dictionary, then build the decoder around it.
    ps_config = self.create_config(
        self.create_dict(self.key_phrase, self.phonemes),
        Decoder.default_config(),
    )
    self.decoder = Decoder(ps_config)
def retrieve_scores(word):
    """Force-align `word`'s recording against its JSGF grammar and return scores.

    Expects `<word>.wav` and `<word>-align.jsgf` in the working directory.

    Args:
        word: the word whose recording and alignment grammar to use.

    Returns:
        list: segment scores from the last completed utterance ([] if none).
    """
    filename = word + '.wav'
    grammarname = word + '-align.jsgf'
    model_path = get_model_path()

    # Initialize the config values (no LM — the JSGF grammar drives search).
    config = DefaultConfig()
    config.set_boolean('-verbose', False)
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_boolean('-lm', False)
    config.set_string('-dict', 'phonemes.dict.txt')
    config.set_boolean('-backtrace', True)
    config.set_boolean('-bestpath', False)
    config.set_boolean('-fsgusefiller', False)
    decoder = Decoder(config)

    # Set the search to JSGF Grammar.
    jsgf = Jsgf(grammarname)
    rule = jsgf.get_rule('forcing.' + word)  # kept: validates the rule exists
    decoder.set_jsgf_file('grammar', grammarname)
    decoder.set_search('grammar')

    utt_started = False
    scores = []
    decoder.start_utt()
    # Fix: context manager — the original never closed the audio stream.
    with open(filename, 'rb') as stream:
        while True:
            buf = stream.read(1024)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if (in_speech and not utt_started):
                utt_started = True
            if (not in_speech and utt_started):
                # Utterance finished: harvest the alignment scores.
                decoder.end_utt()
                hyp = decoder.hyp()
                if hyp is not None:
                    print('hyp: %s' % (hyp.best_score))
                    print_segments(decoder)
                    scores = retrieve_segments(decoder)
                decoder.start_utt()
                utt_started = False
    decoder.end_utt()
    print('scores:', scores)
    return scores
def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000, lang="en-us"):
    """Build a keyphrase decoder, tracing the inputs to stdout as it goes."""
    self.lang = str(lang)
    self.key_phrase = str(key_phrase)
    self.sample_rate = sample_rate
    self.threshold = threshold
    self.phonemes = phonemes
    # Debug tracing of the raw arguments (kept byte-for-byte).
    print("####key_phrase-->", key_phrase)
    print("####phonemes -->", phonemes)
    dict_name = self.create_dict(key_phrase, phonemes)
    print("####dict_name --->", dict_name)
    self.decoder = Decoder(self.create_config(dict_name))
def get_phonemes(file):
    """Decode an audio file and return the recognized segment words.

    Args:
        file: path of the raw audio file to decode.

    Returns:
        list: seg.word for every segment in the decoder's segmentation.
    """
    # Decode streaming data (module-level `config` supplies the model paths).
    decoder = Decoder(config)
    decoder.start_utt()
    # Fix: context manager — the original never closed the stream; the
    # dead `i = 0` counter and the unused `Hypothesis = decoder.hyp()`
    # result were dropped as well (hyp() is a plain getter).
    with open(file, 'rb') as stream:
        while True:
            buf = stream.read(1024)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
    decoder.end_utt()
    return [seg.word for seg in decoder.seg()]
def init():
    """Create the module-global decoder, PyAudio handle and speech recognizer."""
    global decoder, p, r

    # Build the pocketsphinx configuration from the project settings.
    cfg = DefaultConfig()
    # cfg.set_string('-logfn', settings.POCKET_LOG)
    cfg.set_string('-hmm', settings.POCKET_HMM_ACOUSTIC_MODEL)
    cfg.set_string('-lm', settings.POCKET_LANGUAGE_MODEL)
    cfg.set_string('-dict', settings.POCKET_DICTIONARY)
    # cfg.set_string('-kws', settings.POCKET_KEYPHRASES)

    # Decode streaming data.
    decoder = Decoder(cfg)
    p = pyaudio.PyAudio()

    # Set up the speech recognition recognizer.
    r = speech_recognition.Recognizer()
def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
    """Set up a pocketsphinx keyphrase decoder for the configured hotword."""
    super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
    # Hotword module imports (kept local: pocketsphinx is an optional dep).
    from pocketsphinx import Decoder

    # Warn when this class was selected for a different hotword module.
    module = self.config.get("module")
    if module != "pocketsphinx":
        LOG.warning(
            str(module) + " module does not match with "
            "Hotword class pocketsphinx")

    # Pull the module parameters out of the hotword configuration.
    self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
    self.num_phonemes = len(self.phonemes.split())
    self.threshold = self.config.get("threshold", 1e-90)
    # NOTE(review): the default 1600 looks like a typo for 16000 — confirm.
    self.sample_rate = self.listener_config.get("sample_rate", 1600)

    # Write the keyphrase dictionary, then build the decoder around it.
    ps_config = self.create_config(
        self.create_dict(self.key_phrase, self.phonemes),
        Decoder.default_config(),
    )
    self.decoder = Decoder(ps_config)
def __init__(self, rt, on_activation: Callable):
    """Download the language model if needed and build a keyphrase decoder."""
    super().__init__(rt, on_activation)

    lang = rt.config['lang']
    self.hmm_folder = join(rt.paths.user_config, 'models', lang)
    # Recording geometry from the recognizer configuration.
    self.rate = self.rec_config['sample_rate']
    self.width = self.rec_config['sample_width']
    # Silence padding appended around audio, plus an empty rolling buffer.
    self.padding = b'\0' * int(self.rate * self.width * self.SILENCE_SEC)
    self.buffer = b''

    # Fetch the acoustic model for this language (no-op when cached).
    download_extract_tar(self.url.format(lang=lang), self.hmm_folder)

    cfg = Decoder.default_config()
    cfg.set_string('-hmm', self.hmm_folder)
    cfg.set_string('-dict', self._create_dict(self.wake_word, self.config['phonemes']))
    cfg.set_string('-keyphrase', self.wake_word)
    cfg.set_float('-kws_threshold', float(self.config['threshold']))
    cfg.set_float('-samprate', self.rate)
    cfg.set_int('-nfft', 2048)
    cfg.set_string('-logfn', '/dev/null')  # silence pocketsphinx logging
    self.ps = Decoder(cfg)
def __init__(self):
    """Set up the decoder, TTS engine, audio input and prompt bookkeeping."""
    self.decoder = Decoder(get_decoder_config())
    self.speech = pyttsx3.init()
    self.audio = sphinxbase.Ad(self.audio_device, self.sampling_rate)
    self.buffer = bytearray(self.buffer_size)

    # Remember the initial search so we can restore it after prompts.
    self.default_search = self.decoder.get_search()
    self.in_speech = False
    self.max_history = 100
    self.phrases = []

    # Prompt state: id-keyed registry plus a FIFO of pending prompts.
    self.prompts = {}
    self.next_prompt_id = 1
    self.current_prompt = None
    self.prompt_queue = queue.Queue()
def init():
    """Create the module-global keyword-spotting decoder, PyAudio handle and recognizer."""
    global decoder, p, r

    # Build the configuration: stock US-English model, keyword spotting
    # driven by the keyphrase file from settings.
    cfg = DefaultConfig()
    cfg.set_string('-logfn', settings.POCKETSPHINX_LOG)
    #cfg.set_string('-hmm', settings.ACOUSTIC_MODEL)
    cfg.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    cfg.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    #cfg.set_string('-lm', settings.LANGUAGE_MODEL)
    cfg.set_string('-kws', settings.KEYPHRASES)
    #cfg.set_string('-dict', settings.POCKET_DICT)

    # Decode streaming data.
    decoder = Decoder(cfg)
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
def run(self):
    # Thread main loop: stream the microphone into pocketsphinx until
    # self._terminate is set, dispatching every non-empty hypothesis to
    # self.decode() and optionally saving raw audio for later WAV export.
    conf = Decoder.default_config()
    conf.set_string('-hmm', self.config.hmmPS)
    conf.set_string('-lm', self.config.lmPS)
    conf.set_string('-dict', self.config.dictPS)
    if os.path.isfile(self.config.mllrPS):
        # Optional speaker-adaptation transform.
        conf.set_string('-mllr', self.config.mllrPS)
    decoder = Decoder(conf)
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
    stream.start_stream()
    self.samplewith = p.get_sample_size(pyaudio.paInt16)
    in_speech_bf = True
    # NOTE(review): start_utt('') with an argument is the old pocketsphinx
    # API — confirm the installed version supports it.
    decoder.start_utt('')
    while not self._terminate:
        buf = stream.read(1024)
        if buf:
            if self.save:
                self.liSave.append(buf)
                self.numSave += 1
                if self.numSave > self.maxSave:
                    # protect against leaving the microphone recording forever
                    self.activeSave(self.fichWAV)
            decoder.process_raw(buf, False, False)
            # React only when the speech/silence state flips.
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    # Utterance ended: forward the hypothesis if non-empty.
                    decoder.end_utt()
                    try:
                        if decoder.hyp().hypstr != '':
                            self.decode(decoder.hyp().hypstr)
                    except AttributeError:
                        # hyp() returned None — nothing was recognized.
                        pass
                    decoder.start_utt('')
        else:
            break
    decoder.end_utt()
def __init__(self, file_name='aux.wav', raspi=False, local=True):
    """Set up audio capture, the pocketsphinx decoder and a TTS backend.

    Args:
        file_name: scratch WAV file used for recordings.
        raspi: whether we are running on a Raspberry Pi.
        local: True -> offline pyttsx3 TTS; False -> Google Cloud TTS.
    """
    ## load environment
    self.FILE_NAME = file_name
    self.audio = pyaudio.PyAudio()
    self.raspi = raspi
    self.local = local

    # Offline decoder built from the class-level model directory.
    self.config = Decoder.default_config()
    self.config.set_string('-hmm', os.path.join(self.MODELDIR, 'acoustic-model'))
    self.config.set_string(
        '-dict',
        os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
    self.config.set_string('-logfn', os.devnull)
    self.decoder = Decoder(self.config)

    # Calibrate the online recognizer against current ambient noise.
    self.r = sr.Recognizer()
    print("adjunting...")  # NOTE(review): probably meant "adjusting..."
    with sr.Microphone() as source:
        self.r.adjust_for_ambient_noise(source)

    # tts
    if not self.local:
        # Instantiate a Google Cloud client with a fixed Spanish female
        # voice and MP3 output.
        self.tts_client = texttospeech.TextToSpeechClient()
        self.tts_voice = texttospeech.types.VoiceSelectionParams(
            language_code='es-ES',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
        self.tts_audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3)
    else:
        # Offline TTS via pyttsx3.
        self.tts = pyttsx3.init()
        self.tts.setProperty('rate', self.RATE)
        self.tts.setProperty('volume', self.VOLUME)
        self.tts.setProperty('voice', 'spanish-latin-am')
def begin_passive_listening(self):
    """Uses PocketSphinx to listen for the wakeword and call the active
    listening function.

    Runs forever: on each wakeword hit it calls active_listen(), matches
    the result against the registered commands, and re-arms spotting.
    """
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    config.set_string('-keyphrase', self.config.get("general", "wake_word"))
    # Fix: 'nul' only suppresses logging on Windows (and creates a file
    # literally named "nul" elsewhere); os.devnull is portable.
    config.set_string('-logfn', os.devnull)
    config.set_float('-kws_threshold', 1e-10)

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    stream.start_stream()

    decoder = Decoder(config)
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        decoder.process_raw(buf, False, False)
        if decoder.hyp() is not None:
            logging.debug("Wake word recognized")
            speech_input = self.active_listen()
            # Negative sentinels signal failures from the active listener.
            if (speech_input != -1 and speech_input != -2
                    and speech_input != -3):
                for name, command in self.commands.items():
                    if speech_input in name:
                        command()
            elif speech_input == -1:
                self.speak("Sorry, I didn't catch that.")
            # Re-arm keyword spotting for the next utterance.
            decoder.end_utt()
            decoder.start_utt()
            logging.debug("Listening for wakeword again")
def __init__(self, file_name='aux.wav', raspi=False):
    """Set up audio capture, the pocketsphinx decoder and offline TTS.

    Args:
        file_name: scratch WAV file used for recordings.
        raspi: whether we are running on a Raspberry Pi.
    """
    self.FILE_NAME = file_name
    self.audio = pyaudio.PyAudio()
    self.raspi = raspi

    # Offline decoder built from the class-level model directory.
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(self.MODELDIR, 'acoustic-model'))
    cfg.set_string(
        '-dict',
        os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
    cfg.set_string('-logfn', os.devnull)
    self.config = cfg
    self.decoder = Decoder(cfg)

    # Calibrate the online recognizer against current ambient noise.
    self.r = sr.Recognizer()
    print("adjunting...")  # NOTE(review): probably meant "adjusting..."
    with sr.Microphone() as source:
        self.r.adjust_for_ambient_noise(source)

    # Offline TTS via pyttsx3, fixed Latin-American Spanish voice.
    self.tts = pyttsx3.init()
    self.tts.setProperty('rate', self.RATE)
    self.tts.setProperty('volume', self.VOLUME)
    self.tts.setProperty('voice', 'spanish-latin-am')
def __init__(self, in_fs, out_fs, mute_period_length, kws_frame_length):
    """Daemon thread that spots the keyword 'alexa' in the input stream.

    Args:
        in_fs: input sample rate (Hz).
        out_fs: output sample rate (Hz).
        mute_period_length: mute window length in seconds.
        kws_frame_length: keyword-spotting frame length in seconds.
    """
    threading.Thread.__init__(self)

    # Basic thread configuration.
    self.daemon = True
    self.exit_flag = False
    self.in_fs = in_fs
    self.out_fs = out_fs
    # Convert the second-based windows into frame counts at the input rate.
    self.mute_period_frames_count = int(in_fs * mute_period_length)
    self.kws_frames_count = int(in_fs * kws_frame_length)

    model_dir = get_model_path()
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(model_dir, 'en-us'))  # acoustic model path
    # cfg.set_string('-lm',"./tests/7567.lm")
    cfg.set_string('-dict', os.path.join(model_dir, 'cmudict-en-us.dict'))  # dictionary path
    cfg.set_string('-keyphrase', 'alexa')
    cfg.set_float('-kws_threshold', 1e-20)
    cfg.set_string('-logfn', './logs/tmp')  # redirect INFO logging elsewhere
    self.decoder = Decoder(cfg)
    self.decoder.start_utt()

    # The thread starts listening immediately on construction.
    self.start()
def load_decoder(myid, model_config, out):
    """Create a pocketsphinx decoder from the first section of a model config.

    Args:
        myid: worker id, used to build a per-worker log file name.
        model_config: configparser-style mapping whose first section holds
            the 'hmm', 'dict' and 'lm' paths.
        out: prefix for the per-worker log file.

    Returns:
        The configured Decoder instance.

    Exits the process (codes -2/-4/-5) when a model file is missing.
    """
    # Create a decoder with certain model
    pocketsphinx_config = DefaultConfig()
    model_name = model_config.sections()[0]
    hmm = model_config[model_name]['hmm']
    # Fix: renamed the local from `dict` to `dict_path` — it shadowed the
    # builtin dict().
    dict_path = model_config[model_name]['dict']
    lm = model_config[model_name]['lm']
    # logfn = model_config[model_name]['log']
    logfn = '{}_{}.log'.format(out, myid)

    # Fail fast with the original distinct exit codes when any model file
    # is missing (checked in the original order: hmm, lm, dict).
    for path, exit_code in ((hmm, -2), (lm, -4), (dict_path, -5)):
        if not os.path.exists(path):
            print('ERROR: {} does not exist'.format(path))
            sys.exit(exit_code)

    pocketsphinx_config.set_string('-hmm', hmm)
    pocketsphinx_config.set_string('-lm', lm)
    pocketsphinx_config.set_string('-dict', dict_path)
    pocketsphinx_config.set_string('-logfn', logfn)
    return Decoder(pocketsphinx_config)