def pocket():
    """Decode a sample WAV twice — once through the raw Decoder API and once
    through the high-level Pocketsphinx wrapper — printing both hypotheses.

    NOTE(review): relies on module-level `s_dir`, `r` (a recognizer) and `sr`
    (speech_recognition) defined elsewhere in this file.
    """
    ps = Pocketsphinx()
    language_directory = os.path.dirname(os.path.realpath(__file__))
    print(language_directory)  # fixed: original used a Python-2 print statement
    acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
    language_model_file = os.path.join(language_directory, "language-model.lm.bin")
    phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")

    config = Decoder.default_config()
    config.set_string("-hmm", acoustic_parameters_directory)  # set the path of the hidden Markov model (HMM) parameter files
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)
    decoder = Decoder(config)

    wav_name = "a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav"
    with sr.AudioFile(s_dir + "/" + wav_name) as source:
        audio_data = r.record(source)

    decoder.start_utt()
    # process_raw() expects raw PCM bytes; the AudioData object must be
    # converted first (passing the object itself was a bug).
    decoder.process_raw(audio_data.get_raw_data(), False, True)
    decoder.end_utt()
    print(decoder.hyp())  # fixed: py2 print statement

    ps.decode(
        audio_file=os.path.join(s_dir, wav_name),
        buffer_size=2048,
        no_search=False,
        full_utt=False)
    print(ps.hypothesis())  # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']

# pocket()
def __init__(self):
    """Set up STT (pocketsphinx + Google) and TTS (pyvona) plus microphone input.

    Reads API credentials from configs/pyvona.txt and configs/GoogleSTT.txt.
    """
    JarvisIOHandler.__init__(self)
    # Default en-us acoustic model, language model and dictionary paths.
    hmm = '/usr/local/share/pocketsphinx/model/en-us/en-us'
    dic ='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'
    lm ='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin'
    config = Decoder.default_config()
    config.set_string('-hmm',hmm)
    config.set_string('-lm',lm)
    config.set_string('-dict',dic)
    config.set_string('-logfn','/dev/null')  # discard pocketsphinx log output
    self.decoder = Decoder(config)
    self.microphone = pyaudio.PyAudio()
    # pyvona credentials: line 0 = access key, line 1 = secret key.
    pyvona_config = open('configs/pyvona.txt')
    pvcfg = pyvona_config.readlines()
    pyvona_config.close()
    self.voice = pyvona.create_voice(pvcfg[0].strip(),pvcfg[1].strip())
    self.voice.region = 'us-west'
    self.voice.voice_name='Brian'
    self.voice.sentence_break = 200
    # Google STT API key is the first line of its config file.
    googleSTT_config = open('configs/GoogleSTT.txt')
    self.key = googleSTT_config.readlines()[0].strip()
    googleSTT_config.close()
    self.recognizer = sr.Recognizer()
    # Calibrate the energy threshold against current background noise.
    with sr.Microphone() as source:
        self.recognizer.adjust_for_ambient_noise(source)
def get_text_from_audio(audio_input_name: str, working_directory: str = WORKING_DIRECTORY):
    """Transcribe an audio file with pocketsphinx.

    Args:
        audio_input_name: name of the raw audio file inside working_directory.
        working_directory: directory containing the audio file.

    Returns:
        list[str]: the recognized words, or the fallback message string when
        nothing was recognized (the original docstring claimed list only).
    """
    # Create a decoder with the bundled en-us model.
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(SPEECH_MODEL_PATH, 'en-us'))
    config.set_string('-lm', os.path.join(SPEECH_MODEL_PATH, 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join(SPEECH_MODEL_PATH, 'cmudict-en-us.dict'))
    decoder = Decoder(config)

    # Decode streaming data in 1 KiB chunks until EOF (read() returns b'').
    decoder.start_utt()
    with open(os.path.join(working_directory, audio_input_name), 'rb') as stream:
        for buf in iter(lambda: stream.read(1024), b''):
            decoder.process_raw(buf, False, False)
    decoder.end_utt()

    text_from_audio = [seg.word for seg in decoder.seg()]
    return text_from_audio if text_from_audio else 'Audio file doesn\'t contain words'
def __init__(self, profile, hmm=None, dict=None, lm=None, kws_threshold=None, keyphrase=None):
    """Build a pocketsphinx Decoder for keyphrase spotting (when `keyphrase`
    is given) or corpus-based recognition.

    NOTE(review): the `dict` parameter shadows the builtin; kept because it
    is part of the public signature.
    """
    self.profile = profile
    # Keyphrase mode and corpus mode fall back to different dictionary/LM files.
    if keyphrase:
        if not dict:
            dict = fullpath('config/keyphrase.dic')
        if not lm:
            lm = fullpath('config/keyphrase.lm')
    else:
        if not dict:
            dict = fullpath('config/corpus.dic')
        if not lm:
            lm = fullpath('config/corpus.lm')
    if not hmm:
        hmm = 'share/pocketsphinx/model/en-us/en-us'
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(SPHINX_ROOT, hmm))
    config.set_string('-dict', dict)
    config.set_string('-lm', lm)
    config.set_string('-logfn', fullpath('config/sphinx.log'))
    if keyphrase:
        config.set_string('-keyphrase', keyphrase)
    if kws_threshold:
        config.set_float('-kws_threshold', kws_threshold)
    self.decoder = Decoder(config)
    # Platform-specific transcription backend (only darwin wired up here).
    self.transcribe = self.transcribe_darwin
    self.hyp = None
def __init__(self, key_phrase, dict_file, hmm_folder, threshold=1e-90, chunk_size=-1):
    """Configure a pocketsphinx keyphrase spotter at 16 kHz.

    chunk_size of -1 means "no fixed read size".
    NOTE(review): relies on a module-level `pr` (audio parameters) object.
    """
    from pocketsphinx import Decoder
    config = Decoder.default_config()
    config.set_string('-hmm', hmm_folder)
    config.set_string('-dict', dict_file)
    config.set_string('-keyphrase', key_phrase)
    config.set_float('-kws_threshold', float(threshold))
    config.set_float('-samprate', 16000)
    config.set_int('-nfft', 2048)
    config.set_string('-logfn', '/dev/null')  # discard pocketsphinx logging
    self.key_phrase = key_phrase
    # Pre-fill the rolling buffer with silence so the decoder has context.
    self.buffer = b'\0' * pr.sample_depth * pr.buffer_samples
    self.pr = pr
    self.read_size = -1 if chunk_size == -1 else pr.sample_depth * chunk_size
    try:
        self.decoder = Decoder(config)
    except RuntimeError:
        # Re-raise with the offending option values for easier debugging.
        options = dict(key_phrase=key_phrase, dict_file=dict_file,
                       hmm_folder=hmm_folder, threshold=threshold)
        raise RuntimeError('Invalid Pocketsphinx options: ' + str(options))
def recog_wav(MODELDIR, wavfile):
    """Decode a WAV file with the en-us model under MODELDIR, printing the
    decode duration and each recognized word segment.

    Args:
        MODELDIR: directory containing en-us, en-us.lm.bin, cmudict-en-us.dict.
        wavfile: path of the audio file to decode.
    """
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    config.set_string('-lm', os.path.join(MODELDIR, 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join(MODELDIR, 'cmudict-en-us.dict'))

    # Decode streaming data.
    decoder = Decoder(config)
    start = time.time()
    decoder.start_utt()
    # `with` guarantees the stream is closed (the original leaked the handle).
    with open(wavfile, "rb") as wav_stream:
        while True:
            buffer = wav_stream.read(1024)
            if buffer:
                decoder.process_raw(buffer, False, False)
            else:
                break
    decoder.end_utt()
    duration = time.time() - start
    print("Duration: " + str(duration))  # benchmarking
    for seg in decoder.seg():
        print(seg.word)
def onStart(self):
    """Ensure the active language model is available, then build the decoder."""
    super().onStart()
    if not self.checkLanguage():
        self.downloadLanguage()

    try:
        pocketSphinxPath = self.getPocketSphinxPath()
    except:
        raise  # surface path-resolution failures unchanged to the caller

    self._config = Decoder.default_config()
    # Model files are keyed by the active language/country code, e.g. "en-us".
    self._config.set_string(
        '-hmm',
        f'{pocketSphinxPath}/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}'
    )
    self._config.set_string(
        '-lm',
        f'{pocketSphinxPath}/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin'
    )
    self._config.set_string(
        '-dict',
        f'{pocketSphinxPath}/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict'
    )
    self._decoder = Decoder(self._config)
def record(listen_time):
    """Record `listen_time` seconds from the microphone, save it to
    livewav.wav and return the pocketsphinx transcription.

    Returns "" when nothing above the silence threshold was detected.
    Uses module-level FORMAT/RATE/CHUNK and hmdir/lmdir/dictd paths.
    """
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"
    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
    print(THRESHOLD)  # fixed: original used Python-2 print statements
    stream = p.open(format=FORMAT, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
    print("* recording")
    frames = []
    detected = False
    # Integer division (//) keeps the loop count an int under Python 3.
    for i in range(0, RATE // CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score >= THRESHOLD:
            detected = True
    if not detected:
        print("nothing detected")
        # Fixed: release audio resources before the early return (the
        # original leaked the stream and the PyAudio instance here).
        stream.close()
        p.terminate()
        return ""
    print("* done recording")
    stream.close()
    p.terminate()

    # Write the captured frames out as a mono WAV file.
    data = b''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    wavfile = os.getcwd() + "/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')
    speechRec = Decoder(config)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    return speechRec.hyp().hypstr
def create_decoder():
    """Return a pocketsphinx Decoder configured with the bundled en-us model,
    with log output discarded."""
    model_path = get_model_path()
    settings = {
        "-hmm": os.path.join(model_path, "en-us"),
        "-lm": os.path.join(model_path, "en-us.lm.bin"),
        "-dict": os.path.join(model_path, "cmudict-en-us.dict"),
        "-logfn": os.devnull,
    }
    config = Decoder.default_config()
    for option, value in settings.items():
        config.set_string(option, value)
    return Decoder(config)
def createConfig(self,pGramma):
    """Build a Decoder config from the LIEPA acoustic model, the compiled FSG
    grammar named by pGramma, and the shared service dictionary."""
    print("[createConfig]+++")
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/liepa.cd_semi_200/'))
    cfg.set_string('-fsg', os.path.join("../resource/", pGramma + '.fsg'))
    # A JSGF source grammar could be used instead of the compiled FSG:
    # cfg.set_string('-jsgf', os.path.join("../resource/", pGramma + '.gram'))
    cfg.set_string('-dict', os.path.join("../resource/", 'service.dict'))
    print("[createConfig]---")
    return cfg
def listen(MODE):
    """Listen on the microphone and return the first recognized drink request
    (MODE == 0) or confirmation (MODE == 1).
    """
    CORPUS = 6278  # basename of the custom .lm.bin/.dic language-model pair
    model_path = get_model_path()
    home_path = "/home/the0s/Desktop/HCR_Python"
    print(model_path)
    print(home_path)
    DATADIR = "/usr/local/lib/python2.7/dist-packages/pocketsphinx/data"
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_path, 'hub4wsj_sc_8k'))
    config.set_string('-lm', os.path.join(home_path, str(CORPUS) + '.lm.bin'))
    config.set_string('-dict', os.path.join(home_path, str(CORPUS) + '.dic'))
    config.set_string('-logfn', '/dev/null')  # discard pocketsphinx logging
    decoder = Decoder(config)

    # 16 kHz mono microphone stream, 1024-frame buffers.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        if buf:
            decoder.process_raw(buf, False, False)
            # React only on speech/silence state transitions.
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    # Utterance finished: inspect the hypothesis words.
                    decoder.end_utt()
                    if decoder.hyp() is not None:
                        buf = [s for s in decoder.hyp().hypstr.split()]
                        print(buf)
                        if len(buf) > 0:
                            if MODE == 0:  # DrinkRequest
                                for item in buf:
                                    if checkRequest(item) != "NONE":
                                        output = checkRequest(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output
                            if MODE == 1:  # DrinkConfirm
                                for item in buf:
                                    if checkConfirm(item) != "NONE":
                                        output = checkConfirm(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output
                    # Nothing matched: start a new utterance and keep listening.
                    decoder.start_utt()
        else:
            break
    decoder.end_utt()
def __init__(self):
    """Open a microphone stream (preferring a USB device) and run keyword
    spotting against the keyphrases/thresholds file next to this module."""
    MODELDIR = get_model_path()
    CURR_DIR = os.path.dirname(os.path.realpath(__file__))
    KEYPHRASE_THRESH_DIR = CURR_DIR + '/keyphrases.thresh'
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    config.set_string('-dict', \
        os.path.join(MODELDIR, 'cmudict-en-us.dict'))
    config.set_string('-kws', KEYPHRASE_THRESH_DIR)
    #config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)
    p = pyaudio.PyAudio()
    host_info = p.get_host_api_info_by_index(0)
    device_index = 3  # fallback device index when no USB microphone is found
    for i in range(host_info.get('deviceCount')):
        device_info = p.get_device_info_by_host_api_device_index(0, i)
        #print('\n\n\n\n'+str(i)+device_info.get('name') + " : " + str(device_info.get('maxInputChannels')))
        if 'USB' in device_info.get('name'):
            device_index = i
            break
    # Example keyphrases.thresh entry format:
    ''' fire /1e18/ '''
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=44100,
        input=True,
        frames_per_buffer=1024,
        input_device_index=device_index)
    stream.start_stream()
    in_speech_bf = True
    decoder.start_utt()
    print("Starting to listen")
    while True:
        buf = stream.read(1024, exception_on_overflow = False)
        decoder.process_raw(buf, False, False)
        if decoder.hyp() != None:
            print("\nDetected: " + decoder.hyp().hypstr + "\n")
            # Restart the utterance so the keyword search resumes.
            decoder.end_utt()
            #print "Detected Move Forward, restarting search"
            decoder.start_utt()
    # NOTE(review): unreachable — the loop above never breaks.
    print("Am not listening any more")
    stream.stop_stream()
    stream.close()
    p.terminate()
def create_config(self, dict_name):
    """Build the keyphrase-spotting Decoder config for this hotword.

    Args:
        dict_name: path of the generated pronunciation dictionary.

    Returns:
        The populated pocketsphinx Config object.
    """
    config = Decoder.default_config()
    config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
    config.set_string('-dict', dict_name)
    config.set_string('-keyphrase', self.key_phrase)
    # Cast explicitly: the threshold may arrive as a string from user config
    # and set_float() needs a real float — matches the sibling
    # create_config implementations elsewhere in this file.
    config.set_float('-kws_threshold', float(self.threshold))
    config.set_float('-samprate', self.sample_rate)
    config.set_int('-nfft', 2048)
    config.set_string('-logfn', '/dev/null')
    return config
def create_config(self, dict_name):
    """Assemble the pocketsphinx Config used for keyphrase spotting."""
    cfg = Decoder.default_config()
    string_options = (
        ('-hmm', join(BASEDIR, 'model', self.lang, 'hmm')),
        ('-dict', dict_name),
        ('-keyphrase', self.key_phrase),
        ('-logfn', '/dev/null'),
    )
    for option, value in string_options:
        cfg.set_string(option, value)
    cfg.set_float('-kws_threshold', float(self.threshold))
    cfg.set_float('-samprate', self.sample_rate)
    cfg.set_int('-nfft', 2048)
    return cfg
def create_config(self, dict_name):
    """Return a Decoder config that spots self.key_phrase with the en-us model."""
    spotting_config = Decoder.default_config()
    spotting_config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    spotting_config.set_string('-dict', dict_name)
    spotting_config.set_string('-keyphrase', self.key_phrase)
    spotting_config.set_string('-logfn', '/dev/null')
    spotting_config.set_float('-kws_threshold', float(self.threshold))
    spotting_config.set_float('-samprate', self.sample_rate)
    spotting_config.set_int('-nfft', 2048)
    return spotting_config
def create_config(self, dict_name):
    """Build the keyphrase-spotting Decoder config.

    NOTE(review): the log path is hardcoded to a user-specific directory
    (/home/sg/...) — it will break on other machines; consider making it
    configurable.
    """
    config = Decoder.default_config()
    config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
    config.set_string('-dict', dict_name)
    config.set_string('-keyphrase', self.key_phrase)
    config.set_float('-kws_threshold', float(self.threshold))
    config.set_float('-samprate', self.sample_rate)
    config.set_int('-nfft', 2048)
    config.set_string('-logfn', '/home/sg/mycroft-core/scripts/logs/pocket.log')
    return config
def onStart(self):
    """Download the active language model if needed and create the decoder.

    NOTE(review): the model path hardcodes venv/lib/python3.7 — this breaks
    on any other Python minor version; confirm against the deployment
    environment.
    """
    super().onStart()
    if not self.checkLanguage():
        self.downloadLanguage()
    self._config = Decoder.default_config()
    # Model files are keyed by the active language/country code, e.g. "en-us".
    self._config.set_string('-hmm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}')
    self._config.set_string('-lm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin')
    self._config.set_string('-dict', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict')
    self._decoder = Decoder(self._config)
def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
    """Pocketsphinx-based hotword detector.

    Reads phonemes/threshold from the hotword config and builds a decoder
    over a dictionary generated for the key phrase.
    """
    super().__init__(key_phrase, config, lang)
    # Hotword module imports
    from pocketsphinx import Decoder
    # Hotword module params
    self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
    self.num_phonemes = len(self.phonemes.split())
    self.threshold = self.config.get("threshold", 1e-90)
    # Fixed default: 16000 Hz, not 1600 — the listener audio is 16 kHz and
    # the old value was a typo that mangled -samprate when the config key
    # was absent.
    self.sample_rate = self.listener_config.get("sample_rate", 16000)
    dict_name = self.create_dict(self.key_phrase, self.phonemes)
    config = self.create_config(dict_name, Decoder.default_config())
    self.decoder = Decoder(config)
def createConfig(self,pGramma):
    '''
    Create configuration with acoustic model path, grammar and dictionary
    '''
    print("[createConfig]+++")
    decoder_config = Decoder.default_config()
    decoder_config.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
    decoder_config.set_string('-fsg', os.path.join("../resource/", pGramma + '.fsg'))
    # JSGF alternative to the compiled FSG grammar:
    # decoder_config.set_string('-jsgf', os.path.join("../resource/", pGramma + '.gram'))
    decoder_config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
    print("[createConfig]---")
    return decoder_config
def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
    """Pocketsphinx hotword engine: spots `key_phrase` in the audio stream."""
    super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
    # Hotword module imports
    from pocketsphinx import Decoder
    # Hotword module params
    self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
    self.num_phonemes = len(self.phonemes.split())
    self.threshold = self.config.get("threshold", 1e-90)
    # Fixed default: 16000 Hz, not 1600 (typo) — the audio front-end runs
    # at 16 kHz.
    self.sample_rate = self.listener_config.get("sample_rate", 16000)
    dict_name = self.create_dict(self.key_phrase, self.phonemes)
    config = self.create_config(dict_name, Decoder.default_config())
    self.decoder = Decoder(config)
def createConfig(self, pGramma):
    '''
    Create configuration with acoustic model path, grammar and dictionary
    '''
    print("[createConfig]+++")
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
    cfg.set_string('-fsg', os.path.join("../resource/", pGramma + '.fsg'))
    # cfg.set_string('-jsgf', os.path.join("../resource/", pGramma+'.gram'))
    cfg.set_string('-dict', os.path.join("../resource/", 'service.dict'))
    print("[createConfig]---")
    return cfg
def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
    """Pocketsphinx hotword engine; warns when the config names a different
    hotword module."""
    super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
    # Hotword module imports
    from pocketsphinx import Decoder
    # Hotword module config
    module = self.config.get("module")
    if module != "pocketsphinx":
        LOG.warning(
            str(module) + " module does not match with "
                          "Hotword class pocketsphinx")
    # Hotword module params
    self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
    self.num_phonemes = len(self.phonemes.split())
    self.threshold = self.config.get("threshold", 1e-90)
    # Fixed default: 16000 Hz, not 1600 (typo) — audio front-end is 16 kHz.
    self.sample_rate = self.listener_config.get("sample_rate", 16000)
    dict_name = self.create_dict(self.key_phrase, self.phonemes)
    config = self.create_config(dict_name, Decoder.default_config())
    self.decoder = Decoder(config)
def get_decoder_config():
    """
    Get a populated configuration object for the pocketsphinx Decoder.

    Every model file option is pointed at the bundled en-us model directory.
    """
    model_dir = get_model_path()
    relative_paths = {
        "-dict": "cmudict-en-us.dict",
        "-fdict": "en-us/noisedict",
        "-featparams": "en-us/feat.params",
        "-hmm": "en-us",
        "-lm": "en-us.lm.bin",
        "-mdef": "en-us/mdef",
        "-mean": "en-us/means",
        "-sendump": "en-us/sendump",
        "-tmat": "en-us/transition_matrices",
        "-var": "en-us/variances",
    }
    config = Decoder.default_config()
    for option, rel_path in relative_paths.items():
        config.set_string(option, os.path.join(model_dir, rel_path))
    return config
def __init__(self, rt, on_activation: Callable):
    """Download the language-specific acoustic model (if needed) and set up
    a pocketsphinx keyphrase spotter for the configured wake word."""
    super().__init__(rt, on_activation)
    lang = rt.config['lang']
    self.hmm_folder = join(rt.paths.user_config, 'models', lang)
    self.rate, self.width = self.rec_config['sample_rate'], self.rec_config['sample_width']
    # SILENCE_SEC seconds of zero bytes, used as padding around audio chunks.
    self.padding = b'\0' * int(self.rate * self.width * self.SILENCE_SEC)
    self.buffer = b''
    download_extract_tar(self.url.format(lang=lang), self.hmm_folder)

    config = Decoder.default_config()
    config.set_string('-hmm', self.hmm_folder)
    config.set_string('-dict', self._create_dict(self.wake_word, self.config['phonemes']))
    config.set_string('-keyphrase', self.wake_word)
    config.set_float('-kws_threshold', float(self.config['threshold']))
    config.set_float('-samprate', self.rate)
    config.set_int('-nfft', 2048)
    config.set_string('-logfn', '/dev/null')  # discard pocketsphinx logging
    self.ps = Decoder(config)
def run(self):
    """Thread main loop: stream microphone audio into pocketsphinx and hand
    each completed non-empty hypothesis to self.decode() until terminated."""
    conf = Decoder.default_config()
    conf.set_string('-hmm', self.config.hmmPS)
    conf.set_string('-lm', self.config.lmPS)
    conf.set_string('-dict', self.config.dictPS)
    # Apply the speaker-adaptation transform only when the MLLR file exists.
    if os.path.isfile(self.config.mllrPS):
        conf.set_string('-mllr', self.config.mllrPS)
    decoder = Decoder(conf)
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    stream.start_stream()
    self.samplewith = p.get_sample_size(pyaudio.paInt16)
    in_speech_bf = True
    # NOTE: start_utt('') with an utterance-id argument is the old
    # pocketsphinx API; newer releases take no argument.
    decoder.start_utt('')
    while not self._terminate:
        buf = stream.read(1024)
        if buf:
            if self.save:
                self.liSave.append(buf)
                self.numSave += 1
                if self.numSave > self.maxSave:
                    # Guard against leaving the microphone recording forever.
                    self.activeSave(self.fichWAV)
            decoder.process_raw(buf, False, False)
            # Act on speech -> silence transitions: the utterance is complete.
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    decoder.end_utt()
                    try:
                        if decoder.hyp().hypstr != '':
                            self.decode(decoder.hyp().hypstr)
                    except AttributeError:
                        # hyp() returned None: nothing was recognized.
                        pass
                    decoder.start_utt('')
        else:
            break
    decoder.end_utt()
def run( self ):
    """Thread main loop: stream microphone audio into pocketsphinx and hand
    each completed non-empty hypothesis to self.decode() until terminated."""
    conf = Decoder.default_config()
    conf.set_string('-hmm', self.config.hmmPS)
    conf.set_string('-lm', self.config.lmPS)
    conf.set_string('-dict', self.config.dictPS)
    # Apply the speaker-adaptation transform only when the MLLR file exists.
    if os.path.isfile(self.config.mllrPS):
        conf.set_string('-mllr', self.config.mllrPS)
    decoder = Decoder(conf)
    p = pyaudio.PyAudio()
    stream = p.open( format=pyaudio.paInt16, channels=1, rate=16000,
                     input=True, frames_per_buffer=1024 )
    stream.start_stream()
    self.samplewith = p.get_sample_size(pyaudio.paInt16)
    in_speech_bf = True
    # NOTE: start_utt('') is the old pocketsphinx API (utterance-id arg).
    decoder.start_utt('')
    while not self._terminate:
        buf = stream.read(1024)
        if buf:
            if self.save:
                self.liSave.append(buf)
                self.numSave += 1
                if self.numSave > self.maxSave:
                    # Guard against leaving the microphone recording forever.
                    self.activeSave(self.fichWAV)
            decoder.process_raw(buf, False, False)
            # Act on speech -> silence transitions: the utterance is complete.
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    decoder.end_utt()
                    try:
                        if decoder.hyp().hypstr != '':
                            self.decode(decoder.hyp().hypstr)
                    except AttributeError:
                        # hyp() returned None: nothing was recognized.
                        pass
                    decoder.start_utt('')
        else:
            break
    decoder.end_utt()
def __init__(self, file_name='aux.wav', raspi=False, local=True):
    """Set up STT (pocketsphinx + speech_recognition) and TTS.

    local=True uses the offline pyttsx3 engine; otherwise Google Cloud
    Text-to-Speech is configured with a Spanish (es-ES) female voice.
    """
    ## load environment
    self.FILE_NAME = file_name
    self.audio = pyaudio.PyAudio()
    self.raspi = raspi
    self.local = local
    self.config = Decoder.default_config()
    self.config.set_string('-hmm', os.path.join(self.MODELDIR, 'acoustic-model'))
    self.config.set_string(
        '-dict', os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
    self.config.set_string('-logfn', os.devnull)  # discard pocketsphinx logging
    self.decoder = Decoder(self.config)
    self.r = sr.Recognizer()
    print("adjunting...")
    # Calibrate the recognizer energy threshold against ambient noise.
    with sr.Microphone() as source:
        self.r.adjust_for_ambient_noise(source)
    # tts
    if self.local:
        self.tts = pyttsx3.init()
        self.tts.setProperty('rate', self.RATE)
        self.tts.setProperty('volume', self.VOLUME)
        self.tts.setProperty('voice', 'spanish-latin-am')
    else:
        # Instantiates a client
        self.tts_client = texttospeech.TextToSpeechClient()
        # Build the voice request: Spanish (es-ES), female SSML gender.
        self.tts_voice = texttospeech.types.VoiceSelectionParams(
            language_code='es-ES',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
        # Select the type of audio file you want returned
        self.tts_audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3)
def begin_passive_listening(self):
    """Uses PocketSphinx to listen for the wakeword and call the active
    listening function
    """
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
    config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
    config.set_string('-keyphrase', self.config.get("general", "wake_word"))
    # Fixed: os.devnull is portable — the literal 'nul' only exists on Windows.
    config.set_string('-logfn', os.devnull)
    config.set_float('-kws_threshold', 1e-10)
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    stream.start_stream()
    decoder = Decoder(config)
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        decoder.process_raw(buf, False, False)
        if decoder.hyp() is not None:
            logging.debug("Wake word recognized")
            speech_input = self.active_listen()
            # Negative values are error sentinels returned by active_listen().
            if (speech_input != -1 and speech_input != -2
                    and speech_input != -3):
                for name, command in self.commands.items():
                    if speech_input in name:
                        command()
            elif speech_input == -1:
                self.speak("Sorry, I didn't catch that.")
            # Restart the utterance so the keyphrase search resumes.
            decoder.end_utt()
            decoder.start_utt()
            logging.debug("Listening for wakeword again")
def __init__(self, config=None):
    """Wrap a pocketsphinx Decoder, wiring search arguments and model paths.

    Args:
        config: a pocketsphinx Config; when None a fresh default config is
            created per instance. (The original default of
            ``Decoder.default_config()`` was evaluated once at import time,
            so every no-arg instance silently shared — and mutated — the
            same Config object.)

    Raises:
        ConfigError: if more than one search argument is set on `config`.
    """
    if config is None:
        config = Decoder.default_config()
    assert isinstance(config, Config)
    search_args_set = search_arguments_set(config)

    if len(search_args_set) == 0:
        # Use the language model by default if nothing else is set
        set_lm_path(config)
    elif len(search_args_set) > 1:
        raise ConfigError(
            "more than one search argument was set in the Config "
            "object")

    # Set the required config paths if they aren't already set
    if not (config.get_string("-hmm") and config.get_string("-dict")):
        set_hmm_and_dict_paths(config)

    self._speech_start_callback = None
    self._hypothesis_callback = None
    self._utterance_state = self._UTT_ENDED
    super(PocketSphinx, self).__init__(config)
def __init__(self, file_name='aux.wav', raspi=False):
    """Initialize pocketsphinx STT, a speech_recognition recognizer and the
    offline pyttsx3 TTS engine (Latin-American Spanish voice)."""
    self.FILE_NAME = file_name
    self.audio = pyaudio.PyAudio()
    self.raspi = raspi
    self.config = Decoder.default_config()
    self.config.set_string('-hmm', os.path.join(self.MODELDIR, 'acoustic-model'))
    self.config.set_string(
        '-dict', os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
    self.config.set_string('-logfn', os.devnull)  # discard pocketsphinx logging
    self.decoder = Decoder(self.config)
    self.r = sr.Recognizer()
    print("adjunting...")
    # Calibrate the recognizer energy threshold against ambient noise.
    with sr.Microphone() as source:
        self.r.adjust_for_ambient_noise(source)
    # tts
    self.tts = pyttsx3.init()
    self.tts.setProperty('rate', self.RATE)
    self.tts.setProperty('volume', self.VOLUME)
    self.tts.setProperty('voice', 'spanish-latin-am')
def __init__(self, in_fs, out_fs, mute_period_length, kws_frame_length): threading.Thread.__init__(self) # 初始化配置 self.daemon = True self.exit_flag = False self.in_fs = in_fs self.out_fs = out_fs self.mute_period_frames_count = int(in_fs * mute_period_length) self.kws_frames_count = int(in_fs * kws_frame_length) model_path = get_model_path() config = Decoder.default_config() config.set_string('-hmm', os.path.join(model_path, 'en-us')) # 声学模型路径 # config.set_string('-lm',"./tests/7567.lm") config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict')) # 字典路径 config.set_string('-keyphrase', 'alexa') config.set_float('-kws_threshold', 1e-20) config.set_string('-logfn', './logs/tmp') # INFO输出到其他位置 self.decoder = Decoder(config) self.decoder.start_utt() self.start()
'''
Created on Dec 29, 2013

@author: Mindaugas Greibus
'''
import sys, os
from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with the Lithuanian acoustic model, a JSGF grammar and
# the matching dictionary.
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# `with` closes the test WAV deterministically (the original leaked the
# open file handle).
with open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb') as wav_file:
    decoder.decode_raw(wav_file)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
def __init__(self):
    """Create a pocketsphinx Decoder from the module-level SPHINX_* paths."""
    cfg = Decoder.default_config()
    for option, path in (('-hmm', SPHINX_HMM),
                         ('-lm', SPHINX_LM),
                         ('-dict', SPHINX_DICT)):
        cfg.set_string(option, path)
    self.decoder = Decoder(cfg)
def record(listen_time):
    """Record `listen_time` seconds from the microphone, save it to
    livewav.wav and return the pocketsphinx transcription.

    Returns "" when nothing above the silence threshold was detected.
    Uses module-level FORMAT/RATE/CHUNK and hmdir/lmdir/dictd paths.
    """
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"
    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
    print(THRESHOLD)  # fixed: original used Python-2 print statements
    stream = p.open(format=FORMAT, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
    print("* recording")
    frames = []
    detected = False
    # Integer division (//) keeps the loop count an int under Python 3.
    for i in range(0, RATE // CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score >= THRESHOLD:
            detected = True
    if not detected:
        print("nothing detected")
        # Fixed: release audio resources before the early return (the
        # original leaked the stream and the PyAudio instance here).
        stream.close()
        p.terminate()
        return ""
    print("* done recording")
    stream.close()
    p.terminate()

    # Write the captured frames out as a mono WAV file.
    data = b''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    wavfile = os.getcwd() + "/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')
    speechRec = Decoder(config)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    return speechRec.hyp().hypstr
def recognition_worker(audio_file, queue, event, max_no_speech=120, debug=False,
                       hmm='/usr/local/share/pocketsphinx/model/en-us/en-us',
                       lm='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin',
                       cmudict='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'):
    '''
    Read audio from `audio_file` and feed it to pocketsphinx. Put
    recognized text in `queue`. Shut down if `event` is set. If no
    speech is detected for `max_no_speech` seconds, set `event` and quit.
    '''
    from pocketsphinx import Decoder
    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', cmudict)
    if not debug:
        config.set_string('-logfn', '/dev/null')  # silence pocketsphinx
    decoder = Decoder(config)
    in_speech_bf = True
    no_speech_timer = None
    now_in_speech = False
    decoder.start_utt()
    try:
        with open(audio_file, 'rb') as f:
            f.read(40)  # read RIFF header
            # TODO: Probably should sanity check the audio format...
            while not event.is_set():
                buf = f.read(1024)
                if buf:
                    decoder.process_raw(buf, False, False)
                    now_in_speech = decoder.get_in_speech()
                    if debug and now_in_speech:
                        print('Found speech', file=sys.stderr)
                    # React only on speech/silence state transitions.
                    if now_in_speech != in_speech_bf:
                        in_speech_bf = now_in_speech
                        if not in_speech_bf:
                            if debug:
                                print('Processing speech', file=sys.stderr)
                            # No speech, but there was speech before, so, process.
                            decoder.end_utt()
                            try:
                                speech = decoder.hyp().hypstr
                                if speech != '':
                                    if debug:
                                        print('Speech: ' + speech, file=sys.stderr)
                                    queue.put_nowait(speech)
                            except AttributeError:
                                # hyp() returned None: nothing recognized.
                                pass
                            decoder.start_utt()
                        else:
                            # Got some speech, reset timer.
                            no_speech_timer = None
                else:
                    if debug:
                        print('No audio', file=sys.stderr)
                    # Wait a bit...
                    event.wait(0.1)
                # Track how long we have gone without hearing any speech;
                # give up (and signal everyone via `event`) after the limit.
                if not now_in_speech:
                    if no_speech_timer is None:
                        no_speech_timer = datetime.datetime.now()
                    elif (datetime.datetime.now() - no_speech_timer).total_seconds() > max_no_speech:
                        if debug:
                            print('No speech, timing out', file=sys.stderr)
                        event.set()
    except KeyboardInterrupt:
        pass
def __init__(self, **kwargs):
    """ROS pocketsphinx node: builds a Decoder from keyword options and
    publishes recognition results on <nodename>/SpeechRec.

    Accepted kwargs mirror pocketsphinx command-line options (hmm, lm,
    dict/dic, verbose, ...) plus nodename and optional grammar_* options.
    """
    signal.signal(signal.SIGINT, self.stop)
    model_path = get_model_path()

    # Expand environment variables inside any string-valued option.
    kwargs = {
        x: os.path.expandvars(kwargs[x]) if type(kwargs[x]) is str else kwargs[x]
        for x in kwargs
    }
    nodename = kwargs.pop('nodename')
    grammar_file = kwargs.pop('grammar_file', None)
    grammar_rule = kwargs.pop('grammar_rule', None)
    grammar_name = kwargs.pop('grammar_name', None)
    kwargs.pop('esiaf_input_topic')

    # Accept 'dic' as an alias for 'dict'.
    if kwargs.get('dic') is not None and kwargs.get('dict') is None:
        kwargs['dict'] = kwargs.pop('dic')
    # Fall back to the bundled en-us model for anything unspecified.
    if kwargs.get('hmm') is None:
        kwargs['hmm'] = os.path.join(model_path, 'en-us')
    if kwargs.get('lm') is None:
        kwargs['lm'] = os.path.join(model_path, 'en-us.lm.bin')
    if kwargs.get('dict') is None and kwargs.get('dic') is None:
        kwargs['dict'] = os.path.join(model_path, 'cmudict-en-us.dict')
    # Silence pocketsphinx logging unless verbose was requested.
    if kwargs.pop('verbose', False) is False:
        if sys.platform.startswith('win'):
            kwargs['logfn'] = 'nul'
        else:
            kwargs['logfn'] = '/dev/null'

    config = Decoder.default_config()
    print(kwargs)
    # Map each remaining option onto the matching typed Config setter.
    for key, value in kwargs.items():
        if isinstance(value, bool):
            config.set_boolean('-{}'.format(key), value)
        elif isinstance(value, int):
            config.set_int('-{}'.format(key), value)
        elif isinstance(value, float):
            config.set_float('-{}'.format(key), value)
        elif isinstance(value, str):
            config.set_string('-{}'.format(key), value)

    self.decoder = Decoder(config)
    # Optionally switch the decoder to a JSGF-derived grammar search.
    if grammar_file and grammar_rule and grammar_name:
        jsgf = Jsgf(grammar_file)
        rule = jsgf.get_rule(grammar_name + '.' + grammar_rule)
        fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
        self.decoder.set_fsg(grammar_name, fsg)
        self.decoder.set_search(grammar_name)

    self.start = None
    self.finish = None
    self.speech_publisher = rospy.Publisher(nodename + '/' + 'SpeechRec',
                                            SpeechInfo, queue_size=10)
'''
Created on Dec 29, 2013

@author: Mindaugas Greibus
'''
import sys, os
from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# Fixed: close the test WAV deterministically (the original left the file
# handle open after decode_raw()).
with open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb') as wav_file:
    decoder.decode_raw(wav_file)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print ('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print ('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])