def listen(MODE):
    """Listen on the microphone and return the first recognized keyword.

    MODE 0 matches drink requests via checkRequest(); MODE 1 matches
    confirmations via checkConfirm().  Returns the matched keyword, or
    None if the audio stream ends without a match.
    """
    CORPUS = 6278
    model_path = get_model_path()
    home_path = "/home/the0s/Desktop/HCR_Python"
    print(model_path)
    print(home_path)

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_path, 'hub4wsj_sc_8k'))
    config.set_string('-lm', os.path.join(home_path, str(CORPUS) + '.lm.bin'))
    config.set_string('-dict', os.path.join(home_path, str(CORPUS) + '.dic'))
    config.set_string('-logfn', '/dev/null')  # silence pocketsphinx logging
    decoder = Decoder(config)

    # Pick the matcher once instead of re-testing MODE for every word.
    if MODE == 0:       # DrinkRequest
        checker = checkRequest
    elif MODE == 1:     # DrinkConfirm
        checker = checkConfirm
    else:
        checker = None

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    stream.start_stream()
    try:
        in_speech_bf = False
        decoder.start_utt()
        while True:
            buf = stream.read(1024)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    # Speech -> silence transition: utterance is complete.
                    decoder.end_utt()
                    if decoder.hyp() is not None:
                        words = decoder.hyp().hypstr.split()
                        print(words)
                        if checker is not None:
                            for item in words:
                                # Call the matcher once per word (the original
                                # called it twice: once to test, once for the value).
                                output = checker(item)
                                if output != "NONE":
                                    return output
                    decoder.start_utt()
        decoder.end_utt()
        return None
    finally:
        # Release the audio device on every exit path (the original leaked
        # the stream and the PyAudio instance when no keyword was matched).
        stream.stop_stream()
        stream.close()
        p.terminate()
def run(self):
    """Microphone capture loop: stream audio into pocketsphinx and hand
    every recognized utterance to self.decode() until terminated."""
    cfg = Decoder.default_config()
    cfg.set_string('-hmm', self.config.hmmPS)
    cfg.set_string('-lm', self.config.lmPS)
    cfg.set_string('-dict', self.config.dictPS)
    # Speaker-adaptation transform is optional; apply only when present.
    if os.path.isfile(self.config.mllrPS):
        cfg.set_string('-mllr', self.config.mllrPS)
    decoder = Decoder(cfg)

    audio = pyaudio.PyAudio()
    mic = audio.open(format=pyaudio.paInt16, channels=1, rate=16000,
                     input=True, frames_per_buffer=1024)
    mic.start_stream()
    self.samplewith = audio.get_sample_size(pyaudio.paInt16)

    speaking = True
    decoder.start_utt('')
    while not self._terminate:
        chunk = mic.read(1024)
        if not chunk:
            break
        if self.save:
            # Keep a raw copy; flush once the buffer grows too large so the
            # microphone is never left recording indefinitely.
            self.liSave.append(chunk)
            self.numSave += 1
            if self.numSave > self.maxSave:
                self.activeSave(self.fichWAV)
        decoder.process_raw(chunk, False, False)
        if decoder.get_in_speech() != speaking:
            speaking = decoder.get_in_speech()
            if not speaking:
                # Speech -> silence: close the utterance and dispatch the text.
                decoder.end_utt()
                try:
                    text = decoder.hyp().hypstr
                    if text != '':
                        self.decode(text)
                except AttributeError:
                    pass  # hyp() was None: nothing recognized
                decoder.start_utt('')
    decoder.end_utt()
def run(self):
    """Worker loop: configure a pocketsphinx decoder, read the microphone,
    and forward each non-empty hypothesis to self.decode()."""
    decoder_config = Decoder.default_config()
    decoder_config.set_string('-hmm', self.config.hmmPS)
    decoder_config.set_string('-lm', self.config.lmPS)
    decoder_config.set_string('-dict', self.config.dictPS)
    if os.path.isfile(self.config.mllrPS):
        # Optional MLLR speaker-adaptation matrix.
        decoder_config.set_string('-mllr', self.config.mllrPS)
    dec = Decoder(decoder_config)

    pa = pyaudio.PyAudio()
    stream = pa.open(format=pyaudio.paInt16, channels=1, rate=16000,
                     input=True, frames_per_buffer=1024)
    stream.start_stream()
    self.samplewith = pa.get_sample_size(pyaudio.paInt16)

    was_in_speech = True
    dec.start_utt('')
    while not self._terminate:
        frame = stream.read(1024)
        if not frame:
            break
        if self.save:
            # Guard against leaving the microphone recording forever:
            # flush the saved audio once it exceeds the configured cap.
            self.liSave.append(frame)
            self.numSave += 1
            if self.numSave > self.maxSave:
                self.activeSave(self.fichWAV)
        dec.process_raw(frame, False, False)
        now_in_speech = dec.get_in_speech()
        if now_in_speech == was_in_speech:
            continue
        was_in_speech = now_in_speech
        if now_in_speech:
            continue
        # Utterance just ended: collect the hypothesis, if any.
        dec.end_utt()
        try:
            if dec.hyp().hypstr != '':
                self.decode(dec.hyp().hypstr)
        except AttributeError:
            pass  # no hypothesis produced
        dec.start_utt('')
    dec.end_utt()
def retrieve_scores(word):
    """Force-align `<word>.wav` against `<word>-align.jsgf` and return
    the per-segment scores for the last utterance found in the file.

    Expects the wav file and the JSGF grammar in the working directory.
    Returns whatever retrieve_segments() produces (empty list when no
    speech was recognized).
    """
    filename = word + '.wav'
    grammarname = word + '-align.jsgf'
    model_path = get_model_path()

    # Initialize the config values
    config = DefaultConfig()
    config.set_boolean('-verbose', False)
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_boolean('-lm', False)  # grammar-driven search, no language model
    config.set_string('-dict', 'phonemes.dict.txt')
    config.set_boolean('-backtrace', True)
    config.set_boolean('-bestpath', False)
    config.set_boolean('-fsgusefiller', False)
    decoder = Decoder(config)

    # Set the search to the JSGF grammar.  get_rule() doubles as a sanity
    # check that the expected rule exists in the grammar file.
    jsgf = Jsgf(grammarname)
    jsgf.get_rule('forcing.' + word)
    decoder.set_jsgf_file('grammar', grammarname)
    decoder.set_search('grammar')

    utt_started = False
    scores = []
    decoder.start_utt()
    # `with` guarantees the wav file is closed (the original leaked the handle).
    with open(filename, 'rb') as stream:
        while True:
            buf = stream.read(1024)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech and not utt_started:
                utt_started = True
            if not in_speech and utt_started:
                # Speech -> silence: close the utterance and harvest scores.
                decoder.end_utt()
                hyp = decoder.hyp()
                if hyp is not None:
                    print('hyp: %s' % (hyp.best_score))
                    print_segments(decoder)
                    scores = retrieve_segments(decoder)
                decoder.start_utt()
                utt_started = False
    decoder.end_utt()
    print('scores:', scores)
    return scores
class VoiceService(object):
    """Voice prompt/response service built on a pocketsphinx keyword decoder.

    Plays prompts (a URL via mplayer, or text via pyttsx3) and then listens
    for a target keyphrase, tracking each prompt's lifecycle in ``self.prompts``.
    """

    # Default audio source; None presumably selects the system default device
    # (passed straight to sphinxbase.Ad — TODO confirm).
    audio_device = None
    buffer_size = 2048
    sampling_rate = 16000

    def __init__(self):
        config = get_decoder_config()
        self.decoder = Decoder(config)
        self.speech = pyttsx3.init()
        self.audio = sphinxbase.Ad(self.audio_device, self.sampling_rate)
        self.buffer = bytearray(self.buffer_size)
        # Remember the decoder's initial search name (not restored anywhere
        # in this class, but kept available).
        self.default_search = self.decoder.get_search()
        self.in_speech = False
        self.max_history = 100       # cap on stored recognized phrases
        self.phrases = []            # history of recognized phrases (dicts)
        self.prompts = {}            # prompt id (as str) -> prompt dict
        self.next_prompt_id = 1
        self.current_prompt = None   # prompt currently being listened for
        self.prompt_queue = queue.Queue()

    def create_prompt(self, message=None, message_url=None, search="enable", timeout=15):
        """
        Create a new prompt and add it to the queue.

        Currently, only one type of prompt is supported. We play a message,
        then wait for someone to say a specific word (the search word)
        within the allotted amount of time.

        The status of the prompt can be retrieved by calling get_prompt
        with the appropriate id.

        timeout: prompt timeout in seconds, expected to be either None or
        numeric.
        """
        if timeout is not None:
            # Be forgiving of caller who may have passed timeout as a string.
            timeout = float(timeout)

        prompt = {
            "created_time": time.time(),
            "detected": False,
            "detected_time": None,
            "id": self.get_next_prompt_id(),
            "message": message,
            "message_url": message_url,
            "search": search,
            "search_started": False,
            "search_started_time": None,
            "played": False,
            "played_time": None,
            "timeout": timeout,
            "timed_out": False
        }

        self.prompts[str(prompt['id'])] = prompt
        self.prompt_queue.put(prompt)
        return prompt

    def get_next_prompt_id(self):
        """ Get a unique ID for a prompt. """
        tmp = self.next_prompt_id
        self.next_prompt_id += 1
        return tmp

    def get_phrases(self):
        """ Get the history of detected phrases. """
        return self.phrases

    def get_prompt(self, prompt_id):
        """ Get information about a prompt. """
        return self.prompts[str(prompt_id)]

    def get_status(self):
        """ Get the system status. """
        status = {
            "current_prompt": self.current_prompt,
            "in_speech": self.decoder.get_in_speech(),
            "queue_length": self.prompt_queue.qsize(),
            "search": self.decoder.get_search()
        }
        return status

    def play_prompt(self, prompt):
        """Play a prompt: a message_url via mplayer, else its message via TTS."""
        prompt['played_time'] = time.time()
        if prompt.get("message_url", None) is not None:
            cmd = ["mplayer", "-ao", "pulse", prompt['message_url']]
            subprocess.call(cmd)
        elif prompt.get("message", None) is not None:
            self.speech.say(prompt['message'])
            self.speech.runAndWait()
        prompt['played'] = True

    def process_hypothesis(self, hypothesis):
        """Record a decoder hypothesis in the phrase history."""
        print("SPEECH {}".format(hypothesis.hypstr))
        phrase = {
            "search": self.decoder.get_search(),
            "time": time.time(),
            "text": hypothesis.hypstr
        }
        self.phrases.append(phrase)
        # Trim history to the most recent max_history entries.
        del self.phrases[:-self.max_history]

    def run_next_prompt(self):
        """Dequeue the next prompt (or queue a default listener) and start its search."""
        if self.prompt_queue.empty():
            # Idle: fall back to listening for the wake phrase indefinitely.
            self.create_prompt(None, search="paradrop", timeout=None)

        self.current_prompt = self.prompt_queue.get_nowait()
        self.decoder.set_search(self.current_prompt['search'])

        # Pause recording while the prompt plays so we do not hear ourselves.
        self.audio.stop_recording()
        self.play_prompt(self.current_prompt)
        self.audio.start_recording()

        self.current_prompt['search_started_time'] = time.time()
        self.current_prompt['search_started'] = True

    def detect_timeout(self):
        """ Check if the current prompt has timed out. """
        if self.current_prompt is None:
            # No active prompt to timeout.
            return False
        if self.decoder.get_in_speech():
            # Defer timeout if decoder reports that speech is in progress. A
            # person may be speaking the target phrase currently.
            return False
        if self.current_prompt['timeout'] is None:
            # If timeout is None, then only timeout when there is another item
            # in the queue.
            return not self.prompt_queue.empty()
        else:
            diff = time.time() - self.current_prompt['search_started_time']
            return diff >= self.current_prompt['timeout']

    def run(self):
        """Main loop: play prompts and decode microphone audio until each
        prompt is detected or times out.  Never returns."""
        # Register the keyphrase each named search listens for.
        self.decoder.set_keyphrase("activate", "activate")
        self.decoder.set_keyphrase("allow", "allow")
        self.decoder.set_keyphrase("enable", "enable")
        self.decoder.set_keyphrase("paradrop", "para drop")

        self.audio.start_recording()
        while True:
            if self.current_prompt is None:
                self.run_next_prompt()
                self.decoder.start_utt()

            self.audio.readinto(self.buffer)
            self.decoder.process_raw(self.buffer, False, False)

            # Speech -> silence edge (tracked via self.in_speech from the
            # previous iteration): the utterance is complete.
            if self.in_speech and not self.decoder.get_in_speech():
                self.decoder.end_utt()
                hypothesis = self.decoder.hyp()
                if hypothesis is not None:
                    self.process_hypothesis(hypothesis)
                    self.current_prompt['detected'] = True
                    self.current_prompt['detected_time'] = time.time()
                    self.current_prompt = None
                else:
                    self.decoder.start_utt()

            if self.detect_timeout():
                self.decoder.end_utt()
                self.current_prompt['timed_out'] = True
                self.current_prompt = None

            self.in_speech = self.decoder.get_in_speech()
class PocketSphinxASR(ASR):
    """Offline ASR module backed by CMU pocketsphinx.

    Downloads per-language model packs from the project's GitHub mirror and
    decodes audio streamed by a Recorder.
    """

    NAME = 'Pocketsphinx ASR'
    DEPENDENCIES = {
        'system': [
            'swig',
            'libpulse-dev'
        ],
        'pip': [
            'pocketsphinx==0.1.15'
        ]
    }
    # Per-language model files: acoustic model tarball, language model, dictionary.
    LANGUAGE_PACKS = {
        'en': [
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.tar',
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.lm.bin',
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/cmudict-en-us.dict'
        ],
        'fr': [
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.tar',
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.lm.bin',
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/cmudict-fr-fr.dict'
        ],
        'de': [
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.tar',
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.lm.bin',
            f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/cmudict-de-de.dict'
        ]
    }

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = False
        self._decoder: Optional[Decoder] = None
        self._config = None

    def onStart(self):
        """Ensure the language pack is installed and build the decoder."""
        super().onStart()
        if not self.checkLanguage():
            self.downloadLanguage()

        # All model files live in the venv's pocketsphinx model directory.
        modelDir = f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model'
        lang = self.LanguageManager.activeLanguageAndCountryCode.lower()

        self._config = Decoder.default_config()
        self._config.set_string('-hmm', f'{modelDir}/{lang}')
        self._config.set_string('-lm', f'{modelDir}/{lang}.lm.bin')
        self._config.set_string('-dict', f'{modelDir}/cmudict-{lang}.dict')
        self._decoder = Decoder(self._config)

    def checkLanguage(self) -> bool:
        """Return True when the acoustic model directory for the active language exists."""
        if not Path(self.Commons.rootDir(), f'venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}').exists():
            self.logInfo('Missing language model')
            return False
        return True

    def timeout(self):
        """Best-effort close of the current utterance on ASR timeout."""
        super().timeout()
        try:
            self._decoder.end_utt()
        except Exception:
            # If this fails we don't care, at least we tried to close the
            # utterance.  (Narrowed from a bare `except:` so that
            # KeyboardInterrupt/SystemExit are not swallowed.)
            pass

    def downloadLanguage(self) -> bool:
        """Fetch and install the model pack for the active language."""
        self.logInfo(f'Downloading language model for "{self.LanguageManager.activeLanguage}"')
        venv = Path(self.Commons.rootDir(), 'venv/lib/python3.7/site-packages/pocketsphinx/')
        for url in self.LANGUAGE_PACKS[self.LanguageManager.activeLanguage]:
            filename = Path(url).name
            download = Path(venv, 'model', filename)
            self.Commons.downloadFile(url=f'{url}?raw=true', dest=str(download))
            if download.suffix == '.tar':
                # Acoustic model: unpack into its own directory, replacing any
                # previous installation, then drop the archive.
                dest = Path(venv, 'model', self.LanguageManager.activeLanguageAndCountryCode.lower())
                if dest.exists():
                    shutil.rmtree(dest)
                # `with` closes the archive (the original leaked the TarFile).
                with tarfile.open(str(download)) as tar:
                    tar.extractall(str(dest))
                download.unlink()
        self.logInfo('Downloaded and installed')
        return True

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        """Decode audio from the session's recorder.

        Returns an ASRResult with the stripped transcription, or None when
        nothing was recognized or the operation timed out.
        """
        super().decodeStream(session)

        result = None
        with Stopwatch() as processingTime:
            with Recorder(self._timeout) as recorder:
                self.ASRManager.addRecorder(session.siteId, recorder)
                self._decoder.start_utt()
                inSpeech = False
                for chunk in recorder:
                    if self._timeout.isSet():
                        break
                    self._decoder.process_raw(chunk, False, False)
                    if self._decoder.get_in_speech() != inSpeech:
                        inSpeech = self._decoder.get_in_speech()
                        if not inSpeech:
                            # Speech ended: close the utterance and take the
                            # hypothesis once (hyp() may return None).
                            self._decoder.end_utt()
                            result = self._decoder.hyp()
                            break
            self.end(recorder, session)

        if not result:
            return None
        return ASRResult(
            text=result.hypstr.strip(),
            session=session,
            # Reuse the captured hypothesis instead of re-querying the decoder
            # (the original called hyp() again here, which could raise if the
            # decoder state had moved on).
            likelihood=result.prob,
            processingTime=processingTime.time
        )
def recognition_worker(audio_file, queue, event, max_no_speech=120, debug=False,
                       hmm='/usr/local/share/pocketsphinx/model/en-us/en-us',
                       lm='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin',
                       cmudict='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'):
    '''
    Read audio from `audio_file` and feed it to pocketsphinx.  Put
    recognized text in `queue`.  Shut down if `event` is set.  If no
    speech is detected for `max_no_speech` seconds, set `event` and quit.
    '''
    from pocketsphinx import Decoder

    cfg = Decoder.default_config()
    cfg.set_string('-hmm', hmm)
    cfg.set_string('-lm', lm)
    cfg.set_string('-dict', cmudict)
    if not debug:
        cfg.set_string('-logfn', '/dev/null')
    decoder = Decoder(cfg)

    previously_in_speech = True
    silence_started = None
    currently_in_speech = False

    decoder.start_utt()
    try:
        with open(audio_file, 'rb') as audio:
            audio.read(40)  # read RIFF header
            # TODO: Probably should sanity check the audio format...
            while not event.is_set():
                chunk = audio.read(1024)
                if not chunk:
                    if debug:
                        print('No audio', file=sys.stderr)
                    # Wait a bit...
                    event.wait(0.1)
                else:
                    decoder.process_raw(chunk, False, False)
                    currently_in_speech = decoder.get_in_speech()
                    if debug and currently_in_speech:
                        print('Found speech', file=sys.stderr)
                    if currently_in_speech != previously_in_speech:
                        previously_in_speech = currently_in_speech
                        if not currently_in_speech:
                            if debug:
                                print('Processing speech', file=sys.stderr)
                            # No speech, but there was speech before, so, process.
                            decoder.end_utt()
                            try:
                                speech = decoder.hyp().hypstr
                                if speech != '':
                                    if debug:
                                        print('Speech: ' + speech, file=sys.stderr)
                                    queue.put_nowait(speech)
                            except AttributeError:
                                pass  # no hypothesis for this utterance
                            decoder.start_utt()
                        else:
                            # Got some speech, reset timer.
                            silence_started = None

                # Track continuous silence; give up after max_no_speech seconds.
                if not currently_in_speech:
                    if silence_started is None:
                        silence_started = datetime.datetime.now()
                    elif (datetime.datetime.now() - silence_started).total_seconds() > max_no_speech:
                        if debug:
                            print('No speech, timing out', file=sys.stderr)
                        event.set()
    except KeyboardInterrupt:
        pass
# NOTE(review): fragment — the stream-open call begins before this chunk and
# the final print() continues past it; presumably `stream`, `decoder`, RATE
# and CHUNK are defined earlier in the original script (verify there).
rate=RATE, input=True, frames_per_buffer=CHUNK)

#Indicate listening for next utterance
print("READY....")
frames = []
utt_started = False
decoder.start_utt(None)  # old pocketsphinx API: start_utt took an utterance id

while True:
    data = stream.read(CHUNK)
    time.sleep(0.100)
    #frames.append(data)
    decoder.process_raw(data, False, False)
    in_speech = decoder.get_in_speech()
    if in_speech and not utt_started:
        #silence -> speech transition,
        #let user know that he is heard
        print("Started...\n")
        utt_started = True
    if not in_speech and utt_started:
        #speech -> silence transition,
        #time to start new utterance
        decoder.end_utt()
        # Retrieve hypothesis.
        hypothesis = decoder.hyp()
        if hypothesis is not None:
            print('Best hypothesis: ', hypothesis.best_score,
class PocketSphinxAsr(Asr):
    """Offline ASR module backed by CMU pocketsphinx, with templated
    language-pack downloads and partial-text callbacks."""

    NAME = 'Pocketsphinx Asr'
    DEPENDENCIES = {
        'system': ['swig', 'libpulse-dev'],
        'pip': ['pocketsphinx==0.1.15']
    }
    # URL templates; %lang% is replaced with the lowercase language code.
    # NOTE(review): this is a set literal, so iteration order is arbitrary —
    # harmless here because each file is handled independently.
    LANGUAGE_PACK = {
        f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/%lang%/%lang%.tar',
        f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/%lang%/%lang%.lm.bin',
        f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/%lang%/cmudict-%lang%.dict'
    }

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = False
        self._decoder: Optional[Decoder] = None
        self._config = None

    def onStart(self):
        """Ensure the language pack is installed and build the decoder."""
        super().onStart()
        if not self.checkLanguage():
            self.downloadLanguage()

        # All model files live in the venv's pocketsphinx model directory.
        modelDir = f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model'
        lang = self.LanguageManager.activeLanguageAndCountryCode.lower()

        self._config = Decoder.default_config()
        self._config.set_string('-hmm', f'{modelDir}/{lang}')
        self._config.set_string('-lm', f'{modelDir}/{lang}.lm.bin')
        self._config.set_string('-dict', f'{modelDir}/cmudict-{lang}.dict')
        self._decoder = Decoder(self._config)

    def checkLanguage(self) -> bool:
        """Return True when the acoustic model directory for the active language exists."""
        if not Path(
                self.Commons.rootDir(),
                f'venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}'
        ).exists():
            self.logInfo('Missing language model')
            return False
        return True

    def timeout(self):
        """Best-effort close of the current utterance on ASR timeout."""
        super().timeout()
        try:
            self._decoder.end_utt()
        except Exception:
            # If this fails we don't care, at least we tried to close the
            # utterance.  (Narrowed from a bare `except:` so that
            # KeyboardInterrupt/SystemExit are not swallowed.)
            pass

    def downloadLanguage(self, forceLang: str = '') -> bool:
        """Fetch and install the model pack for `forceLang` (or the active
        language).  Falls back to en-US when the requested pack is missing.
        Returns True on success."""
        lang = forceLang or self.LanguageManager.activeLanguageAndCountryCode
        self.logInfo(f'Downloading language model for "{lang}"')
        venv = Path(self.Commons.rootDir(), 'venv/lib/python3.7/site-packages/pocketsphinx/')
        for url in self.LANGUAGE_PACK:
            url = url.replace('%lang%', lang.lower())
            filename = Path(url).name
            download = Path(venv, 'model', filename)
            result = self.Commons.downloadFile(url=f'{url}?raw=true', dest=str(download))
            if not result:
                if forceLang:
                    return False
                # TODO be universal
                # Propagate the fallback's outcome instead of ignoring it and
                # continuing with the remaining (equally missing) files — the
                # original always returned True here even on total failure.
                return self.downloadLanguage(forceLang='en-US')
            if download.suffix == '.tar':
                # Acoustic model: unpack into its own directory, replacing any
                # previous installation, then drop the archive.
                dest = Path(venv, 'model', lang.lower())
                if dest.exists():
                    shutil.rmtree(dest)
                # `with` closes the archive (the original leaked the TarFile).
                with tarfile.open(str(download)) as tar:
                    tar.extractall(str(dest))
                download.unlink()
        self.logInfo('Downloaded and installed')
        return True

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        """Decode audio from the session's recorder, emitting periodic
        partial-text callbacks.  Returns an ASRResult or None."""
        super().decodeStream(session)

        result = None
        counter = 0
        with Stopwatch() as processingTime:
            with Recorder(self._timeout, session.user, session.deviceUid) as recorder:
                self.ASRManager.addRecorder(session.deviceUid, recorder)
                self._recorder = recorder
                self._decoder.start_utt()
                inSpeech = False
                for chunk in recorder:
                    if self._timeout.isSet():
                        break
                    self._decoder.process_raw(chunk, False, False)

                    # Report intermediate text roughly every 10 chunks.
                    hypothesis = self._decoder.hyp()
                    if hypothesis:
                        counter += 1
                        if counter == 10:
                            self.partialTextCaptured(session, hypothesis.hypstr, hypothesis.prob, processingTime.time)
                            counter = 0

                    if self._decoder.get_in_speech() != inSpeech:
                        inSpeech = self._decoder.get_in_speech()
                        if not inSpeech:
                            # Speech ended: close the utterance and take the
                            # hypothesis once (hyp() may return None).
                            self._decoder.end_utt()
                            result = self._decoder.hyp()
                            break

        self.end()

        if not result:
            return None
        return ASRResult(
            text=result.hypstr.strip(),
            session=session,
            # Reuse the captured hypothesis instead of re-querying the decoder.
            likelihood=result.prob,
            processingTime=processingTime.time)
# NOTE(review): fragment — the stream-open call begins before this chunk;
# presumably `stream`, `decoder` and CHUNK are defined earlier in the
# original script (verify there).
input=True, frames_per_buffer=CHUNK)

#Indicate listening for next utterance
print ("READY....")
frames = []
utt_started = False
decoder.start_utt(None)  # old pocketsphinx API: start_utt took an utterance id

while True:
    data = stream.read(CHUNK)
    time.sleep (0.100)
    #frames.append(data)
    decoder.process_raw(data, False, False)
    in_speech = decoder.get_in_speech()
    if in_speech and not utt_started:
        #silence -> speech transition,
        #let user know that he is heard
        print("Started...\n")
        utt_started = True
    if not in_speech and utt_started:
        #speech -> silence transition,
        #time to start new utterance
        decoder.end_utt()
        # Retrieve hypothesis.
        hypothesis = decoder.hyp()
        if hypothesis is not None:
            print ('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
class VoiceIOHandler(JarvisIOHandler):
    """Voice I/O: pocketsphinx for offline low-power listening, Google STT
    otherwise, and pyvona for speech output."""

    def __init__(self):
        JarvisIOHandler.__init__(self)

        # Local (offline) recognizer used in low-power mode.
        hmm = '/usr/local/share/pocketsphinx/model/en-us/en-us'
        dic = '/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'
        lm = '/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin'
        config = Decoder.default_config()
        config.set_string('-hmm', hmm)
        config.set_string('-lm', lm)
        config.set_string('-dict', dic)
        config.set_string('-logfn', '/dev/null')  # silence pocketsphinx logging
        self.decoder = Decoder(config)
        self.microphone = pyaudio.PyAudio()

        # TTS credentials: first two lines of the config are the pyvona keys.
        # `with` guarantees the file is closed even if parsing fails (the
        # original used open/close pairs that leaked on error).
        with open('configs/pyvona.txt') as pyvona_config:
            pvcfg = pyvona_config.readlines()
        self.voice = pyvona.create_voice(pvcfg[0].strip(), pvcfg[1].strip())
        self.voice.region = 'us-west'
        self.voice.voice_name = 'Brian'
        self.voice.sentence_break = 200

        with open('configs/GoogleSTT.txt') as googleSTT_config:
            self.key = googleSTT_config.readlines()[0].strip()

        self.recognizer = sr.Recognizer()
        with sr.Microphone() as source:
            self.recognizer.adjust_for_ambient_noise(source)

    def waitForInput(self):
        """Block until an utterance is captured; return it lower-cased.

        Low-power mode decodes locally with pocketsphinx; otherwise audio is
        sent to Google STT ('CNU' = could not understand, 'CNC' = could not
        connect).
        """
        if self._isLowPower:
            utt = ''
            stream = self.microphone.open(format=pyaudio.paInt16, channels=1,
                                          rate=16000, input=True,
                                          frames_per_buffer=1024)
            stream.start_stream()
            in_speech_bf = True
            self.decoder.start_utt()
            while True:
                buf = stream.read(1024)
                if buf:
                    self.decoder.process_raw(buf, False, False)
                    if self.decoder.get_in_speech() != in_speech_bf:
                        in_speech_bf = self.decoder.get_in_speech()
                        if not in_speech_bf:
                            # Speech -> silence: utterance finished.
                            self.decoder.end_utt()
                            try:
                                if self.decoder.hyp().hypstr != '':
                                    utt = self.decoder.hyp().hypstr
                                    break
                            except AttributeError:
                                pass  # nothing recognized; keep listening
                            self.decoder.start_utt()
            stream.stop_stream()
            stream.close()
            # Normalized from the Python-2 `print utt` statement so the file
            # parses under Python 3 as well (single-argument print() behaves
            # identically under Python 2).
            print(utt)
            return utt.lower().strip()
        else:
            with sr.Microphone() as source:
                print('Listening')
                audio = self.recognizer.listen(source)
            print('Recognizing...')
            try:
                rec = self.recognizer.recognize_google(audio, key=self.key).lower().strip()
                print(rec)
                return rec
            except sr.UnknownValueError:
                print("Google Speech Recognition could not understand audio")
                return 'CNU'
            except sr.RequestError as e:
                print("Could not request results from Google Speech Recognition service; {0}".format(e))
                return 'CNC'

    def output(self, text_to_output):
        """Speak the given text via pyvona."""
        self.voice.speak(text_to_output)