class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 mycroft_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        audio = self.queue.get()
        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
        elif connected():
            self.transcribe(audio)
        else:
            self.__speak("Mycroft seems not to be connected to the Internet")

    def transcribe(self, audio):
        text = None
        try:
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"
                LOG.warn("Access Denied at mycroft.ai")
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that")

        if text:
            payload = {
                'utterances': [text],
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", payload))
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def read_audio(self):
        audio_data = self.queue.get()
        if self.state.sleeping:
            self.try_wake_up(audio_data)
        else:
            self.process_audio(audio_data)

    def try_wake_up(self, audio):
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_audio(self, audio):
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            # TODO: Localization
            logger.warn("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            # Mirror the apology to the GNOME Shell extension over D-Bus
            bus = dbus.SessionBus()
            remote_object = bus.get_object(
                "org.gnome.Shell", "/com/mycroftaignome/MycroftGnomeResult")
            remote_object.setText(
                "Sorry, I didn't catch that.",
                dbus_interface="com.mycroftaignome.MycroftAiGnomeBox")

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
                # Show the pairing prompt in the GNOME extension and open
                # the pairing page in the browser
                bus = dbus.SessionBus()
                remote_object = bus.get_object(
                    "org.gnome.Shell",
                    "/com/mycroftaignome/MycroftGnomeResult")
                remote_object.getvoiceQuery(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai",
                    dbus_interface="com.mycroftaignome.MycroftAiGnomeBox")
                url = 'http://cerberus.mycroft.ai'
                webbrowser.open(url)
            except Exception as e:
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)
                    # Forward the transcription to the GNOME extension
                    bus = dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.gnome.Shell",
                        "/com/mycroftaignome/MycroftGnomeResult")
                    remote_object.getvoiceQuery(
                        text,
                        dbus_interface="com.mycroftaignome.MycroftAiGnomeBox")
        return runnable

    def transcribe(self, audio_segments):
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)
        for thread in threads:
            thread.join()

        if len(utterances) > 0:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
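# The transcribe() method above fans each audio segment out to its own
# worker thread and collects the results into a shared list before joining.
# Below is a minimal, self-contained sketch of that pattern; the names
# (transcribe_all, recognize) are hypothetical, not Mycroft's. list.append
# is atomic under the GIL, so the shared results list needs no extra lock.
import threading


def transcribe_all(segments, recognize):
    results = []

    def worker(seg):
        text = recognize(seg)  # may block on a remote STT service
        if text and text.strip():
            results.append(text)

    threads = [threading.Thread(target=worker, args=(seg,))
               for seg in segments]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # wait for every worker before inspecting results
    return results


# Example with a fake recognizer standing in for the remote service:
print(transcribe_all(["a", "b"], lambda seg: "heard " + seg))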
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def read_audio(self):
        timer = Stopwatch()
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            self.process_wake_up(audio)
        elif self.state.skip_wakeword:
            self.process_skip_wake_word(audio)
        else:
            self.process_wake_word(audio, timer)
        self.metrics.flush()

    def process_wake_up(self, audio):
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_wake_word(self, audio, timer):
        hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
                                                 self.metrics)
        if self.mycroft_recognizer.contains(hyp):
            extractor = WordExtractor(audio, self.mycroft_recognizer,
                                      self.metrics)
            timer.lap()
            extractor.calculate_range()
            self.metrics.timer("mycroft.recognizer.extractor.time_s",
                               timer.lap())
            audio_before = extractor.get_audio_data_before()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_before))
            audio_after = extractor.get_audio_data_after()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_after))

            SessionManager.touch()
            payload = {
                'utterance': hyp.hypstr,
                'session': SessionManager.get().session_id,
                'pos_begin': extractor.begin,
                'pos_end': extractor.end
            }
            self.emitter.emit("recognizer_loop:wakeword", payload)

            try:
                self.transcribe([audio_before, audio_after])
            except sr.UnknownValueError:
                self.__speak("Go ahead")
                self.state.skip_wakeword = True
                self.metrics.increment("mycroft.wakeword")

    def process_skip_wake_word(self, audio):
        SessionManager.touch()
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            logger.warn("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            self.metrics.increment("mycroft.recognizer.error")
        self.state.skip_wakeword = False

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as e:
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)
        return runnable

    def transcribe(self, audio_segments):
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)
        for thread in threads:
            thread.join()

        if len(utterances) > 0:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        try:
            message = self.queue.get(timeout=0.5)
        except Empty:
            return

        if message is None:
            return

        tag, data = message

        if tag == AUDIO_DATA:
            if self.state.sleeping:
                self.wake_up(data)
            else:
                self.process(data)
        elif tag == STREAM_START:
            self.stt.stream_start()
        elif tag == STREAM_DATA:
            self.stt.stream_data(data)
        elif tag == STREAM_STOP:
            self.stt.stream_stop()
        else:
            LOG.error("Unknown audio queue type %r" % message)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)

            # Report timing metrics
            report_timing(ident, 'stt', stopwatch,
                          {'transcription': transcription,
                           'stt': self.stt.__class__.__name__})

    def transcribe(self, audio):
        def send_unknown_intent():
            """ Send message that nothing was transcribed. """
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')

        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio)
            if text is not None:
                text = text.lower().strip()
                LOG.debug("STT: " + text)
            else:
                send_unknown_intent()
                LOG.info('no words were transcribed')
            return text
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                LOG.warning("Access Denied at mycroft.ai")
                return "pair my device"  # phrase to start the pairing process
            else:
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except RequestException as e:
            LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            send_unknown_intent()
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            return None

        if connected():
            dialog_name = 'backend.down'
        else:
            dialog_name = 'not connected to the internet'
        self.emitter.emit('speak', {'utterance': dialog.get(dialog_name)})

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
        except Exception as e:
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")

        if text:
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 mycroft_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        audio = self.queue.get()
        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            lines = ["I'm awake.",
                     "System rebooted.",
                     "All systems check. I am now online.",
                     "Waking up."]
            self.__speak(choice(lines))
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.__speak("Intelora seems not to be connected to the Internet.")
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"
                LOG.warn("Access Denied at Mycroft API")
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            lines = ["Sorry, I didn't catch that.",
                     "Sorry, I didn't hear you clearly.",
                     "Can you repeat what you said, please?",
                     "Can you please say that again?"]
            self.__speak(choice(lines))

        if text:
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", payload))
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()
        self.config = ConfigurationManager.get()
        self.hotword = None
        emitter.on("recognizer_loop:hotword", self.set_word)

    def set_word(self, event):
        if event.get("start_listening"):
            # set new hot word
            self.hotword = event.get("hotword",
                                     self.wakeword_recognizer.key_phrase)

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak(mycroft.dialog.get("i am awake", self.stt.lang))
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        # Hot word substitution is currently disabled:
        # if self.hotword:
        #     word = self.hotword
        #     self.hotword = None
        # else:
        word = self.wakeword_recognizer.key_phrase
        payload = {
            'utterance': word,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        LOG.debug("Transcribing audio")
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: ---------> " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")

        if text:
            # STT succeeded, send the transcribed speech on for processing
            LOG.debug("I understood you said: " + text)
            if text == "tell me more":
                LOG.info("found 'tell me more' in listener")
                # hotWordListener = self.finalHotWord
                # text = text + " about " + hotWordListener
                # Rewrite the utterance using the hot word captured earlier
                with open("hotWordFile.txt", "r+") as hotWordTemp:
                    prevHotWord = hotWordTemp.read()
                    hotWordTemp.truncate(0)
                text = "tell me about " + prevHotWord
                LOG.debug("rewritten utterance, I understood you said: " +
                          text)
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 mycroft_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        audio = self.queue.get()
        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
            self.emitter.emit("recognizer_loop:tooshort", {})
        elif connected():
            self.transcribe(audio)
        else:
            self.__speak("Mycroft seems not to be connected to the Internet")

    def transcribe(self, audio):
        text = None
        try:
            print("aud: " + str(audio))
            initial = self.stt.execute(audio)
            print("initial: " + initial)
            text = initial.lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"
                LOG.warn("Access Denied at mycroft.ai")
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that")

        if text:
            payload = {
                'utterances': [text],
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        print("going to speak " + utterance)
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", payload))
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak(mycroft.dialog.get("i am awake", self.stt.lang))
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")

        if text:
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def read_audio(self):
        audio_data = self.queue.get()
        if self.state.sleeping:
            self.try_wake_up(audio_data)
        else:
            self.process_audio(audio_data)

    def try_wake_up(self, audio):
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_audio(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            # TODO: Localization
            logger.warn("Speech Recognition could not understand audio")
            # self.__speak("Sorry, I didn't catch that.")

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "browse to cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as e:
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)
        return runnable

    def transcribe(self, audio_segments):
        utterances = []
        threads = []
        if connected():
            for audio in audio_segments:
                if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                    logger.debug("Audio too short to send to STT")
                    continue
                target = self._create_remote_stt_runnable(audio, utterances)
                t = threading.Thread(target=target)
                t.start()
                threads.append(t)
            for thread in threads:
                thread.join()

            if len(utterances) > 0:
                payload = {
                    'utterances': utterances,
                    'session': SessionManager.get().session_id
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', utterances)
            else:
                raise sr.UnknownValueError
        else:
            # TODO: Localization
            # TODO: Enclosure virtualization (might not have a button)
            self.__speak("This device is not connected to the Internet. "
                         "Either plug in a network cable or hold the button "
                         "on top for two seconds, then select wifi from the "
                         "menu")
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 1.0

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 wakeword_recognizer, wrapped_remote_recognizer,
                 wakeup_prefixes, wakeup_words):
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.ww_recognizer = wakeword_recognizer
        self.wrapped_remote_recognizer = wrapped_remote_recognizer
        self.wakeup_prefixes = wakeup_prefixes
        self.wakeup_words = wakeup_words
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.try_consume_audio()

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def try_consume_audio(self):
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)
            if hyp and hyp.hypstr.lower().find("wake up") >= 0:
                SessionManager.touch()
                self.state.sleeping = False
                self.__speak("I'm awake.")  # TODO: Localization
                self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                extractor = WakewordExtractor(audio, self.ww_recognizer,
                                              self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s",
                                   timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    self.metrics.increment("mycroft.wakeword")
            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    logger.warn(
                        "Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                self.state.skip_wakeword = False
            else:
                self.metrics.clear()
        self.metrics.flush()

    def __speak(self, utterance):
        """
        Speak commands should be asynchronous to avoid filling up the
        portaudio buffer.
        :param utterance:
        :return:
        """
        def target():
            self.emitter.emit(
                "speak",
                Message("speak",
                        metadata={
                            'utterance': utterance,
                            'session': SessionManager.get().session_id
                        }))
        threading.Thread(target=target).start()

    def _create_remote_stt_runnable(self, audio, utterances):
        def runnable():
            try:
                text = self.wrapped_remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai")
                utterances.append("pair my device")
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)
        return runnable

    def transcribe(self, audio_segments):
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)
        for thread in threads:
            thread.join()

        if len(utterances) > 0:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        while self.state.running:
            self.read()

    def read(self):
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)

            # Report timing metrics
            report_timing(ident, 'stt', stopwatch,
                          {'transcription': transcription,
                           'stt': self.stt.__class__.__name__})

    def transcribe(self, audio):
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
            return text
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                LOG.warning("Access Denied at mycroft.ai")
                return "pair my device"  # phrase to start the pairing process
            else:
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except RequestException as e:
            LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')
            if isinstance(e, IndexError):
                LOG.info('no words were transcribed')
            else:
                LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            return None

        if connected():
            dialog_name = 'backend.down'
        else:
            dialog_name = 'not connected to the internet'
        self.emitter.emit('speak', {'utterance': dialog.get(dialog_name)})

    def __speak(self, utterance):
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
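# All of the variants above share one shape: a daemon thread pulling chunks
# off a Queue, with the later versions adding a short get() timeout so the
# loop can notice state.running changing, and treating a None item as a
# harmless no-op. A minimal, runnable Python 3 sketch of that wiring follows;
# SimpleState and SimpleConsumer are hypothetical stand-ins, not Mycroft
# classes.
import threading
from queue import Queue, Empty


class SimpleState:
    def __init__(self):
        self.running = True
        self.sleeping = False


class SimpleConsumer(threading.Thread):
    def __init__(self, state, queue):
        super(SimpleConsumer, self).__init__()
        self.daemon = True
        self.state = state
        self.queue = queue

    def run(self):
        while self.state.running:
            try:
                chunk = self.queue.get(timeout=0.5)
            except Empty:
                continue  # re-check state.running twice a second
            if chunk is None:
                continue  # ignore sentinel/no-op items
            print("processing %d bytes" % len(chunk))


state = SimpleState()
q = Queue()
consumer = SimpleConsumer(state, q)
consumer.start()
q.put(b"\x00" * 4096)   # a producer thread would enqueue AudioData here
q.put(None)
state.running = False   # let the loop exit on its next timeout
consumer.join(timeout=2)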