class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue and routes each chunk to the
    proper recognizer depending on loop state: sleeping (only react to the
    wake-up phrase), skip_wakeword (transcribe directly), or normal
    (look for the wake word first).
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        """
        Args:
            state: shared loop state (running / sleeping / skip_wakeword)
            queue: queue.Queue of AudioData chunks produced by the listener
            emitter: event emitter used to publish recognizer events
            wakeup_recognizer: local recognizer used while sleeping
            mycroft_recognizer: local wake-word recognizer
            remote_recognizer: remote STT service wrapper
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        # Consume chunks until the owning loop clears state.running.
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the duration of *audio* in seconds."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def read_audio(self):
        """Take one chunk off the queue and dispatch it by loop state."""
        timer = Stopwatch()
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()

        if self.state.sleeping:
            self.process_wake_up(audio)
        elif self.state.skip_wakeword:
            self.process_skip_wake_word(audio)
        else:
            self.process_wake_word(audio, timer)

        self.metrics.flush()

    def process_wake_up(self, audio):
        """While sleeping, only react to the dedicated wake-up phrase."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_wake_word(self, audio, timer):
        """Look for the wake word; if found, send the audio before and
        after it to remote STT."""
        hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
                                                 self.metrics)

        if self.mycroft_recognizer.contains(hyp):
            # Locate the wake word within the chunk so the speech on
            # either side of it can be transcribed separately.
            extractor = WordExtractor(audio, self.mycroft_recognizer,
                                      self.metrics)
            timer.lap()
            extractor.calculate_range()
            self.metrics.timer("mycroft.recognizer.extractor.time_s",
                               timer.lap())
            audio_before = extractor.get_audio_data_before()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_before))
            audio_after = extractor.get_audio_data_after()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_after))

            SessionManager.touch()
            payload = {
                'utterance': hyp.hypstr,
                'session': SessionManager.get().session_id,
                'pos_begin': extractor.begin,
                'pos_end': extractor.end
            }
            self.emitter.emit("recognizer_loop:wakeword", payload)

            try:
                self.transcribe([audio_before, audio_after])
            except sr.UnknownValueError:
                # Nothing usable around the wake word: prompt the user and
                # accept the next chunk without requiring the wake word.
                self.__speak("Go ahead")
                self.state.skip_wakeword = True
                # NOTE(review): reconstructed from collapsed source; confirm
                # this increment belongs inside the except block.
                self.metrics.increment("mycroft.wakeword")

    def process_skip_wake_word(self, audio):
        """Transcribe a chunk directly (wake word was already heard)."""
        SessionManager.touch()
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            # logging.Logger.warn is a deprecated alias of warning
            logger.warning("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            self.metrics.increment("mycroft.recognizer.error")
        # Always re-arm the wake-word requirement after one utterance.
        self.state.skip_wakeword = False

    def __speak(self, utterance):
        """Emit a 'speak' event for *utterance* on the current session."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes *audio* remotely and
        appends any recognized text to the shared *utterances* list."""
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass  # nothing recognized in this segment; not an error
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as e:
                # Never let a worker thread die with an unlogged traceback.
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip():
                    utterances.append(text)

        return runnable

    def transcribe(self, audio_segments):
        """Transcribe the segments concurrently via remote STT and emit an
        utterance event.

        Raises:
            sr.UnknownValueError: if no segment produced any text.
        """
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)

        for thread in threads:
            thread.join()

        if utterances:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue: handles the wake-up phrase
    while sleeping, otherwise looks for the "mycroft" wake word (or
    transcribes directly when skip_wakeword is set).
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 1.0

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 wakeword_recognizer, wrapped_remote_recognizer,
                 wakeup_prefixes, wakeup_words):
        """
        Args:
            state: shared loop state (running / sleeping / skip_wakeword)
            queue: queue.Queue of AudioData chunks produced by the listener
            emitter: event emitter used to publish recognizer events
            wakeup_recognizer: local recognizer used while sleeping
            wakeword_recognizer: local wake-word recognizer
            wrapped_remote_recognizer: remote STT service wrapper
            wakeup_prefixes: wake-up phrase prefixes (kept on the instance)
            wakeup_words: wake-up words (kept on the instance)
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.ww_recognizer = wakeword_recognizer
        self.wrapped_remote_recognizer = wrapped_remote_recognizer
        self.wakeup_prefixes = wakeup_prefixes
        self.wakeup_words = wakeup_words
        self.metrics = MetricsAggregator()

    def run(self):
        # Consume chunks until the owning loop clears state.running.
        while self.state.running:
            self.try_consume_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the duration of *audio* in seconds."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def try_consume_audio(self):
        """Take one chunk off the queue and process it by loop state."""
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()

        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)

        if hyp and hyp.hypstr.lower().find("wake up") >= 0:
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                # Wake word heard: split the chunk around it and send both
                # sides to remote STT.
                extractor = WakewordExtractor(audio, self.ww_recognizer,
                                              self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s",
                                   timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    # Nothing usable around the wake word: prompt the user
                    # and skip the wake word on the next chunk.
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    # NOTE(review): reconstructed from collapsed source;
                    # confirm this increment belongs inside the except block.
                    self.metrics.increment("mycroft.wakeword")
            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    # logging.Logger.warn is a deprecated alias of warning
                    logger.warning(
                        "Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                # Always re-arm the wake-word requirement afterwards.
                self.state.skip_wakeword = False
            else:
                # No wake word and not expecting one: discard the metrics
                # collected for this chunk.
                self.metrics.clear()
        self.metrics.flush()

    def __speak(self, utterance):
        """
        Speak commands should be asynchronous to avoid filling up the
        portaudio buffer.
        :param utterance: text to speak on the current session
        :return: None (the 'speak' event is emitted from a helper thread)
        """
        def target():
            self.emitter.emit(
                "speak",
                Message("speak",
                        metadata={
                            'utterance': utterance,
                            'session': SessionManager.get().session_id
                        }))

        threading.Thread(target=target).start()

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes *audio* remotely and
        appends any recognized text to the shared *utterances* list."""
        def runnable():
            try:
                text = self.wrapped_remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass  # nothing recognized in this segment; not an error
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai")
                utterances.append("pair my device")
            except Exception as e:
                # Guard added for parity with the other AudioConsumer
                # variants: never let a worker thread die with an unlogged
                # traceback.
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip():
                    utterances.append(text)

        return runnable

    def transcribe(self, audio_segments):
        """Transcribe the segments concurrently via remote STT and emit an
        utterance event.

        Raises:
            sr.UnknownValueError: if no segment produced any text.
        """
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)

        for thread in threads:
            thread.join()

        if utterances:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue and routes each chunk to the
    proper recognizer depending on loop state: sleeping (only react to the
    wake-up phrase), skip_wakeword (transcribe directly), or normal
    (look for the wake word first).
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        """
        Args:
            state: shared loop state (running / sleeping / skip_wakeword)
            queue: queue.Queue of AudioData chunks produced by the listener
            emitter: event emitter used to publish recognizer events
            wakeup_recognizer: local recognizer used while sleeping
            mycroft_recognizer: local wake-word recognizer
            remote_recognizer: remote STT service wrapper
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        # Consume chunks until the owning loop clears state.running.
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the duration of *audio* in seconds."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    def read_audio(self):
        """Take one chunk off the queue and dispatch it by loop state."""
        timer = Stopwatch()
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()

        if self.state.sleeping:
            self.process_wake_up(audio)
        elif self.state.skip_wakeword:
            self.process_skip_wake_word(audio)
        else:
            self.process_wake_word(audio, timer)

        self.metrics.flush()

    def process_wake_up(self, audio):
        """While sleeping, only react to the dedicated wake-up phrase."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_wake_word(self, audio, timer):
        """Look for the wake word; if found, send the audio before and
        after it to remote STT."""
        hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
                                                 self.metrics)

        if self.mycroft_recognizer.contains(hyp):
            # Locate the wake word within the chunk so the speech on
            # either side of it can be transcribed separately.
            extractor = WordExtractor(audio, self.mycroft_recognizer,
                                      self.metrics)
            timer.lap()
            extractor.calculate_range()
            self.metrics.timer("mycroft.recognizer.extractor.time_s",
                               timer.lap())
            audio_before = extractor.get_audio_data_before()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_before))
            audio_after = extractor.get_audio_data_after()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_after))

            SessionManager.touch()
            payload = {
                'utterance': hyp.hypstr,
                'session': SessionManager.get().session_id,
                'pos_begin': extractor.begin,
                'pos_end': extractor.end
            }
            self.emitter.emit("recognizer_loop:wakeword", payload)

            try:
                self.transcribe([audio_before, audio_after])
            except sr.UnknownValueError:
                # Nothing usable around the wake word: prompt the user and
                # accept the next chunk without requiring the wake word.
                self.__speak("Go ahead")
                self.state.skip_wakeword = True
                # NOTE(review): reconstructed from collapsed source; confirm
                # this increment belongs inside the except block.
                self.metrics.increment("mycroft.wakeword")

    def process_skip_wake_word(self, audio):
        """Transcribe a chunk directly (wake word was already heard)."""
        SessionManager.touch()
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            # logging.Logger.warn is a deprecated alias of warning
            logger.warning("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            self.metrics.increment("mycroft.recognizer.error")
        # Always re-arm the wake-word requirement after one utterance.
        self.state.skip_wakeword = False

    def __speak(self, utterance):
        """Emit a 'speak' event for *utterance* on the current session."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes *audio* remotely and
        appends any recognized text to the shared *utterances* list."""
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass  # nothing recognized in this segment; not an error
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as e:
                # Never let a worker thread die with an unlogged traceback.
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip():
                    utterances.append(text)

        return runnable

    def transcribe(self, audio_segments):
        """Transcribe the segments concurrently via remote STT and emit an
        utterance event.

        Raises:
            sr.UnknownValueError: if no segment produced any text.
        """
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)

        for thread in threads:
            thread.join()

        if utterances:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue: handles the wake-up phrase
    while sleeping, otherwise looks for the "mycroft" wake word (or
    transcribes directly when skip_wakeword is set).
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 1.0

    def __init__(
            self, state, queue, emitter, wakeup_recognizer,
            wakeword_recognizer, wrapped_remote_recognizer, wakeup_prefixes,
            wakeup_words):
        """
        Args:
            state: shared loop state (running / sleeping / skip_wakeword)
            queue: queue.Queue of AudioData chunks produced by the listener
            emitter: event emitter used to publish recognizer events
            wakeup_recognizer: local recognizer used while sleeping
            wakeword_recognizer: local wake-word recognizer
            wrapped_remote_recognizer: remote STT service wrapper
            wakeup_prefixes: wake-up phrase prefixes (kept on the instance)
            wakeup_words: wake-up words (kept on the instance)
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.ww_recognizer = wakeword_recognizer
        self.wrapped_remote_recognizer = wrapped_remote_recognizer
        self.wakeup_prefixes = wakeup_prefixes
        self.wakeup_words = wakeup_words
        self.metrics = MetricsAggregator()

    def run(self):
        # Consume chunks until the owning loop clears state.running.
        while self.state.running:
            self.try_consume_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the duration of *audio* in seconds."""
        return float(
            len(audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    def try_consume_audio(self):
        """Take one chunk off the queue and process it by loop state."""
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer(
            "mycroft.recognizer.audio.length_s", self._audio_length(audio))
        self.queue.task_done()
        timer.start()

        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(
                audio.get_wav_data(), metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)

        if hyp and hyp.hypstr.lower().find("wake up") >= 0:
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(
                    audio.get_wav_data(), metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                # Wake word heard: split the chunk around it and send both
                # sides to remote STT.
                extractor = WakewordExtractor(
                    audio, self.ww_recognizer, self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer(
                    "mycroft.recognizer.extractor.time_s", timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    # Nothing usable around the wake word: prompt the user
                    # and skip the wake word on the next chunk.
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    # NOTE(review): reconstructed from collapsed source;
                    # confirm this increment belongs inside the except block.
                    self.metrics.increment("mycroft.wakeword")
            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    # logging.Logger.warn is a deprecated alias of warning
                    logger.warning(
                        "Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                # Always re-arm the wake-word requirement afterwards.
                self.state.skip_wakeword = False
            else:
                # No wake word and not expecting one: discard the metrics
                # collected for this chunk.
                self.metrics.clear()
        self.metrics.flush()

    def __speak(self, utterance):
        """
        Speak commands should be asynchronous to avoid filling up the
        portaudio buffer.
        :param utterance: text to speak on the current session
        :return: None (the 'speak' event is emitted from a helper thread)
        """
        def target():
            self.emitter.emit(
                "speak",
                Message("speak",
                        metadata={'utterance': utterance,
                                  'session': SessionManager.get().session_id}))

        threading.Thread(target=target).start()

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes *audio* remotely and
        appends any recognized text to the shared *utterances* list."""
        def runnable():
            try:
                text = self.wrapped_remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass  # nothing recognized in this segment; not an error
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai")
                utterances.append("pair my device")
            except Exception as e:
                # Guard added for parity with the other AudioConsumer
                # variants: never let a worker thread die with an unlogged
                # traceback.
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip():
                    utterances.append(text)

        return runnable

    def transcribe(self, audio_segments):
        """Transcribe the segments concurrently via remote STT and emit an
        utterance event.

        Raises:
            sr.UnknownValueError: if no segment produced any text.
        """
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue
            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)

        for thread in threads:
            thread.join()

        if utterances:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError