def transcribe(self, wav_data, metrics=None):
    timer = Stopwatch()
    timer.start()
    self.local_recognizer.start_utt()
    self.local_recognizer.process_raw(wav_data, False, False)
    self.local_recognizer.end_utt()
    if metrics:
        metrics.timer("mycroft.stt.local.time_s", timer.stop())
    return self.local_recognizer.hyp()

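Every snippet in this section relies on a shared Stopwatch utility (in mycroft-core it lives in mycroft.util). Below is a minimal sketch of the interface these functions assume, reconstructed from how they use it (start()/stop()/lap(), a time result, a timestamp attribute, and context-manager support); it is not the verbatim upstream class. The context-manager form lets callers time a block with plain "with stopwatch:" instead of paired start/stop calls.

import time as _time


class Stopwatch:
    """Simple wall-clock timer used for timing metrics.

    Sketch reconstructed from the call sites in this section; the real
    implementation ships with mycroft-core.
    """

    def __init__(self):
        self.timestamp = None  # start time (epoch seconds)
        self.time = None       # elapsed seconds, set by stop()

    def start(self):
        self.timestamp = _time.time()

    def lap(self):
        # Return seconds since the last start()/lap() and restart.
        now = _time.time()
        elapsed = now - self.timestamp
        self.timestamp = now
        return elapsed

    def stop(self):
        self.time = _time.time() - self.timestamp
        return self.time

    # Support the "with stopwatch:" pattern used throughout.
    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()
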
def transcribe(self, audio, language="en-US", show_all=False, metrics=None):
    timer = Stopwatch()
    timer.start()
    identity = IdentityManager().get()
    headers = {}
    if identity.token:
        headers['Authorization'] = 'Bearer %s:%s' % (identity.device_id,
                                                     identity.token)
    response = requests.post(config.get("proxy_host") +
                             "/stt/google_v2?language=%s&version=%s" %
                             (language, self.version),
                             audio.get_flac_data(), headers=headers)
    if metrics:
        t = timer.stop()
        metrics.timer("mycroft.cerberus.proxy.client.time_s", t)
        metrics.timer("mycroft.stt.remote.time_s", t)
    if response.status_code == 401:
        raise CerberusAccessDenied()
    try:
        actual_result = response.json()
    except Exception:
        raise UnknownValueError()
    log.info("STT JSON: " + json.dumps(actual_result))
    if show_all:
        return actual_result

    # return the best guess
    if "alternative" not in actual_result:
        raise UnknownValueError()
    alternatives = actual_result["alternative"]
    if len([alt for alt in alternatives if alt.get('confidence')]) > 0:
        # if there is at least one element with confidence, force it to
        # the front
        alternatives.sort(key=lambda e: e.get('confidence', 0.0),
                          reverse=True)
    for entry in alternatives:
        if "transcript" in entry:
            return entry["transcript"]
    if len(alternatives) > 0:
        log.error("Found %d entries, but none with a transcript."
                  % len(alternatives))

    # no transcriptions available
    raise UnknownValueError()

def handle_speak(event):
    """ Handle "speak" message """
    config = Configuration.get()
    Configuration.init(bus)
    global _last_stop_signal

    # Get conversation ID
    if event.context and 'ident' in event.context:
        ident = event.context['ident']
    else:
        ident = 'unknown'

    start = time.time()  # Time of speech request
    with lock:
        stopwatch = Stopwatch()
        stopwatch.start()
        utterance = event.data['utterance']
        if event.data.get('expect_response', False):
            # When expect_response is requested, the listener will be
            # restarted at the end of the next bit of spoken audio.
            bus.once('recognizer_loop:audio_output_end', _start_listener)

        # This is a bit of a hack for Picroft.  The analog audio on a Pi
        # blocks for 30 seconds fairly often, so we don't want to break on
        # periods (decreasing the chance of encountering the block).  But
        # we will keep the split for non-Picroft installs since it gives
        # user feedback faster on longer phrases.
        #
        # TODO: Remove or make an option?  This is really a hack, anyway,
        # so we likely will want to get rid of this when not running on
        # Mimic
        if (config.get('enclosure', {}).get('platform') != "picroft" and
                len(re.findall('<[^>]*>', utterance)) == 0):
            chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                              utterance)
            for chunk in chunks:
                # Check if something has aborted the speech
                if (_last_stop_signal > start or
                        check_for_signal('buttonPress')):
                    # Clear any newly queued speech
                    tts.playback.clear()
                    break
                try:
                    mute_and_speak(chunk, ident)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    LOG.error('Error in mute_and_speak', exc_info=True)
        else:
            mute_and_speak(utterance, ident)

        stopwatch.stop()
    report_timing(ident, 'speech', stopwatch, {'utterance': utterance,
                                               'tts': tts.__class__.__name__})

def process(self, audio):
    SessionManager.touch()
    payload = {
        'utterance': self.wakeword_recognizer.key_phrase,
        'session': SessionManager.get().session_id,
    }
    self.emitter.emit("recognizer_loop:wakeword", payload)

    if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
        LOG.warning("Audio too short to be processed")
    else:
        stopwatch = Stopwatch()
        with stopwatch:
            transcription = self.transcribe(audio)
        if transcription:
            ident = str(stopwatch.timestamp) + str(hash(transcription))
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [transcription],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id,
                'ident': ident
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [transcription])
        else:
            ident = str(stopwatch.timestamp)
        # Report timing metrics
        report_timing(ident, 'stt', stopwatch,
                      {'transcription': transcription,
                       'stt': self.stt.__class__.__name__})

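report_timing(ident, system, stopwatch, additional_data) is called with the same shape at every site in this section. The sketch below shows a plausible reporter under that assumption; the payload keys and the logging backend are illustrative reconstructions from the call sites, not the upstream mycroft-core implementation (which forwards the report to its metrics service).

import logging

LOG = logging.getLogger(__name__)


def report_timing(ident, system, stopwatch, additional_data=None):
    """Assemble a standardized timing report and hand it off.

    Hedged sketch: here we only log the assembled payload; the field
    names are assumptions based on how callers in this section use it.
    """
    report = {
        'id': ident,           # correlates all events of one interaction
        'system': system,      # e.g. 'stt', 'speech', 'skill_handler'
        'start_time': stopwatch.timestamp,
        'time': stopwatch.time,  # elapsed seconds
    }
    report.update(additional_data or {})
    LOG.debug("timing: {}".format(report))
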
def wrapper(message):
    stopwatch = Stopwatch()
    try:
        # TODO: Fix for real in mycroft-messagebus-client
        # Makes sure the message type is consistent with the type
        # declared in mycroft.messagebus and isinstance will work.
        message = Message(message.msg_type, data=message.data,
                          context=message.context)
        message = unmunge_message(message, skill_id)
        if on_start:
            on_start(message)

        with stopwatch:
            if len(signature(handler).parameters) == 0:
                handler()
            else:
                handler(message)
    except Exception as e:
        if on_error:
            on_error(e)
    finally:
        if on_end:
            on_end(message)

        # Send timing metrics
        context = message.context
        if context and 'ident' in context:
            report_timing(context['ident'], 'skill_handler', stopwatch,
                          {'handler': handler.__name__,
                           'skill_id': skill_id})

def read_audio(self):
    timer = Stopwatch()
    audio = self.queue.get()
    self.metrics.timer("mycroft.recognizer.audio.length_s",
                       self._audio_length(audio))
    self.queue.task_done()
    timer.start()
    if self.state.sleeping:
        self.process_wake_up(audio)
    elif self.state.skip_wakeword:
        self.process_skip_wake_word(audio)
    else:
        self.process_wake_word(audio, timer)
    self.metrics.flush()

def run(self): """Thread main loop. get audio and viseme data from queue and play.""" while not self._terminated: try: snd_type, data, visemes, ident = self.queue.get(timeout=2) self.blink(0.5) if not self._processing_queue: self._processing_queue = True self.tts.begin_audio() stopwatch = Stopwatch() with stopwatch: if snd_type == 'wav': self.p = play_wav(data, environment=self.pulse_env) elif snd_type == 'mp3': self.p = play_mp3(data, environment=self.pulse_env) if visemes: self.show_visemes(visemes) self.p.communicate() self.p.wait() send_playback_metric(stopwatch, ident) if self.queue.empty(): self.tts.end_audio() self._processing_queue = False self.blink(0.2) except Empty: pass except Exception as e: LOG.exception(e) if self._processing_queue: self.tts.end_audio() self._processing_queue = False
def wrapper(message):
    stopwatch = Stopwatch()
    try:
        message = unmunge_message(message, skill_id)
        if on_start:
            on_start(message)

        with stopwatch:
            if len(signature(handler).parameters) == 0:
                handler()
            else:
                handler(message)
    except Exception as e:
        if on_error:
            on_error(e)
    finally:
        if on_end:
            on_end(message)

        # Send timing metrics
        context = message.context
        if context and 'ident' in context:
            report_timing(context['ident'], 'skill_handler', stopwatch,
                          {'handler': handler.__name__,
                           'skill_id': skill_id})

def handler(message):
    # indicate fallback handling start
    ws.emit(Message("mycroft.skill.handler.start",
                    data={'handler': "fallback"}))

    stopwatch = Stopwatch()
    handler_name = None
    with stopwatch:
        for _, handler in sorted(cls.fallback_handlers.items(),
                                 key=operator.itemgetter(0)):
            try:
                if handler(message):
                    # indicate completion
                    handler_name = get_handler_name(handler)
                    ws.emit(Message(
                        'mycroft.skill.handler.complete',
                        data={'handler': "fallback",
                              "fallback_handler": handler_name}))
                    break
            except Exception:
                LOG.exception('Exception in fallback.')
        else:
            # No fallback could handle the utterance
            ws.emit(Message('complete_intent_failure'))
            warning = "No fallback could handle intent."
            LOG.warning(warning)
            # indicate completion with exception
            ws.emit(Message('mycroft.skill.handler.complete',
                            data={'handler': "fallback",
                                  'exception': warning}))

    # Send timing metric
    if message.context and message.context.get('ident'):
        ident = message.context['ident']
        report_timing(ident, 'fallback_handler', stopwatch,
                      {'handler': handler_name})

def handler(message):
    start, stop = message.data.get('fallback_range', (0, 101))
    # indicate fallback handling start
    LOG.debug('Checking fallbacks in range '
              '{} - {}'.format(start, stop))
    bus.emit(message.forward("mycroft.skill.handler.start",
                             data={'handler': "fallback"}))

    stopwatch = Stopwatch()
    handler_name = None
    with stopwatch:
        sorted_handlers = sorted(cls.fallback_handlers.items(),
                                 key=operator.itemgetter(0))
        handlers = [f[1] for f in sorted_handlers
                    if start <= f[0] < stop]
        for handler in handlers:
            try:
                if handler(message):
                    # indicate completion
                    status = True
                    handler_name = get_handler_name(handler)
                    bus.emit(message.forward(
                        'mycroft.skill.handler.complete',
                        data={'handler': "fallback",
                              "fallback_handler": handler_name}))
                    break
            except Exception:
                LOG.exception('Exception in fallback.')
        else:
            status = False
            # indicate completion with exception
            warning = 'No fallback could handle intent.'
            bus.emit(message.forward('mycroft.skill.handler.complete',
                                     data={'handler': "fallback",
                                           'exception': warning}))
            if 'fallback_range' not in message.data:
                # Old system TODO: Remove in 20.08
                # No fallback could handle the utterance
                bus.emit(message.forward('complete_intent_failure'))
            LOG.warning(warning)

    # return if the utterance was handled to the caller
    bus.emit(message.response(data={'handled': status}))

    # Send timing metric
    if message.context.get('ident'):
        ident = message.context['ident']
        report_timing(ident, 'fallback_handler', stopwatch,
                      {'handler': handler_name})

def handle_utterance(self, message): """ Main entrypoint for handling user utterances with Mycroft skills Monitor the messagebus for 'recognizer_loop:utterance', typically generated by a spoken interaction but potentially also from a CLI or other method of injecting a 'user utterance' into the system. Utterances then work through this sequence to be handled: 1) Active skills attempt to handle using converse() 2) Adapt intent handlers 3) Padatious intent handlers 4) Other fallbacks Args: message (Message): The messagebus data """ try: # Get language of the utterance lang = message.data.get('lang', "en-us") set_active_lang(lang) utterances = message.data.get('utterances', '') stopwatch = Stopwatch() with stopwatch: # Give active skills an opportunity to handle the utterance converse = self._converse(utterances, lang) if not converse: # No conversation, use intent system to handle utterance intent = self._adapt_intent_match(utterances, lang) padatious_intent = PadatiousService.instance.calc_intent( utterances[0]) if converse: # Report that converse handled the intent and return ident = message.context['ident'] if message.context else None report_timing(ident, 'intent_service', stopwatch, {'intent_type': 'converse'}) return elif intent and not (padatious_intent and padatious_intent.conf >= 0.95): # Send the message to the Adapt intent's handler unless # Padatious is REALLY sure it was directed at it instead. reply = message.reply(intent.get('intent_type'), intent) else: # Allow fallback system to handle utterance # NOTE: Padatious intents are handled this way, too reply = message.reply('intent_failure', { 'utterance': utterances[0], 'lang': lang }) self.bus.emit(reply) self.send_metrics(intent, message.context, stopwatch) except Exception as e: LOG.exception(e)
def handle_speak(event): """Handle "speak" message Parse sentences and invoke text to speech service. """ config = Configuration.get() Configuration.set_config_update_handlers(bus) global _last_stop_signal # Get conversation ID if event.context and 'ident' in event.context: ident = event.context['ident'] else: ident = 'unknown' start = time.time() # Time of speech request with lock: stopwatch = Stopwatch() stopwatch.start() utterance = event.data['utterance'] listen = event.data.get('expect_response', False) # This is a bit of a hack for Picroft. The analog audio on a Pi blocks # for 30 seconds fairly often, so we don't want to break on periods # (decreasing the chance of encountering the block). But we will # keep the split for non-Picroft installs since it give user feedback # faster on longer phrases. # # TODO: Remove or make an option? This is really a hack, anyway, # so we likely will want to get rid of this when not running on Mimic if (config.get('enclosure', {}).get('platform') != "picroft" and len(re.findall('<[^>]*>', utterance)) == 0): # Remove any whitespace present after the period, # if a character (only alpha) ends with a period # ex: A. Lincoln -> A.Lincoln # so that we don't split at the period utterance = re.sub(r'\b([A-za-z][\.])(\s+)', r'\g<1>', utterance) chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s', utterance) # Apply the listen flag to the last chunk, set the rest to False chunks = [(chunks[i], listen if i == len(chunks) - 1 else False) for i in range(len(chunks))] for chunk, listen in chunks: # Check if somthing has aborted the speech if (_last_stop_signal > start or check_for_signal('buttonPress')): # Clear any newly queued speech tts.playback.clear() break try: mute_and_speak(chunk, ident, listen) except KeyboardInterrupt: raise except Exception: LOG.error('Error in mute_and_speak', exc_info=True) else: mute_and_speak(utterance, ident) stopwatch.stop() report_timing(ident, 'speech', stopwatch, {'utterance': utterance, 'tts': tts.__class__.__name__})
def try_consume_audio(self):
    timer = Stopwatch()
    hyp = None
    audio = self.queue.get()
    self.metrics.timer("mycroft.recognizer.audio.length_s",
                       self._audio_length(audio))
    self.queue.task_done()
    timer.start()
    if self.state.sleeping:
        hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(),
                                                metrics=self.metrics)
        if hyp and hyp.hypstr:
            logger.debug("sleeping recognition: " + hyp.hypstr)
        if hyp and hyp.hypstr.lower().find("wake up") >= 0:
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")
    else:
        if not self.state.skip_wakeword:
            hyp = self.ww_recognizer.transcribe(audio.get_wav_data(),
                                                metrics=self.metrics)

        if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
            extractor = WakewordExtractor(audio, self.ww_recognizer,
                                          self.metrics)
            timer.lap()
            extractor.calculate_range()
            self.metrics.timer("mycroft.recognizer.extractor.time_s",
                               timer.lap())
            audio_before = extractor.get_audio_data_before()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_before))
            audio_after = extractor.get_audio_data_after()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_after))

            SessionManager.touch()
            payload = {
                'utterance': hyp.hypstr,
                'session': SessionManager.get().session_id,
                'pos_begin': int(extractor.range.begin),
                'pos_end': int(extractor.range.end)
            }
            self.emitter.emit("recognizer_loop:wakeword", payload)

            try:
                self.transcribe([audio_before, audio_after])
            except sr.UnknownValueError:
                self.__speak("Go ahead")
                self.state.skip_wakeword = True
                self.metrics.increment("mycroft.wakeword")
        elif self.state.skip_wakeword:
            SessionManager.touch()
            try:
                self.transcribe([audio])
            except sr.UnknownValueError:
                logger.warn("Speech Recognition could not understand audio")
                self.__speak("Sorry, I didn't catch that.")
                self.metrics.increment("mycroft.recognizer.error")
            self.state.skip_wakeword = False
        else:
            self.metrics.clear()
    self.metrics.flush()

def run(self): """Thread main loop. Get audio and extra data from queue and play. The queue messages is a tuple containing snd_type: 'mp3' or 'wav' telling the loop what format the data is in data: path to temporary audio data videmes: list of visemes to display while playing listen: if listening should be triggered at the end of the sentence. Playback of audio is started and the visemes are sent over the bus the loop then wait for the playback process to finish before starting checking the next position in queue. If the queue is empty the tts.end_audio() is called possibly triggering listening. """ while not self._terminated: try: (snd_type, data, visemes, ident, listen) = self.queue.get(timeout=2) self.blink(0.5) if not self._processing_queue: self._processing_queue = True self.tts.begin_audio() stopwatch = Stopwatch() with stopwatch: if snd_type == 'wav': self.p = play_wav(data, environment=self.pulse_env) elif snd_type == 'mp3': self.p = play_mp3(data, environment=self.pulse_env) if visemes: self.show_visemes(visemes) if self.p: self.p.communicate() self.p.wait() report_timing(ident, 'speech_playback', stopwatch) if self.queue.empty(): self.tts.end_audio(listen) self._processing_queue = False self.blink(0.2) except Empty: pass except Exception as e: LOG.exception(e) if self._processing_queue: self.tts.end_audio(listen) self._processing_queue = False
def get_tts(self, sentence, wav_file, speaker=None):
    stopwatch = Stopwatch()
    # Strip the literal <speak> tags; str.lstrip()/rstrip() treat their
    # argument as a character set and can eat leading/trailing letters
    # of the content itself.
    to_speak = format_speak_tags(sentence)  # TODO: Update utils to handle DM
    if to_speak.startswith("<speak>"):
        to_speak = to_speak[len("<speak>"):]
    if to_speak.endswith("</speak>"):
        to_speak = to_speak[:-len("</speak>")]
    LOG.debug(to_speak)
    if to_speak:
        url = self._build_url(to_speak)
        with stopwatch:
            wav_data = urlopen(url).read()
        LOG.debug(f"Request time={stopwatch.time}")
        with stopwatch:
            with open(wav_file, "wb") as f:
                f.write(wav_data)
        LOG.debug(f"File access time={stopwatch.time}")
    return wav_file, None

def get_tts(self, sentence, wav_file, speaker=None):
    stopwatch = Stopwatch()
    speaker = speaker or dict()
    # Read utterance data from passed configuration
    request_lang = speaker.get("language", self.lang)
    to_speak = format_speak_tags(sentence)
    LOG.debug(to_speak)
    if to_speak:
        synthesizer = self._get_synthesizer(request_lang)
        with stopwatch:
            wav_data = synthesizer.tts(sentence)
        LOG.debug(f"Synthesis time={stopwatch.time}")
        with stopwatch:
            synthesizer.save_wav(wav_data, wav_file)
        LOG.debug(f"File access time={stopwatch.time}")
    return wav_file, None

def process(self, audio):
    path = pathlib.Path().absolute()
    settings_path = str(path) + '/mycroft/client/speech/set_config.txt'

    if self._audio_length(audio) >= self.MIN_AUDIO_SIZE:
        stopwatch = Stopwatch()
        with stopwatch:
            transcription = self.transcribe(audio)

        settings = {"rate": " '1.0' ", "volume": " '80%' "}
        text = transcription or ''  # transcribe() may return None
        # speed
        if "quickly" in text:
            settings["rate"] = " '1.6' "
        if "slowly" in text:
            settings["rate"] = " '.6' "
        # volume
        if "loudly" in text:
            settings["volume"] = " '100%' "
        if "softly" in text:
            settings["volume"] = " '50%' "
        # Open at write time so the file is always closed properly
        with open(settings_path, 'w') as settings_file:
            settings_file.write(str(settings))

        if transcription:
            ident = str(stopwatch.timestamp) + str(hash(transcription))
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [transcription],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id,
                'ident': ident
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [transcription])

            # Report timing metrics
            report_timing(ident, 'stt', stopwatch,
                          {'transcription': transcription,
                           'stt': self.stt.__class__.__name__})
        else:
            ident = str(stopwatch.timestamp)
    else:
        LOG.warning("Audio too short to be processed")

def wrapper(message):
    skill_data = {'name': get_handler_name(handler)}
    stopwatch = Stopwatch()
    try:
        message = unmunge_message(message, self.skill_id)
        # Indicate that the skill handler is starting if requested
        if handler_info:
            msg_type = handler_info + '.start'
            self.emitter.emit(Message(msg_type, skill_data))

        with stopwatch:
            is_bound = bool(getattr(handler, 'im_self', None))
            num_args = len(getargspec(handler).args) - is_bound
            if num_args == 0:
                handler()
            else:
                handler(message)
        self.settings.store()  # Store settings if they've changed
    except Exception as e:
        # Convert "MyFancySkill" to "My Fancy Skill" for speaking
        handler_name = re.sub(r"([a-z])([A-Z])", r"\1 \2", self.name)
        msg_data = {'skill': handler_name}
        msg = dialog.get('skill.error', self.lang, msg_data)
        self.speak(msg)
        LOG.exception(msg)
        # append exception information in message
        skill_data['exception'] = e.message
    finally:
        if once:
            self.remove_event(name)

        # Indicate that the skill handler has completed
        if handler_info:
            msg_type = handler_info + '.complete'
            self.emitter.emit(Message(msg_type, skill_data))

        # Send timing metrics
        context = message.context
        if context and 'ident' in context:
            report_timing(context['ident'], 'skill_handler', stopwatch,
                          {'handler': handler.__name__})

def wrapper(message):
    skill_data = {'name': get_handler_name(handler)}
    stopwatch = Stopwatch()
    try:
        message = unmunge_message(message, self.skill_id)
        # Indicate that the skill handler is starting if requested
        if handler_info:
            msg_type = handler_info + '.start'
            self.bus.emit(message.reply(msg_type, skill_data))

        if once:
            # Remove registered one-time handler before invoking,
            # allowing them to re-schedule themselves.
            self.remove_event(name)

        with stopwatch:
            if len(signature(handler).parameters) == 0:
                handler()
            else:
                handler(message)
        self.settings.store()  # Store settings if they've changed
    except Exception as e:
        # Convert "MyFancySkill" to "My Fancy Skill" for speaking
        handler_name = camel_case_split(self.name)
        msg_data = {'skill': handler_name}
        msg = dialog.get('skill.error', self.lang, msg_data)
        self.speak(msg)
        LOG.exception(msg)
        # append exception information in message
        skill_data['exception'] = repr(e)
    finally:
        # Indicate that the skill handler has completed
        if handler_info:
            msg_type = handler_info + '.complete'
            self.bus.emit(message.reply(msg_type, skill_data))

        # Send timing metrics
        context = message.context
        if context and 'ident' in context:
            report_timing(context['ident'], 'skill_handler', stopwatch,
                          {'handler': handler.__name__})

def _get_synthesizer(self, language) -> Synthesizer:
    if '-' in language:
        language = language.split('-')[0]
    stopwatch = Stopwatch()
    with stopwatch:
        model_name = None
        for model in self.models:
            _, lang, dataset, name = model.split('/')
            LOG.debug(f"{lang}|{name}")
            if language in lang:
                model_name = model
                if name == self.preferred_model:
                    break
        model_path, config_path, model_item = self.manager.download_model(
            model_name)
        vocoder_name = model_item.get(
            "default_vocoder",
            "vocoder_models/universal/libri-tts/fullband-melgan")
        vocoder_path, vocoder_config_path, _ = self.manager.download_model(
            vocoder_name)
        speakers_file_path = ''
        encoder_path = ''
        encoder_config_path = ''
        use_cuda = False
        synthesizer = Synthesizer(
            model_path,
            config_path,
            speakers_file_path,
            vocoder_path,
            vocoder_config_path,
            encoder_path,
            encoder_config_path,
            use_cuda,
        )
    LOG.debug(f"Get synthesizer time={stopwatch.time}")
    return synthesizer

def handle_utterance(self, message): """Main entrypoint for handling user utterances with Mycroft skills Monitor the messagebus for 'recognizer_loop:utterance', typically generated by a spoken interaction but potentially also from a CLI or other method of injecting a 'user utterance' into the system. Utterances then work through this sequence to be handled: 1) Active skills attempt to handle using converse() 2) Padatious high match intents (conf > 0.95) 3) Adapt intent handlers 5) High Priority Fallbacks 6) Padatious near match intents (conf > 0.8) 7) General Fallbacks 8) Padatious loose match intents (conf > 0.5) 9) Catch all fallbacks including Unknown intent handler If all these fail the complete_intent_failure message will be sent and a generic info of the failure will be spoken. Args: message (Message): The messagebus data """ try: lang = _get_message_lang(message) set_default_lf_lang(lang) utterances = message.data.get('utterances', []) combined = _normalize_all_utterances(utterances) stopwatch = Stopwatch() # Create matchers padatious_matcher = PadatiousMatcher(self.padatious_service) # List of functions to use to match the utterance with intent. # These are listed in priority order. match_funcs = [ self._converse, padatious_matcher.match_high, self.adapt_service.match_intent, self.fallback.high_prio, padatious_matcher.match_medium, self.fallback.medium_prio, padatious_matcher.match_low, self.fallback.low_prio ] match = None with stopwatch: # Loop through the matching functions until a match is found. for match_func in match_funcs: match = match_func(combined, lang, message) if match: break if match: if match.skill_id: self.add_active_skill(match.skill_id) # If the service didn't report back the skill_id it # takes on the responsibility of making the skill "active" # Launch skill if not handled by the match function if match.intent_type: reply = message.reply(match.intent_type, match.intent_data) # Add back original list of utterances for intent handlers # match.intent_data only includes the utterance with the # highest confidence. reply.data["utterances"] = utterances self.bus.emit(reply) else: # Nothing was able to handle the intent # Ask politely for forgiveness for failing in this vital task self.send_complete_intent_failure(message) self.send_metrics(match, message.context, stopwatch) except Exception as err: LOG.exception(err)
def wrapper(message):
    data = {'name': get_handler_name(handler)}
    stopwatch = Stopwatch()
    try:
        # Indicate that the skill handler is starting if requested
        if handler_info:
            msg_type = handler_info + '.start'
            self.emitter.emit(Message(msg_type, data))

        with stopwatch:
            if need_self:
                # When registering from decorator self is required
                if len(getargspec(handler).args) == 2:
                    handler(self, unmunge_message(message, self.skill_id))
                elif len(getargspec(handler).args) == 1:
                    handler(self)
                elif len(getargspec(handler).args) == 0:
                    # Zero may indicate multiple decorators, trying the
                    # usual call signatures
                    try:
                        handler(self,
                                unmunge_message(message, self.skill_id))
                    except TypeError:
                        handler(self)
                else:
                    LOG.error("Unexpected argument count:" +
                              str(len(getargspec(handler).args)))
                    raise TypeError
            else:
                if len(getargspec(handler).args) == 2:
                    handler(unmunge_message(message, self.skill_id))
                elif len(getargspec(handler).args) == 1:
                    handler()
                else:
                    LOG.error("Unexpected argument count:" +
                              str(len(getargspec(handler).args)))
                    raise TypeError
        self.settings.store()  # Store settings if they've changed
    except Exception as e:
        # Convert "MyFancySkill" to "My Fancy Skill" for speaking
        handler_name = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", self.name)
        # TODO: Localize
        self.speak("An error occurred while processing a request in " +
                   handler_name)
        LOG.error("An error occurred while processing a request in " +
                  self.name, exc_info=True)
        # append exception information in message
        data['exception'] = e.message
    finally:
        if once:
            self.remove_event(name)

        # Indicate that the skill handler has completed
        if handler_info:
            msg_type = handler_info + '.complete'
            self.emitter.emit(Message(msg_type, data))

        # Send timing metrics
        context = message.context
        if context and 'ident' in context:
            report_timing(context['ident'], 'skill_handler', stopwatch,
                          {'handler': handler.__name__})

def handle_utterance(self, message): """ Main entrypoint for handling user utterances with Mycroft skills Monitor the messagebus for 'recognizer_loop:utterance', typically generated by a spoken interaction but potentially also from a CLI or other method of injecting a 'user utterance' into the system. Utterances then work through this sequence to be handled: 1) Active skills attempt to handle using converse() 2) Padatious high match intents (conf > 0.95) 3) Adapt intent handlers 5) Fallbacks: - Padatious near match intents (conf > 0.8) - General fallbacks - Padatious loose match intents (conf > 0.5) - Unknown intent handler Args: message (Message): The messagebus data """ try: # Get language of the utterance lang = message.data.get('lang', "en-us") set_active_lang(lang) utterances = message.data.get('utterances', []) # normalize() changes "it's a boy" to "it is a boy", etc. norm_utterances = [ normalize(u.lower(), remove_articles=False) for u in utterances ] # Build list with raw utterance(s) first, then optionally a # normalized version following. combined = utterances + list( set(norm_utterances) - set(utterances)) LOG.debug("Utterances: {}".format(combined)) stopwatch = Stopwatch() intent = None padatious_intent = None with stopwatch: # Give active skills an opportunity to handle the utterance converse = self._converse(combined, lang, message) if not converse: # No conversation, use intent system to handle utterance intent = self._adapt_intent_match(utterances, norm_utterances, lang) for utt in combined: _intent = PadatiousService.instance.calc_intent(utt) if _intent: best = padatious_intent.conf if padatious_intent \ else 0.0 if best < _intent.conf: padatious_intent = _intent LOG.debug("Padatious intent: {}".format(padatious_intent)) LOG.debug(" Adapt intent: {}".format(intent)) if converse: # Report that converse handled the intent and return LOG.debug("Handled in converse()") ident = None if message.context and 'ident' in message.context: ident = message.context['ident'] report_timing(ident, 'intent_service', stopwatch, {'intent_type': 'converse'}) return elif (intent and intent.get('confidence', 0.0) > 0.0 and not (padatious_intent and padatious_intent.conf >= 0.95)): # Send the message to the Adapt intent's handler unless # Padatious is REALLY sure it was directed at it instead. self.update_context(intent) # update active skills skill_id = intent['intent_type'].split(":")[0] self.add_active_skill(skill_id) # Adapt doesn't handle context injection for one_of keywords # correctly. Workaround this issue if possible. try: intent = workaround_one_of_context(intent) except LookupError: LOG.error('Error during workaround_one_of_context') reply = message.reply(intent.get('intent_type'), intent) else: # Allow fallback system to handle utterance # NOTE: A matched padatious_intent is handled this way, too # TODO: Need to redefine intent_failure when STT can return # multiple hypothesis -- i.e. len(utterances) > 1 reply = message.reply( 'intent_failure', { 'utterance': utterances[0], 'norm_utt': norm_utterances[0], 'lang': lang }) self.bus.emit(reply) self.send_metrics(intent, message.context, stopwatch) except Exception as e: LOG.exception(e)
def handle_utterance(self, message):
    """ Main entrypoint for handling user utterances with Mycroft skills

    Monitor the messagebus for 'recognizer_loop:utterance', typically
    generated by a spoken interaction but potentially also from a CLI
    or other method of injecting a 'user utterance' into the system.

    Utterances then work through this sequence to be handled:
    1) Active skills attempt to handle using converse()
    2) Adapt intent handlers
    3) Padatious intent handlers
    4) Other fallbacks

    Args:
        message (Message): The messagebus data
    """
    # JN: Code borrowed from get_scheduled_event_status() in core.py
    completed_callback = False
    completed_status = 'failed'  # assume fail

    def completion_handler(message):  # JN
        nonlocal completed_callback
        nonlocal completed_status
        LOG.debug("Callback called: " + message.serialize())
        LOG.debug('    type ' + str(type(message)))
        if message.data is not None:
            completed_status = message.data['status']
            LOG.debug('Completed status is ' + completed_status)
        completed_callback = True

    def wait_for_reply():  # JN
        nonlocal completed_callback
        num_tries = 0
        # wait up to 30 secs; weather takes e.g. 8 seconds
        LOG.debug('Waiting for reply, completed callback is ' +
                  str(completed_callback))
        while completed_callback is False and num_tries < 300:
            time.sleep(0.1)
            num_tries += 1
        LOG.debug('Waited for reply, num_tries is ' + str(num_tries))
        LOG.debug('    completed callback is ' + str(completed_callback))
        completed_callback = False  # for next time

    try:
        # Get language of the utterance
        lang = message.data.get('lang', "en-us")
        utterances = message.data.get('utterances', '')

        self.bus.on('skill.handler.complete', completion_handler)  # JN

        # JN: stopwatch doesn't seem to be used, so removed the
        # "with stopwatch:" block
        stopwatch = Stopwatch()

        # JN: Give active skills an opportunity to handle the utterance
        converse = self._converse(utterances, lang)

        # JN: code moved to here, finishes the converse handling
        if converse:
            # Report that converse handled the intent and return
            LOG.debug('Converse handling intent')
            ident = message.context['ident'] if message.context else None
            report_timing(ident, 'intent_service', stopwatch,
                          {'intent_type': 'converse'})
            return

        # No conversation, use intent system to handle utterance
        # (the former "if not converse:" guard is redundant here)
        for intent in self._adapt_intent_match(utterances, lang):
            # JN: uses a generator
            padatious_intent = PadatiousService.instance.calc_intent(
                utterances[0])
            if intent and not (padatious_intent and
                               padatious_intent.conf >= 0.95):
                # Send the message to the Adapt intent's handler unless
                # Padatious is REALLY sure it was directed at it instead.
                reply = message.reply(intent.get('intent_type'), intent)
            else:
                # Allow fallback system to handle utterance
                # NOTE: Padatious intents are handled this way, too
                LOG.info('Padatious handling or failure?')
                reply = message.reply('intent_failure',
                                      {'utterance': utterances[0],
                                       'lang': lang})
            LOG.debug('Intent bus call msg ' + reply.serialize())
            self.bus.emit(reply)
            self.send_metrics(intent, message.context, stopwatch)

            wait_for_reply()
            if completed_status == 'succeeded':
                # we are finished now with this utterance
                LOG.debug('intent succeeded, utterance handled by ' +
                          str(intent))
                self.bus.remove('skill.handler.complete',
                                completion_handler)
                return
            else:
                LOG.debug('intent failed, trying next one ' + str(intent))

        LOG.info('Intent loop finished')
        # we couldn't find a successful handler
        # TODO: a handler that says why the semantics of every intent
        # failed rather than generic messages
        reply = message.reply('intent_failure',
                              {'utterance': utterances[0],
                               'lang': lang})
        self.bus.emit(reply)
        self.bus.remove('skill.handler.complete', completion_handler)
    except Exception as e:
        LOG.exception(e)

def wrapper(message):
    try:
        # Indicate that the skill handler is starting
        name = get_handler_name(handler)
        self.emitter.emit(Message("mycroft.skill.handler.start",
                                  data={'handler': name}))
        stopwatch = Stopwatch()
        with stopwatch:
            if need_self:
                # When registering from decorator self is required
                if len(getargspec(handler).args) == 2:
                    handler(self, message)
                elif len(getargspec(handler).args) == 1:
                    handler(self)
                elif len(getargspec(handler).args) == 0:
                    # Zero may indicate multiple decorators, trying the
                    # usual call signatures
                    try:
                        handler(self, message)
                    except TypeError:
                        handler(self)
                else:
                    LOG.error("Unexpected argument count:" +
                              str(len(getargspec(handler).args)))
                    raise TypeError
            else:
                if len(getargspec(handler).args) == 2:
                    handler(message)
                elif len(getargspec(handler).args) == 1:
                    handler()
                else:
                    LOG.error("Unexpected argument count:" +
                              str(len(getargspec(handler).args)))
                    raise TypeError
        self.settings.store()  # Store settings if they've changed

        # Send timing metrics
        context = message.context
        if context and 'ident' in context:
            report_timing(context['ident'], 'skill_handler', stopwatch,
                          {'handler': handler.__name__})
    except Exception as e:
        # Convert "MyFancySkill" to "My Fancy Skill" for speaking
        name = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", self.name)
        # TODO: Localize
        self.speak("An error occurred while processing a request in " +
                   name)
        LOG.error("An error occurred while processing a request in " +
                  self.name, exc_info=True)
        # indicate completion with exception
        self.emitter.emit(Message('mycroft.skill.handler.complete',
                                  data={'handler': name,
                                        'exception': e.message}))
    # Indicate that the skill handler has completed
    self.emitter.emit(Message('mycroft.skill.handler.complete',
                              data={'handler': name}))