Example #1
0
 def transcribe(self, wav_data, metrics=None):
     timer = Stopwatch()
     timer.start()
     self.local_recognizer.start_utt()
     self.local_recognizer.process_raw(wav_data, False, False)
     self.local_recognizer.end_utt()
     if metrics:
         metrics.timer("mycroft.stt.local.time_s", timer.stop())
     return self.local_recognizer.hyp()
Example #2
0
 def transcribe(self, wav_data, metrics=None):
     timer = Stopwatch()
     timer.start()
     self.local_recognizer.start_utt()
     self.local_recognizer.process_raw(wav_data, False, False)
     self.local_recognizer.end_utt()
     if metrics:
         metrics.timer("mycroft.stt.local.time_s", timer.stop())
     return self.local_recognizer.hyp()
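Every snippet in this listing times its work with the same Stopwatch helper. The following is a minimal sketch of the interface the call sites assume (start/stop/lap, a timestamp recorded at start, a time attribute holding the last measured duration, and context-manager support); it is not the actual mycroft.util implementation.

import time


class Stopwatch:
    """Minimal sketch of the timer interface used in these examples."""

    def __init__(self):
        self.timestamp = None  # wall-clock time when timing started
        self.time = None       # duration of the last measured run

    def start(self):
        self.timestamp = time.time()

    def lap(self):
        # Return the time since start (or since the previous lap)
        # and restart the clock.
        now = time.time()
        elapsed = now - self.timestamp
        self.timestamp = now
        return elapsed

    def stop(self):
        self.time = time.time() - self.timestamp
        return self.time

    def __enter__(self):
        self.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()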
Example #3
0
    def transcribe(self,
                   audio,
                   language="en-US",
                   show_all=False,
                   metrics=None):
        timer = Stopwatch()
        timer.start()
        identity = IdentityManager().get()
        headers = {}
        if identity.token:
            headers['Authorization'] = 'Bearer %s:%s' % (identity.device_id,
                                                         identity.token)

        response = requests.post(config.get("proxy_host") +
                                 "/stt/google_v2?language=%s&version=%s" %
                                 (language, self.version),
                                 audio.get_flac_data(),
                                 headers=headers)

        if metrics:
            t = timer.stop()
            metrics.timer("mycroft.cerberus.proxy.client.time_s", t)
            metrics.timer("mycroft.stt.remote.time_s", t)

        if response.status_code == 401:
            raise CerberusAccessDenied()

        try:
            actual_result = response.json()
        except ValueError:
            raise UnknownValueError()

        log.info("STT JSON: " + json.dumps(actual_result))
        if show_all:
            return actual_result

        # return the best guess
        if "alternative" not in actual_result:
            raise UnknownValueError()
        alternatives = actual_result["alternative"]
        if len([alt for alt in alternatives if alt.get('confidence')]) > 0:
            # if there is at least one element with confidence, force it to
            # the front
            alternatives.sort(key=lambda e: e.get('confidence', 0.0),
                              reverse=True)

        for entry in alternatives:
            if "transcript" in entry:
                return entry["transcript"]

        if len(alternatives) > 0:
            log.error("Found %d entries, but none with a transcript." %
                      len(alternatives))

        # no transcriptions available
        raise UnknownValueError()
Example #4
0
def handle_speak(event):
    """
        Handle "speak" message
    """
    config = Configuration.get()
    Configuration.init(bus)
    global _last_stop_signal

    # Get conversation ID
    if event.context and 'ident' in event.context:
        ident = event.context['ident']
    else:
        ident = 'unknown'

    start = time.time()  # Time of speech request
    with lock:
        stopwatch = Stopwatch()
        stopwatch.start()
        utterance = event.data['utterance']
        if event.data.get('expect_response', False):
            # When expect_response is requested, the listener will be restarted
            # at the end of the next bit of spoken audio.
            bus.once('recognizer_loop:audio_output_end', _start_listener)

        # This is a bit of a hack for Picroft.  The analog audio on a Pi blocks
        # for 30 seconds fairly often, so we don't want to break on periods
        # (decreasing the chance of encountering the block).  But we will
        # keep the split for non-Picroft installs since it gives user feedback
        # faster on longer phrases.
        #
        # TODO: Remove or make an option?  This is really a hack, anyway,
        # so we likely will want to get rid of this when not running on Mimic
        if (config.get('enclosure', {}).get('platform') != "picroft" and
                len(re.findall('<[^>]*>', utterance)) == 0):
            chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                              utterance)
            for chunk in chunks:
                # Check if something has aborted the speech
                if (_last_stop_signal > start or
                        check_for_signal('buttonPress')):
                    # Clear any newly queued speech
                    tts.playback.clear()
                    break
                try:
                    mute_and_speak(chunk, ident)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    LOG.error('Error in mute_and_speak', exc_info=True)
        else:
            mute_and_speak(utterance, ident)

        stopwatch.stop()
    report_timing(ident, 'speech', stopwatch, {'utterance': utterance,
                                               'tts': tts.__class__.__name__})
Example #5
0
    def transcribe(
            self, audio, language="en-US", show_all=False, metrics=None):
        timer = Stopwatch()
        timer.start()
        identity = IdentityManager().get()
        headers = {}
        if identity.token:
            headers['Authorization'] = 'Bearer %s:%s' % (
                identity.device_id, identity.token)

        response = requests.post(config.get("proxy_host") +
                                 "/stt/google_v2?language=%s&version=%s"
                                 % (language, self.version),
                                 audio.get_flac_data(),
                                 headers=headers)

        if metrics:
            t = timer.stop()
            metrics.timer("mycroft.cerberus.proxy.client.time_s", t)
            metrics.timer("mycroft.stt.remote.time_s", t)

        if response.status_code == 401:
            raise CerberusAccessDenied()

        try:
            actual_result = response.json()
        except ValueError:
            raise UnknownValueError()

        log.info("STT JSON: " + json.dumps(actual_result))
        if show_all:
            return actual_result

        # return the best guess
        if "alternative" not in actual_result:
            raise UnknownValueError()
        alternatives = actual_result["alternative"]
        if len([alt for alt in alternatives if alt.get('confidence')]) > 0:
            # if there is at least one element with confidence, force it to
            # the front
            alternatives.sort(
                key=lambda e: e.get('confidence', 0.0), reverse=True)

        for entry in alternatives:
            if "transcript" in entry:
                return entry["transcript"]

        if len(alternatives) > 0:
            log.error(
                "Found %d entries, but none with a transcript." % len(
                    alternatives))

        # no transcriptions available
        raise UnknownValueError()
Example #6
0
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)
            # Report timing metrics
            report_timing(ident, 'stt', stopwatch, {
                'transcription': transcription,
                'stt': self.stt.__class__.__name__
            })
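Example #6 above is the first snippet that reports through report_timing; most of the examples below call it with the same (ident, system, stopwatch, extra-data) shape. Below is a hedged stub of that interface, consistent with these call sites but not the real mycroft.metrics implementation.

def report_timing(ident, system, stopwatch, additional_data=None):
    """Stub matching the call sites in this listing; the real version
    forwards the measurement to the metrics backend."""
    payload = {
        'id': ident,                        # correlation id for one interaction
        'system': system,                   # e.g. 'stt', 'speech', 'skill_handler'
        'start_time': stopwatch.timestamp,  # when the measured block began
        'time': stopwatch.time,             # measured duration in seconds
    }
    payload.update(additional_data or {})
    print(payload)  # stand-in for the actual report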
Example #7
0
    def wrapper(message):
        stopwatch = Stopwatch()
        try:
            # TODO: Fix for real in mycroft-messagebus-client
            # Makes sure the message type is consistent with the type declared
            # in mycroft.messagebus and isinstance will work.
            message = Message(message.msg_type,
                              data=message.data,
                              context=message.context)

            message = unmunge_message(message, skill_id)
            if on_start:
                on_start(message)

            with stopwatch:
                if len(signature(handler).parameters) == 0:
                    handler()
                else:
                    handler(message)

        except Exception as e:
            if on_error:
                on_error(e)
        finally:
            if on_end:
                on_end(message)

            # Send timing metrics
            context = message.context
            if context and 'ident' in context:
                report_timing(context['ident'], 'skill_handler', stopwatch, {
                    'handler': handler.__name__,
                    'skill_id': skill_id
                })
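The len(signature(handler).parameters) check above lets one wrapper serve handlers declared either with or without a message argument (inspect.signature omits self on bound methods, so it also works for skill methods). A standalone sketch:

from inspect import signature


def call_handler(handler, message):
    # Pass the message only if the handler declares a parameter for it.
    if len(signature(handler).parameters) == 0:
        handler()
    else:
        handler(message)


def beep():
    print("beep")


def echo(message):
    print("echo:", message)


call_handler(beep, message=None)   # -> beep
call_handler(echo, message="hi")   # -> echo: hi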
Example #8
0
    def read_audio(self):
        timer = Stopwatch()
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()

        if self.state.sleeping:
            self.process_wake_up(audio)
        elif self.state.skip_wakeword:
            self.process_skip_wake_word(audio)
        else:
            self.process_wake_word(audio, timer)

        self.metrics.flush()
Example #9
0
    def read_audio(self):
        timer = Stopwatch()
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()

        if self.state.sleeping:
            self.process_wake_up(audio)
        elif self.state.skip_wakeword:
            self.process_skip_wake_word(audio)
        else:
            self.process_wake_word(audio, timer)

        self.metrics.flush()
Example #10
0
    def run(self):
        """Thread main loop. get audio and viseme data from queue and play."""
        while not self._terminated:
            try:
                snd_type, data, visemes, ident = self.queue.get(timeout=2)
                self.blink(0.5)
                if not self._processing_queue:
                    self._processing_queue = True
                    self.tts.begin_audio()

                stopwatch = Stopwatch()
                with stopwatch:
                    if snd_type == 'wav':
                        self.p = play_wav(data, environment=self.pulse_env)
                    elif snd_type == 'mp3':
                        self.p = play_mp3(data, environment=self.pulse_env)

                    if visemes:
                        self.show_visemes(visemes)
                    self.p.communicate()
                    self.p.wait()
                send_playback_metric(stopwatch, ident)

                if self.queue.empty():
                    self.tts.end_audio()
                    self._processing_queue = False
                self.blink(0.2)
            except Empty:
                pass
            except Exception as e:
                LOG.exception(e)
                if self._processing_queue:
                    self.tts.end_audio()
                    self._processing_queue = False
Example #11
0
    def wrapper(message):
        stopwatch = Stopwatch()
        try:
            message = unmunge_message(message, skill_id)
            if on_start:
                on_start(message)

            with stopwatch:
                if len(signature(handler).parameters) == 0:
                    handler()
                else:
                    handler(message)

        except Exception as e:
            if on_error:
                on_error(e)
        finally:
            if on_end:
                on_end(message)

            # Send timing metrics
            context = message.context
            if context and 'ident' in context:
                report_timing(context['ident'], 'skill_handler', stopwatch,
                              {'handler': handler.__name__,
                               'skill_id': skill_id})
Example #12
0
        def handler(message):
            # indicate fallback handling start
            ws.emit(Message("mycroft.skill.handler.start",
                            data={'handler': "fallback"}))

            stopwatch = Stopwatch()
            handler_name = None
            with stopwatch:
                for _, handler in sorted(cls.fallback_handlers.items(),
                                         key=operator.itemgetter(0)):
                    try:
                        if handler(message):
                            #  indicate completion
                            handler_name = get_handler_name(handler)
                            ws.emit(Message(
                                'mycroft.skill.handler.complete',
                                data={'handler': "fallback",
                                      "fallback_handler": handler_name}))
                            break
                    except Exception:
                        LOG.exception('Exception in fallback.')
                else:  # No fallback could handle the utterance
                    ws.emit(Message('complete_intent_failure'))
                    warning = "No fallback could handle intent."
                    LOG.warning(warning)
                    #  indicate completion with exception
                    ws.emit(Message('mycroft.skill.handler.complete',
                                    data={'handler': "fallback",
                                          'exception': warning}))

            # Send timing metric
            if message.context and message.context['ident']:
                ident = message.context['ident']
                report_timing(ident, 'fallback_handler', stopwatch,
                              {'handler': handler_name})
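Example #12 (and #13 below) leans on Python's for/else: the else clause runs only when the loop finishes without hitting break, i.e. when no fallback handled the message. Reduced to a sketch:

def run_fallbacks(handlers, message):
    for handler in handlers:
        if handler(message):
            print("handled by", handler.__name__)
            break  # skips the else clause below
    else:  # no handler returned True
        print("No fallback could handle intent.")


def declines(message):
    return False


def accepts(message):
    return True


run_fallbacks([declines], "hi")           # -> No fallback could handle intent.
run_fallbacks([declines, accepts], "hi")  # -> handled by accepts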
Example #13
0
        def handler(message):
            start, stop = message.data.get('fallback_range', (0, 101))
            # indicate fallback handling start
            LOG.debug('Checking fallbacks in range '
                      '{} - {}'.format(start, stop))
            bus.emit(
                message.forward("mycroft.skill.handler.start",
                                data={'handler': "fallback"}))

            stopwatch = Stopwatch()
            handler_name = None
            with stopwatch:
                sorted_handlers = sorted(cls.fallback_handlers.items(),
                                         key=operator.itemgetter(0))
                handlers = [
                    f[1] for f in sorted_handlers if start <= f[0] < stop
                ]
                for handler in handlers:
                    try:
                        if handler(message):
                            # indicate completion
                            status = True
                            handler_name = get_handler_name(handler)
                            bus.emit(
                                message.forward(
                                    'mycroft.skill.handler.complete',
                                    data={
                                        'handler': "fallback",
                                        "fallback_handler": handler_name
                                    }))
                            break
                    except Exception:
                        LOG.exception('Exception in fallback.')
                else:
                    status = False
                    #  indicate completion with exception
                    warning = 'No fallback could handle intent.'
                    bus.emit(
                        message.forward('mycroft.skill.handler.complete',
                                        data={
                                            'handler': "fallback",
                                            'exception': warning
                                        }))
                    if 'fallback_range' not in message.data:
                        # Old system TODO: Remove in 20.08
                        # No fallback could handle the utterance
                        bus.emit(message.forward('complete_intent_failure'))
                        LOG.warning(warning)

            # return if the utterance was handled to the caller
            bus.emit(message.response(data={'handled': status}))

            # Send timing metric
            if message.context.get('ident'):
                ident = message.context['ident']
                report_timing(ident, 'fallback_handler', stopwatch,
                              {'handler': handler_name})
Example #14
0
    def handle_utterance(self, message):
        """ Main entrypoint for handling user utterances with Mycroft skills

        Monitor the messagebus for 'recognizer_loop:utterance', typically
        generated by a spoken interaction but potentially also from a CLI
        or other method of injecting a 'user utterance' into the system.

        Utterances then work through this sequence to be handled:
        1) Active skills attempt to handle using converse()
        2) Adapt intent handlers
        3) Padatious intent handlers
        4) Other fallbacks

        Args:
            message (Message): The messagebus data
        """
        try:
            # Get language of the utterance
            lang = message.data.get('lang', "en-us")
            set_active_lang(lang)
            utterances = message.data.get('utterances', '')

            stopwatch = Stopwatch()
            with stopwatch:
                # Give active skills an opportunity to handle the utterance
                converse = self._converse(utterances, lang)

                if not converse:
                    # No conversation, use intent system to handle utterance
                    intent = self._adapt_intent_match(utterances, lang)
                    padatious_intent = PadatiousService.instance.calc_intent(
                        utterances[0])

            if converse:
                # Report that converse handled the intent and return
                ident = message.context['ident'] if message.context else None
                report_timing(ident, 'intent_service', stopwatch,
                              {'intent_type': 'converse'})
                return
            elif intent and not (padatious_intent
                                 and padatious_intent.conf >= 0.95):
                # Send the message to the Adapt intent's handler unless
                # Padatious is REALLY sure it was directed at it instead.
                reply = message.reply(intent.get('intent_type'), intent)
            else:
                # Allow fallback system to handle utterance
                # NOTE: Padatious intents are handled this way, too
                reply = message.reply('intent_failure', {
                    'utterance': utterances[0],
                    'lang': lang
                })
            self.bus.emit(reply)
            self.send_metrics(intent, message.context, stopwatch)
        except Exception as e:
            LOG.exception(e)
Example #15
0
def handle_speak(event):
    """Handle "speak" message

    Parse sentences and invoke text to speech service.
    """
    config = Configuration.get()
    Configuration.set_config_update_handlers(bus)
    global _last_stop_signal

    # Get conversation ID
    if event.context and 'ident' in event.context:
        ident = event.context['ident']
    else:
        ident = 'unknown'

    start = time.time()  # Time of speech request
    with lock:
        stopwatch = Stopwatch()
        stopwatch.start()
        utterance = event.data['utterance']
        listen = event.data.get('expect_response', False)
        # This is a bit of a hack for Picroft.  The analog audio on a Pi blocks
        # for 30 seconds fairly often, so we don't want to break on periods
        # (decreasing the chance of encountering the block).  But we will
        # keep the split for non-Picroft installs since it gives user feedback
        # faster on longer phrases.
        #
        # TODO: Remove or make an option?  This is really a hack, anyway,
        # so we likely will want to get rid of this when not running on Mimic
        if (config.get('enclosure', {}).get('platform') != "picroft" and
                len(re.findall('<[^>]*>', utterance)) == 0):
            # Remove any whitespace present after the period,
            # if a character (only alpha) ends with a period
            # ex: A. Lincoln -> A.Lincoln
            # so that we don't split at the period
            utterance = re.sub(r'\b([A-Za-z][\.])(\s+)', r'\g<1>', utterance)
            chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                              utterance)
            # Apply the listen flag to the last chunk, set the rest to False
            chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
                      for i in range(len(chunks))]
            for chunk, listen in chunks:
                # Check if something has aborted the speech
                if (_last_stop_signal > start or
                        check_for_signal('buttonPress')):
                    # Clear any newly queued speech
                    tts.playback.clear()
                    break
                try:
                    mute_and_speak(chunk, ident, listen)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    LOG.error('Error in mute_and_speak', exc_info=True)
        else:
            mute_and_speak(utterance, ident)

        stopwatch.stop()
    report_timing(ident, 'speech', stopwatch, {'utterance': utterance,
                                               'tts': tts.__class__.__name__})
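The chunking regex used in Examples #4 and #15 above (and #17 below) is easier to read against a concrete input: it splits after '.', ';' or '?' followed by whitespace, while the lookbehinds keep abbreviations like 'e.g.' and titles like 'Dr.' intact (single initials such as 'A. Lincoln' still need the re.sub pre-pass above). For instance:

import re

SENTENCE_SPLIT = r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s'

text = "It is 5 pm. Dr. Who is on; shall I record it? Yes."
print(re.split(SENTENCE_SPLIT, text))
# -> ['It is 5 pm.', 'Dr. Who is on;', 'shall I record it?', 'Yes.']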
Example #16
0
    def try_consume_audio(self):
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s", self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(), metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)
            if hyp and hyp.hypstr.lower().find("wake up") >= 0:
                SessionManager.touch()
                self.state.sleeping = False
                self.__speak("I'm awake.")  # TODO: Localization
                self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(), metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                extractor = WakewordExtractor(audio, self.ww_recognizer, self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s", timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer("mycroft.recognizer.audio_extracted.length_s", self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer("mycroft.recognizer.audio_extracted.length_s", self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    self.metrics.increment("mycroft.wakeword")

            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    logger.warn("Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                self.state.skip_wakeword = False
            else:
                self.metrics.clear()
        self.metrics.flush()
Example #17
0
def handle_speak(event):
    """
        Handle "speak" message
    """
    config = Configuration.get()
    Configuration.init(bus)
    global _last_stop_signal

    # Get conversation ID
    if event.context and 'ident' in event.context:
        ident = event.context['ident']
    else:
        ident = 'unknown'

    start = time.time()  # Time of speech request
    with lock:
        stopwatch = Stopwatch()
        stopwatch.start()
        utterance = event.data['utterance']
        if event.data.get('expect_response', False):
            # When expect_response is requested, the listener will be restarted
            # at the end of the next bit of spoken audio.
            bus.once('recognizer_loop:audio_output_end', _start_listener)

        # This is a bit of a hack for Picroft.  The analog audio on a Pi blocks
        # for 30 seconds fairly often, so we don't want to break on periods
        # (decreasing the chance of encountering the block).  But we will
        # keep the split for non-Picroft installs since it gives user feedback
        # faster on longer phrases.
        #
        # TODO: Remove or make an option?  This is really a hack, anyway,
        # so we likely will want to get rid of this when not running on Mimic
        if (config.get('enclosure', {}).get('platform') != "picroft"
                and len(re.findall('<[^>]*>', utterance)) == 0):
            chunks = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\;|\?)\s',
                              utterance)
            for chunk in chunks:
                # Check if something has aborted the speech
                if (_last_stop_signal > start
                        or check_for_signal('buttonPress')):
                    # Clear any newly queued speech
                    tts.playback.clear()
                    break
                try:
                    mute_and_speak(chunk, ident)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    LOG.error('Error in mute_and_speak', exc_info=True)
        else:
            mute_and_speak(utterance, ident)

        stopwatch.stop()
    report_timing(ident, 'speech', stopwatch, {
        'utterance': utterance,
        'tts': tts.__class__.__name__
    })
Example #18
0
    def run(self):
        """Thread main loop. Get audio and extra data from queue and play.

        Each queue message is a tuple containing
        snd_type: 'mp3' or 'wav' telling the loop what format the data is in
        data: path to temporary audio data
        visemes: list of visemes to display while playing
        ident: identifier used when reporting the playback timing metric
        listen: if listening should be triggered at the end of the sentence.

        Playback of the audio is started and the visemes are sent over the
        bus; the loop then waits for the playback process to finish before
        checking the next position in the queue.

        If the queue is empty, tts.end_audio() is called, possibly triggering
        listening.
        """
        while not self._terminated:
            try:
                (snd_type, data, visemes, ident,
                 listen) = self.queue.get(timeout=2)
                self.blink(0.5)
                if not self._processing_queue:
                    self._processing_queue = True
                    self.tts.begin_audio()

                stopwatch = Stopwatch()
                with stopwatch:
                    if snd_type == 'wav':
                        self.p = play_wav(data, environment=self.pulse_env)
                    elif snd_type == 'mp3':
                        self.p = play_mp3(data, environment=self.pulse_env)
                    if visemes:
                        self.show_visemes(visemes)
                    if self.p:
                        self.p.communicate()
                        self.p.wait()
                report_timing(ident, 'speech_playback', stopwatch)

                if self.queue.empty():
                    self.tts.end_audio(listen)
                    self._processing_queue = False
                self.blink(0.2)
            except Empty:
                pass
            except Exception as e:
                LOG.exception(e)
                if self._processing_queue:
                    self.tts.end_audio(listen)
                    self._processing_queue = False
Example #19
0
    def get_tts(self, sentence, wav_file, speaker=None):
        stopwatch = Stopwatch()

        # str.lstrip/rstrip treat the argument as a character set rather than
        # a literal prefix/suffix, so strip the tags with removeprefix and
        # removesuffix (Python 3.9+)
        to_speak = format_speak_tags(sentence).removeprefix(
            "<speak>").removesuffix("</speak>")  # TODO: Update utils to handle DM
        LOG.debug(to_speak)
        if to_speak:
            url = self._build_url(to_speak)
            with stopwatch:
                wav_data = urlopen(url).read()
            LOG.debug(f"Request time={stopwatch.time}")

            with stopwatch:
                with open(wav_file, "wb") as f:
                    f.write(wav_data)
            LOG.debug(f"File access time={stopwatch.time}")
        return wav_file, None
Example #20
0
    def get_tts(self, sentence, wav_file, speaker=None):
        stopwatch = Stopwatch()
        speaker = speaker or dict()
        # Read utterance data from passed configuration
        request_lang = speaker.get("language", self.lang)

        to_speak = format_speak_tags(sentence)
        LOG.debug(to_speak)
        if to_speak:
            synthesizer = self._get_synthesizer(request_lang)
            with stopwatch:
                wav_data = synthesizer.tts(sentence)
            LOG.debug(f"Synthesis time={stopwatch.time}")

            with stopwatch:
                synthesizer.save_wav(wav_data, wav_file)
            LOG.debug(f"File access time={stopwatch.time}")
        return wav_file, None
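Both get_tts variants above reuse a single Stopwatch across two consecutive with blocks. Under the interface sketched after Example #2, entering the context restarts the clock, so each debug line reports only its own block:

import time


def fetch_audio():   # hypothetical stand-in for urlopen(url).read()
    time.sleep(0.2)


def save_audio():    # hypothetical stand-in for synthesizer.save_wav(...)
    time.sleep(0.1)


stopwatch = Stopwatch()  # the sketch class shown after Example #2
with stopwatch:
    fetch_audio()
print(f"Request time={stopwatch.time}")      # ~0.2

with stopwatch:
    save_audio()
print(f"File access time={stopwatch.time}")  # clock restarted: ~0.1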
Example #21
0
    def process(self, audio):
        path = pathlib.Path().absolute()
        settings_path = str(path) + '/mycroft/client/speech/set_config.txt'

        if self._audio_length(audio) >= self.MIN_AUDIO_SIZE:
            stopwatch = Stopwatch()
            with stopwatch:
                # transcribe() may return None; fall back to '' so the
                # substring checks below are safe
                transcription = self.transcribe(audio) or ''

            settings = {"rate": "  '1.0'  ", "volume": " '80%' "}
            # speed
            if "quickly" in transcription:
                settings["rate"] = "  '1.6'  "
            if "slowly" in transcription:
                settings["rate"] = "  '.6'  "
            # volume
            if "loudly" in transcription:
                settings["volume"] = "  '100%'  "
            if "softly" in transcription:
                settings["volume"] = "  '50%'  "

            # with-block ensures the file is closed even if a write fails
            with open(settings_path, 'w') as settings_file:
                settings_file.write(str(settings))

            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
                # Report timing metrics
                report_timing(ident, 'stt', stopwatch,
                              {'transcription': transcription,
                               'stt': self.stt.__class__.__name__})
            else:
                ident = str(stopwatch.timestamp)
        else:
            LOG.warning("Audio too short to be processed")
Example #22
0
        def wrapper(message):
            skill_data = {'name': get_handler_name(handler)}
            stopwatch = Stopwatch()
            try:
                message = unmunge_message(message, self.skill_id)
                # Indicate that the skill handler is starting
                if handler_info:
                    # Indicate that the skill handler is starting if requested
                    msg_type = handler_info + '.start'
                    self.emitter.emit(Message(msg_type, skill_data))

                with stopwatch:
                    is_bound = bool(getattr(handler, 'im_self', None))
                    num_args = len(getargspec(handler).args) - is_bound
                    if num_args == 0:
                        handler()
                    else:
                        handler(message)
                    self.settings.store()  # Store settings if they've changed

            except Exception as e:
                # Convert "MyFancySkill" to "My Fancy Skill" for speaking
                handler_name = re.sub(r"([a-z])([A-Z])", r"\1 \2", self.name)
                msg_data = {'skill': handler_name}
                msg = dialog.get('skill.error', self.lang, msg_data)
                self.speak(msg)
                LOG.exception(msg)
                # append exception information in message
                skill_data['exception'] = e.message
            finally:
                if once:
                    self.remove_event(name)

                # Indicate that the skill handler has completed
                if handler_info:
                    msg_type = handler_info + '.complete'
                    self.emitter.emit(Message(msg_type, skill_data))

                # Send timing metrics
                context = message.context
                if context and 'ident' in context:
                    report_timing(context['ident'], 'skill_handler', stopwatch,
                                  {'handler': handler.__name__})
Example #23
0
        def wrapper(message):
            skill_data = {'name': get_handler_name(handler)}
            stopwatch = Stopwatch()
            try:
                message = unmunge_message(message, self.skill_id)
                # Indicate that the skill handler is starting
                if handler_info:
                    # Indicate that the skill handler is starting if requested
                    msg_type = handler_info + '.start'
                    self.bus.emit(message.reply(msg_type, skill_data))

                if once:
                    # Remove registered one-time handler before invoking,
                    # allowing them to re-schedule themselves.
                    self.remove_event(name)

                with stopwatch:
                    if len(signature(handler).parameters) == 0:
                        handler()
                    else:
                        handler(message)
                    self.settings.store()  # Store settings if they've changed

            except Exception as e:
                # Convert "MyFancySkill" to "My Fancy Skill" for speaking
                handler_name = camel_case_split(self.name)
                msg_data = {'skill': handler_name}
                msg = dialog.get('skill.error', self.lang, msg_data)
                self.speak(msg)
                LOG.exception(msg)
                # append exception information in message
                skill_data['exception'] = repr(e)
            finally:
                # Indicate that the skill handler has completed
                if handler_info:
                    msg_type = handler_info + '.complete'
                    self.bus.emit(message.reply(msg_type, skill_data))

                # Send timing metrics
                context = message.context
                if context and 'ident' in context:
                    report_timing(context['ident'], 'skill_handler', stopwatch,
                                  {'handler': handler.__name__})
Example #24
0
    def _get_synthesizer(self, language) -> Synthesizer:
        if '-' in language:
            language = language.split('-')[0]
        stopwatch = Stopwatch()
        with stopwatch:
            model_name = None

            for model in self.models:
                _, lang, dataset, name = model.split('/')
                print(f"{lang}|{name}")
                if language in lang:
                    model_name = model
                    if name == self.preferred_model:
                        break

            model_path, config_path, model_item = self.manager.download_model(
                model_name)
            vocoder_name = model_item.get(
                "default_vocoder",
                "vocoder_models/universal/libri-tts/fullband-melgan")
            vocoder_path, vocoder_config_path, _ = self.manager.download_model(
                vocoder_name)
            speakers_file_path = ''
            encoder_path = ''
            encoder_config_path = ''
            use_cuda = False

            synthesizer = Synthesizer(
                model_path,
                config_path,
                speakers_file_path,
                vocoder_path,
                vocoder_config_path,
                encoder_path,
                encoder_config_path,
                use_cuda,
            )
        LOG.debug(f"Get synthesizer time={stopwatch.time}")
        return synthesizer
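The model-selection loop in _get_synthesizer keeps the last model whose language matches and breaks early when the preferred model shows up. With illustrative (hypothetical) model ids:

models = [
    "tts_models/en/ljspeech/tacotron2-DDC",
    "tts_models/en/vctk/vits",
    "tts_models/de/thorsten/tacotron2-DCA",
]
preferred_model = "vits"

model_name = None
for model in models:
    _, lang, dataset, name = model.split('/')
    if "en" in lang:
        model_name = model        # remember the latest language match
        if name == preferred_model:
            break                 # stop searching at the preferred model
print(model_name)  # -> tts_models/en/vctk/vits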
Example #25
0
    def handle_utterance(self, message):
        """Main entrypoint for handling user utterances with Mycroft skills

        Monitor the messagebus for 'recognizer_loop:utterance', typically
        generated by a spoken interaction but potentially also from a CLI
        or other method of injecting a 'user utterance' into the system.

        Utterances then work through this sequence to be handled:
        1) Active skills attempt to handle using converse()
        2) Padatious high match intents (conf > 0.95)
        3) Adapt intent handlers
        4) High Priority Fallbacks
        5) Padatious near match intents (conf > 0.8)
        6) General Fallbacks
        7) Padatious loose match intents (conf > 0.5)
        8) Catch all fallbacks including Unknown intent handler

        If all these fail the complete_intent_failure message will be sent
        and a generic info of the failure will be spoken.

        Args:
            message (Message): The messagebus data
        """
        try:
            lang = _get_message_lang(message)
            set_default_lf_lang(lang)

            utterances = message.data.get('utterances', [])
            combined = _normalize_all_utterances(utterances)

            stopwatch = Stopwatch()

            # Create matchers
            padatious_matcher = PadatiousMatcher(self.padatious_service)

            # List of functions to use to match the utterance with intent.
            # These are listed in priority order.
            match_funcs = [
                self._converse, padatious_matcher.match_high,
                self.adapt_service.match_intent, self.fallback.high_prio,
                padatious_matcher.match_medium, self.fallback.medium_prio,
                padatious_matcher.match_low, self.fallback.low_prio
            ]

            match = None
            with stopwatch:
                # Loop through the matching functions until a match is found.
                for match_func in match_funcs:
                    match = match_func(combined, lang, message)
                    if match:
                        break
            if match:
                if match.skill_id:
                    self.add_active_skill(match.skill_id)
                    # If the service didn't report back the skill_id it
                    # takes on the responsibility of making the skill "active"

                # Launch skill if not handled by the match function
                if match.intent_type:
                    reply = message.reply(match.intent_type, match.intent_data)
                    # Add back original list of utterances for intent handlers
                    # match.intent_data only includes the utterance with the
                    # highest confidence.
                    reply.data["utterances"] = utterances
                    self.bus.emit(reply)

            else:
                # Nothing was able to handle the intent
                # Ask politely for forgiveness for failing in this vital task
                self.send_complete_intent_failure(message)
            self.send_metrics(match, message.context, stopwatch)
        except Exception as err:
            LOG.exception(err)
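The match_funcs loop in Example #25 is a first-match priority chain: each matcher either claims the utterance or defers by returning a falsy value. Reduced to a sketch with hypothetical matchers:

def first_match(matchers, utterance):
    for match_func in matchers:
        match = match_func(utterance)
        if match:
            return match  # highest-priority matcher wins
    return None


matchers = [
    lambda u: None,  # converse(): no active skill claimed the utterance
    lambda u: None,  # high-confidence Padatious: no match
    lambda u: {'intent_type': 'weather'} if 'weather' in u else None,  # Adapt
]
print(first_match(matchers, "what's the weather"))  # -> {'intent_type': 'weather'}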
Example #26
0
        def wrapper(message):
            data = {'name': get_handler_name(handler)}
            try:
                # Indicate that the skill handler is starting
                if handler_info:
                    # Indicate that the skill handler is starting if requested
                    msg_type = handler_info + '.start'
                    self.emitter.emit(Message(msg_type, data))

                stopwatch = Stopwatch()
                with stopwatch:
                    if need_self:
                        # When registering from a decorator self is required
                        if len(getargspec(handler).args) == 2:
                            handler(self, unmunge_message(message,
                                                          self.skill_id))
                        elif len(getargspec(handler).args) == 1:
                            handler(self)
                        elif len(getargspec(handler).args) == 0:
                            # Zero may indicate multiple decorators, trying the
                            # usual call signatures
                            try:
                                handler(self, unmunge_message(message,
                                                              self.skill_id))
                            except TypeError:
                                handler(self)
                        else:
                            LOG.error("Unexpected argument count:" +
                                      str(len(getargspec(handler).args)))
                            raise TypeError
                    else:
                        if len(getargspec(handler).args) == 2:
                            handler(unmunge_message(message, self.skill_id))
                        elif len(getargspec(handler).args) == 1:
                            handler()
                        else:
                            LOG.error("Unexpected argument count:" +
                                      str(len(getargspec(handler).args)))
                            raise TypeError
                    self.settings.store()  # Store settings if they've changed

            except Exception as e:
                # Convert "MyFancySkill" to "My Fancy Skill" for speaking
                handler_name = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>",
                                      self.name)
                # TODO: Localize
                self.speak("An error occurred while processing a request in " +
                           handler_name)
                LOG.error("An error occurred while processing a request in " +
                          self.name, exc_info=True)
                # append exception information in message
                data['exception'] = e.message
            finally:
                if once:
                    self.remove_event(name)

                # Indicate that the skill handler has completed
                if handler_info:
                    msg_type = handler_info + '.complete'
                    self.emitter.emit(Message(msg_type, data))

                # Send timing metrics
                context = message.context
                if context and 'ident' in context:
                    report_timing(context['ident'], 'skill_handler', stopwatch,
                                  {'handler': handler.__name__})
Example #27
0
    def try_consume_audio(self):
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)
            if hyp and hyp.hypstr.lower().find("wake up") >= 0:
                SessionManager.touch()
                self.state.sleeping = False
                self.__speak("I'm awake.")  # TODO: Localization
                self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                extractor = WakewordExtractor(audio, self.ww_recognizer,
                                              self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s",
                                   timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    self.metrics.increment("mycroft.wakeword")

            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    logger.warn(
                        "Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                self.state.skip_wakeword = False
            else:
                self.metrics.clear()
        self.metrics.flush()
Example #28
0
    def handle_utterance(self, message):
        """ Main entrypoint for handling user utterances with Mycroft skills

        Monitor the messagebus for 'recognizer_loop:utterance', typically
        generated by a spoken interaction but potentially also from a CLI
        or other method of injecting a 'user utterance' into the system.

        Utterances then work through this sequence to be handled:
        1) Active skills attempt to handle using converse()
        2) Padatious high match intents (conf > 0.95)
        3) Adapt intent handlers
        4) Fallbacks:
           - Padatious near match intents (conf > 0.8)
           - General fallbacks
           - Padatious loose match intents (conf > 0.5)
           - Unknown intent handler

        Args:
            message (Message): The messagebus data
        """
        try:
            # Get language of the utterance
            lang = message.data.get('lang', "en-us")
            set_active_lang(lang)

            utterances = message.data.get('utterances', [])
            # normalize() changes "it's a boy" to "it is a boy", etc.
            norm_utterances = [
                normalize(u.lower(), remove_articles=False) for u in utterances
            ]

            # Build list with raw utterance(s) first, then optionally a
            # normalized version following.
            combined = utterances + list(
                set(norm_utterances) - set(utterances))
            LOG.debug("Utterances: {}".format(combined))

            stopwatch = Stopwatch()
            intent = None
            padatious_intent = None
            with stopwatch:
                # Give active skills an opportunity to handle the utterance
                converse = self._converse(combined, lang, message)

                if not converse:
                    # No conversation, use intent system to handle utterance
                    intent = self._adapt_intent_match(utterances,
                                                      norm_utterances, lang)
                    for utt in combined:
                        _intent = PadatiousService.instance.calc_intent(utt)
                        if _intent:
                            best = padatious_intent.conf if padatious_intent \
                                else 0.0
                            if best < _intent.conf:
                                padatious_intent = _intent
                    LOG.debug("Padatious intent: {}".format(padatious_intent))
                    LOG.debug("    Adapt intent: {}".format(intent))

            if converse:
                # Report that converse handled the intent and return
                LOG.debug("Handled in converse()")
                ident = None
                if message.context and 'ident' in message.context:
                    ident = message.context['ident']
                report_timing(ident, 'intent_service', stopwatch,
                              {'intent_type': 'converse'})
                return
            elif (intent and intent.get('confidence', 0.0) > 0.0 and
                  not (padatious_intent and padatious_intent.conf >= 0.95)):
                # Send the message to the Adapt intent's handler unless
                # Padatious is REALLY sure it was directed at it instead.
                self.update_context(intent)
                # update active skills
                skill_id = intent['intent_type'].split(":")[0]
                self.add_active_skill(skill_id)
                # Adapt doesn't handle context injection for one_of keywords
                # correctly. Workaround this issue if possible.
                try:
                    intent = workaround_one_of_context(intent)
                except LookupError:
                    LOG.error('Error during workaround_one_of_context')
                reply = message.reply(intent.get('intent_type'), intent)
            else:
                # Allow fallback system to handle utterance
                # NOTE: A matched padatious_intent is handled this way, too
                # TODO: Need to redefine intent_failure when STT can return
                #       multiple hypothesis -- i.e. len(utterances) > 1
                reply = message.reply(
                    'intent_failure', {
                        'utterance': utterances[0],
                        'norm_utt': norm_utterances[0],
                        'lang': lang
                    })
            self.bus.emit(reply)
            self.send_metrics(intent, message.context, stopwatch)
        except Exception as e:
            LOG.exception(e)
Example #29
0
    def handle_utterance(self, message):
        """ Main entrypoint for handling user utterances with Mycroft skills

        Monitor the messagebus for 'recognizer_loop:utterance', typically
        generated by a spoken interaction but potentially also from a CLI
        or other method of injecting a 'user utterance' into the system.

        Utterances then work through this sequence to be handled:
        1) Active skills attempt to handle using converse()
        2) Adapt intent handlers
        3) Padatious intent handlers
        4) Other fallbacks

        Args:
            message (Message): The messagebus data
        """

        # JN: Code borrowed from get_scheduled_event_status() in core.py
        completed_callback = False
        completed_status = 'failed'  # assume fail

        def completion_handler(message):  #JN
            nonlocal completed_callback
            nonlocal completed_status

            LOG.debug("Calback called: " + message.serialize())
            LOG.debug('   type ' + str(type(message)))
            if message.data is not None:
                completed_status = message.data['status']
                LOG.debug('Completed status is ' + completed_status)
            completed_callback = True

        def wait_for_reply():  #JN
            nonlocal completed_callback
            num_tries = 0  # wait up to 30 secs. weather takes e.g. 8 seconds

            LOG.debug('Waiting for reply, completed callback is ' +
                      str(completed_callback))

            while completed_callback is False and num_tries < 300:
                # LOG.info('Sleeping')
                time.sleep(0.1)
                num_tries += 1
            LOG.debug('Waited for reply, num_tries is ' + str(num_tries))
            LOG.debug('   completed callback is ' + str(completed_callback))
            completed_callback = False  # for next time

        try:
            # Get language of the utterance
            lang = message.data.get('lang', "en-us")
            utterances = message.data.get('utterances', '')

            self.bus.on('skill.handler.complete', completion_handler)

            #JN:  stopwatch doesn't seem to be used, so removed the with stopwatch...
            stopwatch = Stopwatch()

            #JN:  Give active skills an opportunity to handle the utterance
            converse = self._converse(utterances, lang)

            #JN:  code moved to here, finishes the converse stuff
            if converse:
                # Report that converse handled the intent and return
                LOG.debug('Converse handling intent')
                ident = message.context['ident'] if message.context else None
                report_timing(ident, 'intent_service', stopwatch,
                              {'intent_type': 'converse'})
                return

            # if not converse: - redundant
            # No conversation, use intent system to handle utterance
            for intent in self._adapt_intent_match(utterances,
                                                   lang):  # JN uses generator
                padatious_intent = PadatiousService.instance.calc_intent(
                    utterances[0])

                if intent and not (padatious_intent
                                   and padatious_intent.conf >= 0.95):
                    # Send the message to the Adapt intent's handler unless
                    # Padatious is REALLY sure it was directed at it instead.
                    reply = message.reply(intent.get('intent_type'), intent)
                else:
                    # Allow fallback system to handle utterance
                    # NOTE: Padatious intents are handled this way, too
                    LOG.info('Padatious handling or failure?')
                    reply = message.reply('intent_failure', {
                        'utterance': utterances[0],
                        'lang': lang
                    })
                LOG.debug('Intent bus call msg ' + reply.serialize())
                self.bus.emit(reply)
                self.send_metrics(intent, message.context, stopwatch)

                wait_for_reply()

                if completed_status == 'succeeded':
                    # we are finished now with this utterance
                    LOG.debug('intent succeeded, utterance handled by ' +
                              str(intent))
                    self.bus.remove('skill.handler.complete',
                                    completion_handler)
                    return
                else:
                    LOG.debug('intent failed, trying next one ' + str(intent))

            LOG.info('Intent loop finished')
            # we couldn't find a successful handler
            # TODO: a handler that says why the semantics of every intent failed
            # rather than generic messages
            reply = message.reply('intent_failure', {
                'utterance': utterances[0],
                'lang': lang
            })
            self.bus.emit(reply)
            self.bus.remove('skill.handler.complete', completion_handler)

        except Exception as e:
            LOG.exception(e)
Example #30
0
        def wrapper(message):
            try:
                # Indicate that the skill handler is starting
                name = get_handler_name(handler)
                self.emitter.emit(
                    Message("mycroft.skill.handler.start",
                            data={'handler': name}))

                stopwatch = Stopwatch()
                with stopwatch:
                    if need_self:
                        # When registering from a decorator self is required
                        if len(getargspec(handler).args) == 2:
                            handler(self, message)
                        elif len(getargspec(handler).args) == 1:
                            handler(self)
                        elif len(getargspec(handler).args) == 0:
                            # Zero may indicate multiple decorators, trying the
                            # usual call signatures
                            try:
                                handler(self, message)
                            except TypeError:
                                handler(self)
                        else:
                            LOG.error("Unexpected argument count:" +
                                      str(len(getargspec(handler).args)))
                            raise TypeError
                    else:
                        if len(getargspec(handler).args) == 2:
                            handler(message)
                        elif len(getargspec(handler).args) == 1:
                            handler()
                        else:
                            LOG.error("Unexpected argument count:" +
                                      str(len(getargspec(handler).args)))
                            raise TypeError
                    self.settings.store()  # Store settings if they've changed

                # Send timing metrics
                context = message.context
                if context and 'ident' in context:
                    report_timing(context['ident'], 'skill_handler', stopwatch,
                                  {'handler': handler.__name__})

            except Exception as e:
                # Convert "MyFancySkill" to "My Fancy Skill" for speaking
                name = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", self.name)
                # TODO: Localize
                self.speak("An error occurred while processing a request in " +
                           name)
                LOG.error("An error occurred while processing a request in " +
                          self.name,
                          exc_info=True)
                # indicate completion with exception
                self.emitter.emit(
                    Message('mycroft.skill.handler.complete',
                            data={
                                'handler': name,
                                'exception': e.message
                            }))
            # Indicate that the skill handler has completed
            self.emitter.emit(
                Message('mycroft.skill.handler.complete',
                        data={'handler': name}))