Beispiel #1
0
 def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
              wakeword_recognizer):
     """Initialise the base class and keep references to the collaborators.

     Every argument is stored unchanged on the instance under its own name;
     a new MetricsAggregator is created for this instance.
     """
     super(AudioConsumer, self).__init__()
     # NOTE(review): presumably the base class is a thread type - confirm.
     self.daemon = True

     # Shared objects handed in by the caller.
     self.state = state
     self.queue = queue
     self.emitter = emitter

     # Recognition components.
     self.wakeword_recognizer = wakeword_recognizer
     self.wakeup_recognizer = wakeup_recognizer
     self.stt = stt

     self.metrics = MetricsAggregator()
Beispiel #2
0
 def __init__(self, state, queue, emitter, wakeup_recognizer,
              mycroft_recognizer, remote_recognizer):
     """Initialise the thread and keep references to the collaborators.

     Every argument is stored unchanged on the instance under its own name;
     a new MetricsAggregator is created for this instance.
     """
     threading.Thread.__init__(self)
     self.daemon = True

     # Shared objects handed in by the caller.
     self.state = state
     self.queue = queue
     self.emitter = emitter

     # The three recognizers.
     self.remote_recognizer = remote_recognizer
     self.mycroft_recognizer = mycroft_recognizer
     self.wakeup_recognizer = wakeup_recognizer

     self.metrics = MetricsAggregator()
Beispiel #3
0
 def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
              wakeword_recognizer):
     """Initialise the base class, store collaborators, subscribe to hotwords.

     Every argument is stored unchanged on the instance under its own name.
     The configuration is read through ConfigurationManager.get() and
     ``self.set_word`` is registered for "recognizer_loop:hotword" events.
     """
     super(AudioConsumer, self).__init__()
     # NOTE(review): presumably the base class is a thread type - confirm.
     self.daemon = True

     # Shared objects handed in by the caller.
     self.state = state
     self.queue = queue
     self.emitter = emitter

     # Recognition components.
     self.wakeword_recognizer = wakeword_recognizer
     self.wakeup_recognizer = wakeup_recognizer
     self.stt = stt

     self.metrics = MetricsAggregator()
     self.config = ConfigurationManager.get()

     # Registered last so the handler only ever sees a fully set up instance.
     emitter.on("recognizer_loop:hotword", self.set_word)
Beispiel #4
0
 def __init__(self, state, queue, emitter, wakeup_recognizer,
              wakeword_recognizer, wrapped_remote_recognizer,
              wakeup_prefixes, wakeup_words):
     """Initialise the thread and keep references to the collaborators.

     Arguments are stored unchanged on the instance under their own names,
     except ``wakeword_recognizer`` which is kept as ``self.ww_recognizer``.
     """
     threading.Thread.__init__(self)
     self.daemon = True

     # Shared objects handed in by the caller.
     self.state = state
     self.queue = queue
     self.emitter = emitter

     # Recognizers.
     self.wrapped_remote_recognizer = wrapped_remote_recognizer
     self.ww_recognizer = wakeword_recognizer
     self.wakeup_recognizer = wakeup_recognizer

     # Wake-up vocabulary.
     self.wakeup_words = wakeup_words
     self.wakeup_prefixes = wakeup_prefixes

     self.metrics = MetricsAggregator()
Beispiel #5
0
 def __init__(self, state, queue, emitter, wakeup_recognizer,
              mycroft_recognizer, remote_recognizer):
     """Set up the thread base class and remember every constructor argument.

     Each argument becomes an instance attribute of the same name, and a
     fresh MetricsAggregator is attached as ``self.metrics``.
     """
     threading.Thread.__init__(self)
     self.daemon = True

     # Recognizers first, then the shared plumbing objects.
     self.wakeup_recognizer = wakeup_recognizer
     self.remote_recognizer = remote_recognizer
     self.mycroft_recognizer = mycroft_recognizer

     self.emitter = emitter
     self.state = state
     self.queue = queue

     self.metrics = MetricsAggregator()
Beispiel #6
0
 def __init__(self, state, queue, emitter, stt,
              wakeup_recognizer, wakeword_recognizer):
     """Set up the base class and remember every constructor argument.

     Each argument becomes an instance attribute of the same name, and a
     fresh MetricsAggregator is attached as ``self.metrics``.
     """
     super(AudioConsumer, self).__init__()
     # NOTE(review): presumably the base class is a thread type - confirm.
     self.daemon = True

     # Recognition components first, then the shared plumbing objects.
     self.stt = stt
     self.wakeup_recognizer = wakeup_recognizer
     self.wakeword_recognizer = wakeword_recognizer

     self.emitter = emitter
     self.state = state
     self.queue = queue

     self.metrics = MetricsAggregator()
Beispiel #7
0
 def __init__(self, state, queue, emitter, wakeup_recognizer, wakeword_recognizer,
              wrapped_remote_recognizer, wakeup_prefixes, wakeup_words):
     """Set up the thread base class and remember every constructor argument.

     Arguments become instance attributes of the same name, except
     ``wakeword_recognizer`` which is stored as ``self.ww_recognizer``.
     """
     threading.Thread.__init__(self)
     self.daemon = True

     # Wake-up vocabulary.
     self.wakeup_prefixes = wakeup_prefixes
     self.wakeup_words = wakeup_words

     # Recognizers.
     self.wakeup_recognizer = wakeup_recognizer
     self.ww_recognizer = wakeword_recognizer
     self.wrapped_remote_recognizer = wrapped_remote_recognizer

     # Shared plumbing objects.
     self.emitter = emitter
     self.state = state
     self.queue = queue

     self.metrics = MetricsAggregator()
Beispiel #8
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    The thread loops while ``state.running`` is true and takes one item from
    ``queue`` per iteration.  While ``state.sleeping`` is set the item is only
    offered to the wake-up recognizer; otherwise a wake word event is emitted
    and, if the clip is long enough and the device is connected, the clip is
    sent to the STT engine.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt,
                 wakeup_recognizer, mycroft_recognizer):
        """Store the collaborators and flag the thread as a daemon."""
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading from the queue for as long as the state is running."""
        while self.state.running:
            self.read()

    def read(self):
        """Take one item from the queue (blocking) and dispatch it."""
        audio = self.queue.get()

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer accepts ``audio``."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return len(frame_data) / (sample_rate * sample_width) as a float."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, then transcribe ``audio`` if possible.

        Clips shorter than ``MIN_AUDIO_SIZE`` are only logged.  Without a
        connection a spoken notice is emitted instead of transcribing.
        """
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
        elif connected():
            self.transcribe(audio)
        else:
            self.__speak("Mycroft seems not to be connected to the Internet")

    def transcribe(self, audio):
        """Run STT on ``audio`` and emit any resulting text as an utterance.

        A 401 HTTP response is replaced by the phrase "pair my device".
        All other failures are logged and produce no utterance.
        """
        text = None
        try:
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except HTTPError as e:
            # The response may be absent on the exception; read it defensively
            # so the handler itself cannot raise and kill the thread.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                text = "pair my device"
                LOG.warn("Access Denied at mycroft.ai")
            else:
                # Other HTTP failures used to be dropped without any trace.
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that")
        if text:
            payload = {
                'utterances': [text],
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", payload))
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    In addition to emitting messages on the emitter, this variant forwards
    results to a D-Bus object exported under "org.gnome.Shell".

    The original source mixed tabs and spaces; the tab-indented lines are
    kept at the nesting level they had under the 8-column tab rule.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        """Store the collaborators and flag the thread as a daemon."""
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading from the queue for as long as the state is running."""
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        """Return len(frame_data) / (sample_rate * sample_width) as a float."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    @staticmethod
    def _gnome_result_object():
        """Look up the MycroftGnomeResult object on the D-Bus session bus."""
        return dbus.SessionBus().get_object(
            "org.gnome.Shell", "/com/mycroftaignome/MycroftGnomeResult")

    def read_audio(self):
        """Take one item from the queue (blocking) and dispatch it."""
        audio_data = self.queue.get()

        if self.state.sleeping:
            self.try_wake_up(audio_data)
        else:
            self.process_audio(audio_data)

    def try_wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer accepts ``audio``."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_audio(self, audio):
        """Transcribe ``audio``; report a failure by voice and over D-Bus."""
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:  # TODO: Localization
            logger.warn("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            self._gnome_result_object().setText(
                "Sorry, I didn't catch that.",
                dbus_interface="com.mycroftaignome.MycroftAiGnomeBox")

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes ``audio`` remotely.

        The returned callable appends recognised text to ``utterances`` and
        forwards it over D-Bus.  Recognition errors are logged, not raised.
        """
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                # Nothing recognised: the caller notices the empty list.
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
                self._gnome_result_object().getvoiceQuery(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai",
                    dbus_interface="com.mycroftaignome.MycroftAiGnomeBox")
                webbrowser.open('http://cerberus.mycroft.ai')
            except Exception as e:
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)
                # Sent even when the text is blank, as in the original.
                self._gnome_result_object().getvoiceQuery(
                    text,
                    dbus_interface="com.mycroftaignome.MycroftAiGnomeBox")
        return runnable

    def transcribe(self, audio_segments):
        """Transcribe each segment on its own thread and emit the results.

        Segments shorter than ``MIN_AUDIO_SIZE`` are skipped.

        Raises:
            sr.UnknownValueError: if no utterance was collected.
        """
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue

            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)

        for thread in threads:
            thread.join()
        if len(utterances) > 0:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
Beispiel #10
0
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Worker thread that drains AudioData chunks from the queue.

    Depending on the shared state a chunk is checked for the wake-up phrase,
    transcribed directly, or first searched for the wake word.
    """

    # Shortest clip, in seconds, that is sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        """Store the collaborators and flag the thread as a daemon."""
        threading.Thread.__init__(self)
        self.daemon = True

        self.state = state
        self.queue = queue
        self.emitter = emitter

        self.remote_recognizer = remote_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.wakeup_recognizer = wakeup_recognizer

        self.metrics = MetricsAggregator()

    def run(self):
        """Process chunks until the shared state stops running."""
        while True:
            if not self.state.running:
                break
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        """Return len(frame_data) / (sample_rate * sample_width) as a float."""
        byte_count = len(audio.frame_data)
        bytes_per_second = audio.sample_rate * audio.sample_width
        return float(byte_count) / bytes_per_second

    def read_audio(self):
        """Fetch one chunk, record its length and route it by state."""
        stopwatch = Stopwatch()
        chunk = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(chunk))
        self.queue.task_done()
        stopwatch.start()

        state = self.state
        if state.sleeping:
            self.process_wake_up(chunk)
        elif state.skip_wakeword:
            self.process_skip_wake_word(chunk)
        else:
            self.process_wake_word(chunk, stopwatch)

        self.metrics.flush()

    def process_wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer accepts ``audio``."""
        recognized = self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                          self.metrics)
        if not recognized:
            return

        SessionManager.touch()
        self.state.sleeping = False
        self.__speak("I'm awake.")  # TODO: Localization
        self.metrics.increment("mycroft.wakeup")

    def process_wake_word(self, audio, timer):
        """Look for the wake word and transcribe the audio around it.

        If nothing can be transcribed the user is prompted with "Go ahead"
        and the next chunk is handled without a wake word check.
        """
        recognizer = self.mycroft_recognizer
        hypothesis = recognizer.transcribe(audio.frame_data, self.metrics)
        if not recognizer.contains(hypothesis):
            return

        extractor = WordExtractor(audio, self.mycroft_recognizer,
                                  self.metrics)
        timer.lap()
        extractor.calculate_range()
        self.metrics.timer("mycroft.recognizer.extractor.time_s",
                           timer.lap())

        extracted_metric = "mycroft.recognizer.audio_extracted.length_s"
        audio_before = extractor.get_audio_data_before()
        self.metrics.timer(extracted_metric,
                           self._audio_length(audio_before))
        audio_after = extractor.get_audio_data_after()
        self.metrics.timer(extracted_metric,
                           self._audio_length(audio_after))

        SessionManager.touch()
        self.emitter.emit("recognizer_loop:wakeword", {
            'utterance': hypothesis.hypstr,
            'session': SessionManager.get().session_id,
            'pos_begin': extractor.begin,
            'pos_end': extractor.end
        })

        try:
            self.transcribe([audio_before, audio_after])
        except sr.UnknownValueError:
            self.__speak("Go ahead")
            self.state.skip_wakeword = True
            self.metrics.increment("mycroft.wakeword")

    def process_skip_wake_word(self, audio):
        """Transcribe ``audio`` directly, then re-enable wake word checks."""
        SessionManager.touch()
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            logger.warn("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            self.metrics.increment("mycroft.recognizer.error")
        self.state.skip_wakeword = False

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id."""
        session_id = SessionManager.get().session_id
        message = Message("speak", metadata={
            'utterance': utterance,
            'session': session_id
        })
        self.emitter.emit("speak", message)

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes ``audio`` remotely.

        The returned callable appends recognised text to ``utterances``.
        Recognition errors are logged, not raised.
        """
        def worker():
            try:
                recognized = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics)
                text = recognized.lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as err:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(err))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as err:
                logger.error("Unexpected exception: {0}".format(err))
            else:
                logger.debug("STT: " + text)
                if text.strip():
                    utterances.append(text)

        return worker

    def transcribe(self, audio_segments):
        """Transcribe each segment on its own thread and emit the results.

        Segments shorter than ``MIN_AUDIO_SIZE`` are skipped.

        Raises:
            sr.UnknownValueError: if no utterance was collected.
        """
        utterances = []
        workers = []
        for segment in audio_segments:
            if self._audio_length(segment) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue

            worker = threading.Thread(
                target=self._create_remote_stt_runnable(segment, utterances))
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        if not utterances:
            raise sr.UnknownValueError

        self.emitter.emit("recognizer_loop:utterance", {
            'utterances': utterances,
            'session': SessionManager.get().session_id
        })
        self.metrics.attr('utterances', utterances)
Beispiel #11
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    Queue items are ``(tag, data)`` pairs.  AUDIO_DATA items are checked for
    the wake-up phrase or transcribed, depending on ``state.sleeping``; the
    STREAM_* tags are forwarded to the STT engine's streaming methods.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        """Store the collaborators and flag the thread as a daemon."""
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading from the queue for as long as the state is running."""
        while self.state.running:
            self.read()

    def read(self):
        """Take one message from the queue and dispatch it by its tag.

        Waits at most 0.5 s so that ``run`` can re-check ``state.running``.
        An empty queue or a ``None`` message is ignored.
        """
        try:
            message = self.queue.get(timeout=0.5)
        except Empty:
            return

        if message is None:
            return

        tag, data = message

        if tag == AUDIO_DATA:
            if self.state.sleeping:
                self.wake_up(data)
            else:
                self.process(data)
        elif tag == STREAM_START:
            self.stt.stream_start()
        elif tag == STREAM_DATA:
            self.stt.stream_data(data)
        elif tag == STREAM_STOP:
            self.stt.stream_stop()
        else:
            # message is a tuple: wrap it so %-formatting treats it as one
            # value instead of raising TypeError on the argument count.
            LOG.error("Unknown audio queue type %r" % (message,))

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer finds its phrase."""
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return len(frame_data) / (sample_rate * sample_width) as a float."""
        return float(len(
            audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, transcribe ``audio``, report timing.

        Clips shorter than ``MIN_AUDIO_SIZE`` are only logged.  A successful
        transcription is emitted as "recognizer_loop:utterance".
        """
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)
            # Report timing metrics
            report_timing(ident, 'stt', stopwatch, {
                'transcription': transcription,
                'stt': self.stt.__class__.__name__
            })

    def transcribe(self, audio):
        """Return the lower-cased transcription of ``audio``, or None.

        A 401 HTTP response yields the phrase "pair my device".  Request,
        connection and other HTTP errors are logged and followed by a spoken
        "backend.down" / "not connected to the internet" dialog.
        """
        def send_unknown_intent():
            """ Send message that nothing was transcribed. """
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')

        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio)
            if text is not None:
                text = text.lower().strip()
                LOG.debug("STT: " + text)
            else:
                send_unknown_intent()
                LOG.info('no words were transcribed')
            return text
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))

            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            # The response may be absent on the exception; read it defensively
            # so the handler itself cannot raise and kill the thread.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                LOG.warning("Access Denied at mycroft.ai")
                return "pair my device"  # phrase to start the pairing process
            else:
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except RequestException as e:
            LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            send_unknown_intent()
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            return None

        # Only reached after a request/connection/HTTP failure above.
        if connected():
            dialog_name = 'backend.down'
        else:
            dialog_name = 'not connected to the internet'
        self.emitter.emit('speak', {'utterance': dialog.get(dialog_name)})

    def __speak(self, utterance):
        """Emit a "speak" payload carrying ``utterance`` and the session id."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
Beispiel #12
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    While ``state.sleeping`` is set a chunk is only checked for the wake-up
    phrase; otherwise a wake word event is emitted and the chunk is sent to
    the STT engine if it is long enough.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        """Store the collaborators and flag the thread as a daemon."""
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading from the queue for as long as the state is running."""
        while self.state.running:
            self.read()

    def read(self):
        """Take one chunk from the queue and dispatch it.

        Waits at most 0.5 s so that ``run`` can re-check ``state.running``.
        An empty queue or a ``None`` item is ignored.
        """
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer finds its phrase."""
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return len(frame_data) / (sample_rate * sample_width) as a float."""
        return float(len(
            audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, then transcribe ``audio`` if long enough."""
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        """Run STT on ``audio`` and emit any resulting text as an utterance.

        A 401 HTTP response is replaced by the phrase "pair my device".
        All other failures are logged and produce no utterance.
        """
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            # The response may be absent on the exception; read it defensively
            # so the handler itself cannot raise and kill the thread.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
            else:
                # Other HTTP failures used to be dropped without any trace.
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
        if text:
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        """Emit a "speak" payload carrying ``utterance`` and the session id."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
Beispiel #13
0
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Background thread that pulls AudioData chunks off the queue.

    The shared state decides how a chunk is treated: wake-up check while
    sleeping, direct transcription when the wake word is being skipped,
    wake word search otherwise.
    """

    # Minimum clip duration (seconds) that is forwarded to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        """Remember the collaborators and mark the thread as a daemon."""
        threading.Thread.__init__(self)
        self.daemon = True

        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer

        self.emitter = emitter
        self.queue = queue
        self.state = state

        self.metrics = MetricsAggregator()

    def run(self):
        """Handle chunks one by one until the shared state stops running."""
        while True:
            if not self.state.running:
                return
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        """Return len(frame_data) / (sample_rate * sample_width) as a float."""
        size = float(len(audio.frame_data))
        rate = audio.sample_rate * audio.sample_width
        return size / rate

    def read_audio(self):
        """Fetch one chunk, record its length and route it by state."""
        lap_timer = Stopwatch()
        clip = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(clip))
        self.queue.task_done()
        lap_timer.start()

        if self.state.sleeping:
            self.process_wake_up(clip)
        else:
            if self.state.skip_wakeword:
                self.process_skip_wake_word(clip)
            else:
                self.process_wake_word(clip, lap_timer)

        self.metrics.flush()

    def process_wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer accepts ``audio``."""
        awake = self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                     self.metrics)
        if awake:
            SessionManager.touch()
            self.state.sleeping = False
            # TODO: Localization
            self.__speak("I'm awake.")
            self.metrics.increment("mycroft.wakeup")

    def process_wake_word(self, audio, timer):
        """Look for the wake word and transcribe the audio around it.

        If nothing can be transcribed the user is prompted with "Go ahead"
        and the next chunk is handled without a wake word check.
        """
        hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
                                                 self.metrics)
        found = self.mycroft_recognizer.contains(hyp)
        if not found:
            return

        extractor = WordExtractor(audio, self.mycroft_recognizer,
                                  self.metrics)
        timer.lap()
        extractor.calculate_range()
        self.metrics.timer("mycroft.recognizer.extractor.time_s",
                           timer.lap())

        before = extractor.get_audio_data_before()
        self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                           self._audio_length(before))
        after = extractor.get_audio_data_after()
        self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                           self._audio_length(after))

        SessionManager.touch()
        event = {
            'utterance': hyp.hypstr,
            'session': SessionManager.get().session_id,
            'pos_begin': extractor.begin,
            'pos_end': extractor.end
        }
        self.emitter.emit("recognizer_loop:wakeword", event)

        try:
            self.transcribe([before, after])
        except sr.UnknownValueError:
            self.__speak("Go ahead")
            self.state.skip_wakeword = True
            self.metrics.increment("mycroft.wakeword")

    def process_skip_wake_word(self, audio):
        """Transcribe ``audio`` directly, then re-enable wake word checks."""
        SessionManager.touch()
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:
            logger.warn("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that.")
            self.metrics.increment("mycroft.recognizer.error")
        self.state.skip_wakeword = False

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id."""
        metadata = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        speak_message = Message("speak", metadata=metadata)
        self.emitter.emit("speak", speak_message)

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a thread target that transcribes ``audio`` remotely.

        The returned callable appends recognised text to ``utterances``.
        Recognition errors are logged, not raised.
        """
        def stt_job():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as error:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(error))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as error:
                logger.error("Unexpected exception: {0}".format(error))
            else:
                logger.debug("STT: " + text)
                if text.strip():
                    utterances.append(text)

        return stt_job

    def transcribe(self, audio_segments):
        """Transcribe each segment on its own thread and emit the results.

        Segments shorter than ``MIN_AUDIO_SIZE`` are skipped.

        Raises:
            sr.UnknownValueError: if no utterance was collected.
        """
        collected = []
        jobs = []
        for clip in audio_segments:
            if self._audio_length(clip) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue

            job = threading.Thread(
                target=self._create_remote_stt_runnable(clip, collected))
            job.start()
            jobs.append(job)

        for job in jobs:
            job.join()

        if not collected:
            raise sr.UnknownValueError

        payload = {
            'utterances': collected,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("recognizer_loop:utterance", payload)
        self.metrics.attr('utterances', collected)
Beispiel #14
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    While ``state.sleeping`` is set, chunks are only checked against the
    wake-up recognizer; otherwise every chunk is announced as a wake word
    detection and, if long enough, sent to the STT engine.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt,
                 wakeup_recognizer, mycroft_recognizer):
        """
        :param state: shared object exposing ``running`` and ``sleeping``
        :param queue: queue the audio chunks are read from
        :param emitter: object whose ``emit`` publishes loop events
        :param stt: engine exposing ``execute(audio)`` and ``lang``
        :param wakeup_recognizer: recognizer consulted while sleeping
        :param mycroft_recognizer: recognizer providing ``key_phrase``
        """
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.read()

    def read(self):
        """Block for the next chunk and dispatch it by sleep state."""
        audio = self.queue.get()

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer matches ``audio``."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            lines = ["I'm awake.",
                     "System rebooted.",
                     "All systems check. I am now online.",
                     "Waking up."]
            self.__speak(choice(lines))
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, then transcribe sufficiently long audio.
        """
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        """Run STT on ``audio`` and emit the utterance if one was produced.

        Errors never propagate: request errors are logged, a connection
        error is logged and spoken, an HTTP 401 is turned into the
        "pair my device" utterance, any other HTTP error is logged, and any
        remaining exception is logged and answered with a spoken apology.
        """
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.__speak("Intelora seems not to be connected to the Internet.")
        except HTTPError as e:
            # The error may carry no response object; reading the status
            # defensively keeps this handler from raising itself.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                text = "pair my device"
                LOG.warn("Access Denied at Mycroft API")
            else:
                # Previously dropped silently; make the failure visible.
                LOG.error("HTTP error from STT service: {0}".format(e))
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            lines = ["Sorry, I didn't catch that.",
                     "Sorry, I didn't hear you clearly.",
                     "Can you repeat what you said, please?",
                     "Can you please say that again?"]
            self.__speak(choice(lines))
        if text:
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id.
        """
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", payload))
Beispiel #15
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    While ``state.sleeping`` is set, chunks are only checked against the
    wake-up recognizer; otherwise every chunk is announced as a wake word
    detection and, if long enough, sent to the STT engine.  A transcription
    of exactly "tell me more" is expanded using the hot word stored in
    ``HOTWORD_FILE``.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    # File that is read, then emptied, to expand a "tell me more" request
    HOTWORD_FILE = "hotWordFile.txt"

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 wakeword_recognizer):
        """
        :param state: shared object exposing ``running`` and ``sleeping``
        :param queue: queue the audio chunks are read from
        :param emitter: object providing ``emit`` and ``on``
        :param stt: engine exposing ``execute(audio)`` and ``lang``
        :param wakeup_recognizer: recognizer consulted while sleeping
        :param wakeword_recognizer: recognizer providing ``key_phrase``
        """
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()
        self.config = ConfigurationManager.get()
        # Defined up front so the attribute exists before any hotword event
        self.hotword = None
        emitter.on("recognizer_loop:hotword", self.set_word)

    def set_word(self, event):
        """Remember the hot word carried by a "start_listening" event.

        :param event: mapping read through ``get``; falls back to the wake
            word recognizer's key phrase when no "hotword" entry is present
        """
        if event.get("start_listening"):
            # set new hot word
            self.hotword = event.get("hotword",
                                     self.wakeword_recognizer.key_phrase)

    def run(self):
        """Keep reading chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.read()

    def read(self):
        """Fetch one chunk (waiting at most 0.5 s) and dispatch it."""
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer matches ``audio``."""
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak(mycroft.dialog.get("i am awake", self.stt.lang))
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        return float(len(
            audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, then transcribe sufficiently long audio.
        """
        SessionManager.touch()
        # NOTE: the stored ``self.hotword`` is deliberately not consulted
        # here; the recognizer's key phrase is always reported.
        word = self.wakeword_recognizer.key_phrase
        payload = {
            'utterance': word,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def _expand_tell_me_more(self, text):
        """Return "tell me about <stored hot word>", emptying the file.

        If ``HOTWORD_FILE`` cannot be opened the problem is logged and
        ``text`` is returned unchanged, so the consumer thread survives.
        """
        try:
            # "r+" reads and allows truncation; the former "rw+" is not a
            # valid mode string and raises ValueError on Python 3.
            with open(self.HOTWORD_FILE, "r+") as hot_word_file:
                prev_hot_word = hot_word_file.read()
                hot_word_file.truncate(0)
        except (IOError, OSError) as e:
            LOG.warning("Could not read {0}: {1}".format(
                self.HOTWORD_FILE, e))
            return text
        return "tell me about " + prev_hot_word

    def transcribe(self, audio):
        """Run STT on ``audio`` and emit the utterance if one was produced.

        Errors never propagate: request errors are logged, a connection
        error emits "recognizer_loop:no_internet", an HTTP 401 is turned
        into the "pair my device" utterance, and any other failure is
        logged.
        """
        LOG.debug("Transcribing audio")
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: --------->" + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            # The error may carry no response object; reading the status
            # defensively keeps this handler from raising itself.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
            else:
                # Previously dropped silently; make the failure visible.
                LOG.error("HTTP error from STT service: {0}".format(e))
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
        if text:
            # STT succeeded, send the transcribed speech on for processing
            # (trace output only, hence debug rather than error level)
            LOG.debug("maine samjha tune bola " + text)
            if text == "tell me more":
                LOG.info("found tell me more in listener****")
                text = self._expand_tell_me_more(text)
                LOG.debug(" naya wala maine samjha tune bola " + text)

            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        """Emit a "speak" event carrying ``utterance`` and the session id."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
Beispiel #16
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    While ``state.sleeping`` is set, chunks are only checked against the
    wake-up recognizer; otherwise every chunk is announced as a wake word
    detection and, if long enough and ``connected()`` is true, sent to the
    STT engine.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt, wakeup_recognizer,
                 mycroft_recognizer):
        """
        :param state: shared object exposing ``running`` and ``sleeping``
        :param queue: queue the audio chunks are read from
        :param emitter: object whose ``emit`` publishes loop events
        :param stt: engine exposing ``execute(audio)``
        :param wakeup_recognizer: recognizer consulted while sleeping
        :param mycroft_recognizer: recognizer providing ``key_phrase``
        """
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.read()

    def read(self):
        """Block for the next chunk and dispatch it by sleep state."""
        audio = self.queue.get()

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer matches ``audio``."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        return float(len(
            audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, then transcribe if possible.

        Too-short audio emits "recognizer_loop:tooshort"; when
        ``connected()`` is false an offline notice is spoken instead of
        transcribing.
        """
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
            self.emitter.emit("recognizer_loop:tooshort", {})
        elif connected():
            self.transcribe(audio)
        else:
            self.__speak("Mycroft seems not to be connected to the Internet")

    def transcribe(self, audio):
        """Run STT on ``audio`` and emit the utterance if one was produced.

        Errors never propagate: request errors are logged, an HTTP 401 is
        turned into the "pair my device" utterance, any other HTTP error is
        logged, and any remaining exception is logged and answered with a
        spoken apology.
        """
        text = None
        try:
            # Trace output goes through the logger rather than stdout.
            LOG.debug("aud: " + str(audio))
            initial = self.stt.execute(audio)
            LOG.debug("initial: {0}".format(initial))
            text = initial.lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except HTTPError as e:
            # The error may carry no response object; reading the status
            # defensively keeps this handler from raising itself.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                text = "pair my device"
                LOG.warn("Access Denied at mycroft.ai")
            else:
                # Previously dropped silently; make the failure visible.
                LOG.error("HTTP error from STT service: {0}".format(e))
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            self.__speak("Sorry, I didn't catch that")
        if text:
            payload = {
                'utterances': [text],
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id.
        """
        # Was a Python 2 ``print`` statement (SyntaxError on Python 3).
        LOG.debug("going to speak " + utterance)
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", payload))
Beispiel #17
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    While ``state.sleeping`` is set, chunks are only checked against the
    wake-up recognizer; otherwise every chunk is announced as a wake word
    detection and, if long enough, sent to the STT engine.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt,
                 wakeup_recognizer, wakeword_recognizer):
        """
        :param state: shared object exposing ``running`` and ``sleeping``
        :param queue: queue the audio chunks are read from
        :param emitter: object whose ``emit`` publishes loop events
        :param stt: engine exposing ``execute(audio)`` and ``lang``
        :param wakeup_recognizer: recognizer consulted while sleeping
        :param wakeword_recognizer: recognizer providing ``key_phrase``
        """
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.read()

    def read(self):
        """Fetch one chunk (waiting at most 0.5 s) and dispatch it."""
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer matches ``audio``."""
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak(mycroft.dialog.get("i am awake", self.stt.lang))
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Emit the wake word event, then transcribe sufficiently long audio.
        """
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)

    def transcribe(self, audio):
        """Run STT on ``audio`` and emit the utterance if one was produced.

        Errors never propagate: request errors are logged, a connection
        error emits "recognizer_loop:no_internet", an HTTP 401 is turned
        into the "pair my device" utterance, and any other failure is
        logged.
        """
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            # The error may carry no response object; reading the status
            # defensively keeps this handler from raising itself.
            status = getattr(getattr(e, 'response', None),
                             'status_code', None)
            if status == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
            else:
                # Previously dropped silently; make the failure visible.
                LOG.error("HTTP error from STT service: {0}".format(e))
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
        if text:
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])

    def __speak(self, utterance):
        """Emit a "speak" event carrying ``utterance`` and the session id."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
Beispiel #18
0
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    While ``state.sleeping`` is set, chunks are only checked against the
    wake-up recognizer; otherwise every chunk is announced as a wake word
    detection and sent to the remote recognizer on worker threads.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 mycroft_recognizer, remote_recognizer):
        """
        :param state: shared object exposing ``running`` and ``sleeping``
        :param queue: queue the audio chunks are read from
        :param emitter: object whose ``emit`` publishes loop events
        :param wakeup_recognizer: recognizer consulted while sleeping
        :param mycroft_recognizer: recognizer providing ``key_phrase``
        :param remote_recognizer: recognizer exposing ``transcribe``
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.mycroft_recognizer = mycroft_recognizer
        self.remote_recognizer = remote_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep reading chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.read_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        return float(len(
            audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    def read_audio(self):
        """Block for the next chunk and dispatch it by sleep state."""
        audio_data = self.queue.get()

        if self.state.sleeping:
            self.try_wake_up(audio_data)
        else:
            self.process_audio(audio_data)

    def try_wake_up(self, audio):
        """Leave sleep mode if the wake-up recognizer matches ``audio``."""
        if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                                self.metrics):
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def process_audio(self, audio):
        """Emit the wake word event, then try to transcribe ``audio``.

        An ``sr.UnknownValueError`` from ``transcribe`` is logged and
        otherwise ignored.
        """
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)
        try:
            self.transcribe([audio])
        except sr.UnknownValueError:  # TODO: Localization
            logger.warn("Speech Recognition could not understand audio")
            # self.__speak("Sorry, I didn't catch that.")

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id.
        """
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", Message("speak", metadata=payload))

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a zero-argument callable that transcribes one segment.

        On success the lower-cased transcription is appended to
        ``utterances`` unless it is blank.  Failures are logged (or
        ignored) instead of raised; an access-denied error also speaks a
        pairing hint and appends the "pair my device" utterance.

        :param audio: audio segment handed to the remote recognizer
        :param utterances: list the callable appends its result to
        :return: the callable, suitable as a thread target
        """
        def runnable():
            try:
                text = self.remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                # Nothing recognised in this segment: contribute nothing.
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "browse to cerberus dot mycroft dot A.I")
                utterances.append("pair my device")
            except Exception as e:
                logger.error("Unexpected exception: {0}".format(e))
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)

        return runnable

    def transcribe(self, audio_segments):
        """Transcribe the segments in parallel and emit the results.

        When ``connected()`` is false nothing is transcribed and an offline
        notice is spoken instead.

        :param audio_segments: iterable of audio segments
        :raises sr.UnknownValueError: when connected but no utterance was
            collected
        """
        utterances = []
        threads = []
        if connected():
            for audio in audio_segments:
                if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                    logger.debug("Audio too short to send to STT")
                    continue

                target = self._create_remote_stt_runnable(audio, utterances)
                t = threading.Thread(target=target)
                t.start()
                threads.append(t)

            # Wait for every recognizer thread before checking the results.
            for thread in threads:
                thread.join()
            if len(utterances) > 0:
                payload = {
                    'utterances': utterances,
                    'session': SessionManager.get().session_id
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', utterances)
            else:
                raise sr.UnknownValueError
        else:  # TODO: Localization
            # TODO: Enclosure virtualization (might not have a button)
            # The adjacent literals are concatenated, so the sentence break
            # needs its own space ("Internet. Either", not "Internet.Either").
            self.__speak("This device is not connected to the Internet. "
                         "Either plug in a network cable or hold the button"
                         " on top for two seconds, then select wifi from the "
                         "menu")
Beispiel #19
0
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    Each chunk is run through a local recognizer: the wake-up recognizer
    while ``state.sleeping`` is set, otherwise the wake word recognizer.
    When the wake word is found, the audio before and after it is sent to
    the remote recognizer.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 1.0

    def __init__(self, state, queue, emitter, wakeup_recognizer,
                 wakeword_recognizer, wrapped_remote_recognizer,
                 wakeup_prefixes, wakeup_words):
        """
        :param state: shared object exposing ``running``, ``sleeping`` and
            ``skip_wakeword``
        :param queue: queue the audio chunks are read from
        :param emitter: object whose ``emit`` publishes loop events
        :param wakeup_recognizer: recognizer used while sleeping
        :param wakeword_recognizer: recognizer used while awake
        :param wrapped_remote_recognizer: remote recognizer exposing
            ``transcribe``
        :param wakeup_prefixes: stored on the instance; not read in this class
        :param wakeup_words: stored on the instance; not read in this class
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.wakeup_recognizer = wakeup_recognizer
        self.ww_recognizer = wakeword_recognizer
        self.wrapped_remote_recognizer = wrapped_remote_recognizer
        self.wakeup_prefixes = wakeup_prefixes
        self.wakeup_words = wakeup_words
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep consuming chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.try_consume_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        return float(len(
            audio.frame_data)) / (audio.sample_rate * audio.sample_width)

    def try_consume_audio(self):
        """Take one chunk off the queue and act on what was recognised.

        Sleeping: only a hypothesis containing "wake up" has an effect
        (leaves sleep mode).  Awake: a hypothesis containing "mycroft"
        triggers remote transcription of the audio around the wake word;
        if ``state.skip_wakeword`` is set the whole chunk is transcribed
        without looking for the wake word.  Metrics are flushed at the end.
        """
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)
            # NOTE(review): unlike the check above, this one does not guard
            # against a falsy ``hyp.hypstr`` before calling ``lower()``.
            if hyp and hyp.hypstr.lower().find("wake up") >= 0:
                SessionManager.touch()
                self.state.sleeping = False
                self.__speak("I'm awake.")  # TODO: Localization
                self.metrics.increment("mycroft.wakeup")
        else:
            # With skip_wakeword set, ``hyp`` stays None so the wake word
            # branch below is bypassed.
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                extractor = WakewordExtractor(audio, self.ww_recognizer,
                                              self.metrics)
                # The two lap() calls bracket calculate_range() so that
                # only the extraction time is reported.
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s",
                                   timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    # Wake word heard but no utterance obtained: prompt the
                    # user and accept the next chunk without a wake word.
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    self.metrics.increment("mycroft.wakeword")

            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    logger.warn(
                        "Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                # The follow-up chunk is consumed either way.
                self.state.skip_wakeword = False
            else:
                # Nothing relevant recognised: drop the collected metrics.
                self.metrics.clear()
        self.metrics.flush()

    def __speak(self, utterance):
        """
        Speak commands should be asynchronous to avoid filling up the
        portaudio buffer.

        The "speak" message is emitted from a newly started thread, so
        this method returns without waiting for the emit.

        :param utterance: text placed in the message metadata
        :return: None
        """
        def target():
            self.emitter.emit(
                "speak",
                Message("speak",
                        metadata={
                            'utterance': utterance,
                            'session': SessionManager.get().session_id
                        }))

        threading.Thread(target=target).start()

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a zero-argument callable that transcribes one segment.

        On success the lower-cased transcription is appended to
        ``utterances`` unless it is blank.  Unknown-value and request
        errors are ignored or logged; an access-denied error also speaks a
        pairing hint and appends the "pair my device" utterance.  Other
        exception types are not caught here.

        :param audio: audio segment handed to the remote recognizer
        :param utterances: list the callable appends its result to
        :return: the callable, used as a thread target by ``transcribe``
        """
        def runnable():
            try:
                text = self.wrapped_remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                pass
            except sr.RequestError as e:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(e))
            except CerberusAccessDenied as e:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai")
                utterances.append("pair my device")
            else:
                logger.debug("STT: " + text)
                if text.strip() != '':
                    utterances.append(text)

        return runnable

    def transcribe(self, audio_segments):
        """Transcribe the segments in parallel and emit the results.

        One thread is started per segment that is at least
        ``MIN_AUDIO_SIZE`` long; all threads are joined before the
        collected utterances are inspected.

        :param audio_segments: iterable of audio segments
        :raises sr.UnknownValueError: when no utterance was collected
        """
        utterances = []
        threads = []
        for audio in audio_segments:
            if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
                continue

            target = self._create_remote_stt_runnable(audio, utterances)
            t = threading.Thread(target=target)
            t.start()
            threads.append(t)

        # Wait for every recognizer thread before checking the results.
        for thread in threads:
            thread.join()
        if len(utterances) > 0:
            payload = {
                'utterances': utterances,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', utterances)
        else:
            raise sr.UnknownValueError
Beispiel #20
0
class AudioConsumer(threading.Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    Each chunk is run through a local recognizer: the wake-up recognizer
    while ``state.sleeping`` is set, otherwise the wake word recognizer.
    When the wake word is found, the audio before and after it is sent to
    the remote recognizer.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 1.0

    def __init__(
            self, state, queue, emitter, wakeup_recognizer,
            wakeword_recognizer, wrapped_remote_recognizer, wakeup_prefixes,
            wakeup_words):
        """
        :param state: shared object exposing ``running``, ``sleeping`` and
            ``skip_wakeword``
        :param queue: queue the audio chunks are read from
        :param emitter: object whose ``emit`` publishes loop events
        :param wakeup_recognizer: recognizer used while sleeping
        :param wakeword_recognizer: recognizer used while awake
        :param wrapped_remote_recognizer: remote recognizer exposing
            ``transcribe``
        :param wakeup_prefixes: stored on the instance
        :param wakeup_words: stored on the instance
        """
        threading.Thread.__init__(self)
        self.daemon = True

        # Shared loop plumbing
        self.state = state
        self.queue = queue
        self.emitter = emitter

        # Recognizers
        self.wakeup_recognizer = wakeup_recognizer
        self.ww_recognizer = wakeword_recognizer
        self.wrapped_remote_recognizer = wrapped_remote_recognizer

        # Wake-up vocabulary
        self.wakeup_prefixes = wakeup_prefixes
        self.wakeup_words = wakeup_words

        self.metrics = MetricsAggregator()

    def run(self):
        """Keep consuming chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.try_consume_audio()

    @staticmethod
    def _audio_length(audio):
        """Return the chunk duration: bytes / (sample rate * width)."""
        byte_count = float(len(audio.frame_data))
        bytes_per_second = audio.sample_rate * audio.sample_width
        return byte_count / bytes_per_second

    def try_consume_audio(self):
        """Take one chunk off the queue, act on it, then flush metrics."""
        stopwatch = Stopwatch()
        chunk = self.queue.get()
        self.metrics.timer(
            "mycroft.recognizer.audio.length_s", self._audio_length(chunk))
        self.queue.task_done()
        stopwatch.start()

        if self.state.sleeping:
            self._listen_for_wake_up(chunk)
        else:
            self._listen_for_command(chunk, stopwatch)

        self.metrics.flush()

    def _listen_for_wake_up(self, chunk):
        """Sleeping mode: leave it when the hypothesis contains "wake up"."""
        hyp = self.wakeup_recognizer.transcribe(
            chunk.get_wav_data(), metrics=self.metrics)
        if hyp and hyp.hypstr:
            logger.debug("sleeping recognition: " + hyp.hypstr)
        if hyp and "wake up" in hyp.hypstr.lower():
            SessionManager.touch()
            self.state.sleeping = False
            self.__speak("I'm awake.")  # TODO: Localization
            self.metrics.increment("mycroft.wakeup")

    def _listen_for_command(self, chunk, stopwatch):
        """Awake mode: handle a wake word, a follow-up chunk, or nothing."""
        hyp = None
        # With skip_wakeword set the wake word recognizer is not consulted.
        if not self.state.skip_wakeword:
            hyp = self.ww_recognizer.transcribe(
                chunk.get_wav_data(), metrics=self.metrics)

        if hyp and "mycroft" in hyp.hypstr.lower():
            self._handle_wakeword(chunk, hyp, stopwatch)
        elif self.state.skip_wakeword:
            SessionManager.touch()
            try:
                self.transcribe([chunk])
            except sr.UnknownValueError:
                logger.warn(
                    "Speech Recognition could not understand audio")
                self.__speak("Sorry, I didn't catch that.")
                self.metrics.increment("mycroft.recognizer.error")
            self.state.skip_wakeword = False
        else:
            self.metrics.clear()

    def _handle_wakeword(self, chunk, hyp, stopwatch):
        """Announce the wake word and transcribe the audio around it."""
        extractor = WakewordExtractor(
            chunk, self.ww_recognizer, self.metrics)
        # The two lap() calls bracket the range calculation.
        stopwatch.lap()
        extractor.calculate_range()
        self.metrics.timer(
            "mycroft.recognizer.extractor.time_s", stopwatch.lap())

        before = extractor.get_audio_data_before()
        self.metrics.timer(
            "mycroft.recognizer.audio_extracted.length_s",
            self._audio_length(before))
        after = extractor.get_audio_data_after()
        self.metrics.timer(
            "mycroft.recognizer.audio_extracted.length_s",
            self._audio_length(after))

        SessionManager.touch()
        self.emitter.emit("recognizer_loop:wakeword", {
            'utterance': hyp.hypstr,
            'session': SessionManager.get().session_id,
            'pos_begin': int(extractor.range.begin),
            'pos_end': int(extractor.range.end)
        })

        try:
            self.transcribe([before, after])
        except sr.UnknownValueError:
            # No utterance obtained: prompt and accept the next chunk
            # without requiring the wake word.
            self.__speak("Go ahead")
            self.state.skip_wakeword = True
            self.metrics.increment("mycroft.wakeword")

    def __speak(self, utterance):
        """
        Emit a "speak" message from a freshly started thread.

        Speak commands should be asynchronous to avoid filling up the
        portaudio buffer.
        :param utterance: text placed in the message metadata
        :return: None
        """
        def emit_speak():
            metadata = {'utterance': utterance,
                        'session': SessionManager.get().session_id}
            self.emitter.emit("speak", Message("speak", metadata=metadata))

        threading.Thread(target=emit_speak).start()

    def _create_remote_stt_runnable(self, audio, utterances):
        """Build a zero-argument callable that transcribes one segment.

        On success the lower-cased transcription is appended to
        ``utterances`` unless it is blank; an access-denied error speaks a
        pairing hint and appends the "pair my device" utterance.

        :param audio: audio segment handed to the remote recognizer
        :param utterances: list the callable appends its result to
        :return: the callable, suitable as a thread target
        """
        def stt_job():
            try:
                transcript = self.wrapped_remote_recognizer.transcribe(
                    audio, metrics=self.metrics).lower()
            except sr.UnknownValueError:
                return
            except sr.RequestError as err:
                logger.error(
                    "Could not request results from Speech Recognition "
                    "service; {0}".format(err))
                return
            except CerberusAccessDenied:
                logger.error("AccessDenied from Cerberus proxy.")
                self.__speak(
                    "Your device is not registered yet. To start pairing, "
                    "login at cerberus.mycroft.ai")
                utterances.append("pair my device")
                return

            # Only reached when the recognizer returned without raising.
            logger.debug("STT: " + transcript)
            if transcript.strip():
                utterances.append(transcript)
        return stt_job

    def transcribe(self, audio_segments):
        """Transcribe the segments in parallel and emit the results.

        :param audio_segments: iterable of audio segments
        :raises sr.UnknownValueError: when no utterance was collected
        """
        utterances = []
        workers = []
        for segment in audio_segments:
            if self._audio_length(segment) < self.MIN_AUDIO_SIZE:
                logger.debug("Audio too short to send to STT")
            else:
                job = self._create_remote_stt_runnable(segment, utterances)
                worker = threading.Thread(target=job)
                worker.start()
                workers.append(worker)

        # Wait for every recognizer thread before checking the results.
        for worker in workers:
            worker.join()

        if not utterances:
            raise sr.UnknownValueError

        self.emitter.emit("recognizer_loop:utterance", {
            'utterances': utterances,
            'session': SessionManager.get().session_id
        })
        self.metrics.attr('utterances', utterances)
Beispiel #21
0
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue

    Each chunk is handed either to the wake-up recognizer (while the
    shared state reports ``sleeping``) or to the STT engine, whose
    transcription is emitted as a "recognizer_loop:utterance" message.
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt,
                 wakeup_recognizer, wakeword_recognizer):
        """Store the collaborators and mark the thread as a daemon.

        :param state: shared object; ``running`` and ``sleeping`` are read,
            and ``sleeping`` is cleared on wake-up
        :param queue: queue the audio chunks are fetched from
        :param emitter: object whose ``emit`` publishes messages
        :param stt: STT engine providing ``execute(audio)`` and ``lang``
        :param wakeup_recognizer: provides ``found_wake_word(frame_data)``
        :param wakeword_recognizer: provides ``key_phrase``
        """
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        self.metrics = MetricsAggregator()

    def run(self):
        """Keep consuming chunks for as long as ``state.running`` is true."""
        while self.state.running:
            self.read()

    def read(self):
        """Fetch one chunk from the queue and dispatch it.

        Waits at most 0.5 seconds for a chunk so the ``run`` loop can
        re-check ``state.running`` regularly. ``None`` entries are ignored.
        """
        try:
            audio = self.queue.get(timeout=0.5)
        except Empty:
            return

        if audio is None:
            return

        if self.state.sleeping:
            self.wake_up(audio)
        else:
            self.process(audio)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave the sleeping state if the chunk contains the wake-up word.

        :param audio: audio chunk; its ``frame_data`` is checked
        """
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return the duration of ``audio`` in seconds.

        Computed as the byte length of ``frame_data`` divided by the number
        of bytes per second (``sample_rate * sample_width``).
        """
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Announce the wake word, transcribe the chunk and emit the result.

        A "recognizer_loop:wakeword" message is always emitted. Chunks
        shorter than ``MIN_AUDIO_SIZE`` are then dropped with a warning;
        otherwise the chunk is transcribed, a "recognizer_loop:utterance"
        message is emitted when a transcription was obtained, and the STT
        timing is reported in either case.

        :param audio: audio chunk to process
        """
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
            return

        stopwatch = Stopwatch()
        with stopwatch:
            transcription = self.transcribe(audio)

        if transcription:
            ident = str(stopwatch.timestamp) + str(hash(transcription))
            # STT succeeded, send the transcribed speech on for processing
            payload = {
                'utterances': [transcription],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id,
                'ident': ident
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [transcription])
        else:
            ident = str(stopwatch.timestamp)

        # Report timing metrics
        report_timing(ident, 'stt', stopwatch,
                      {'transcription': transcription,
                       'stt': self.stt.__class__.__name__})

    def transcribe(self, audio):
        """Run the STT engine on ``audio`` and return the recognized text.

        :param audio: audio chunk passed to ``self.stt.execute``
        :return: the lower-cased, stripped transcription; the phrase
            "pair my device" when the request was rejected with HTTP 401;
            ``None`` on any other failure

        Request-level failures (``sr.RequestError``, ``ConnectionError``,
        a non-401 ``HTTPError``, ``RequestException``) are logged and
        followed by a spoken dialog. Any other exception emits
        "recognizer_loop:speech.recognition.unknown" and returns without
        speaking.
        """
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
            return text
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))

            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            # The error may carry no response object at all; reading
            # ``e.response.status_code`` directly would then raise from
            # inside this handler and take the consumer thread down.
            response = getattr(e, 'response', None)
            status_code = getattr(response, 'status_code', None)
            if status_code == 401:
                LOG.warning("Access Denied at mycroft.ai")
                return "pair my device"  # phrase to start the pairing process
            else:
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except RequestException as e:
            LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')
            if isinstance(e, IndexError):
                LOG.info('no words were transcribed')
            else:
                LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            return None

        # Only the request-level failures above reach this point: tell the
        # user whether the backend or the local connection is at fault.
        if connected():
            dialog_name = 'backend.down'
        else:
            dialog_name = 'not connected to the internet'
        self.emitter.emit('speak', {'utterance': dialog.get(dialog_name)})
        return None

    def __speak(self, utterance):
        """Emit a "speak" message carrying ``utterance`` and the session id.

        :param utterance: text to be spoken
        """
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)