Example #1
 def process_wake_up(self, audio):
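     """Exit sleep mode when the wake-up phrase is recognized."""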
     if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                             self.metrics):
         SessionManager.touch()
         self.state.sleeping = False
         self.__speak("I'm awake.")  # TODO: Localization
         self.metrics.increment("mycroft.wakeup")
Example #2
 def wake_up(self, audio):
     if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                             self.metrics):
         SessionManager.touch()
         self.state.sleeping = False
         self.__speak(mycroft.dialog.get("i am awake", self.stt.lang))
         self.metrics.increment("mycroft.wakeup")
Example #3
    def process_wake_word(self, audio, timer):
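        """Check the audio for the wake word.

        On a hit, emit the wakeword event and send the audio captured
        before and after the wake word on to STT.
        """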
        hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
                                                 self.metrics)

        if self.mycroft_recognizer.contains(hyp):
            extractor = WordExtractor(audio, self.mycroft_recognizer,
                                      self.metrics)
            timer.lap()
            extractor.calculate_range()
            self.metrics.timer("mycroft.recognizer.extractor.time_s",
                               timer.lap())
            audio_before = extractor.get_audio_data_before()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_before))
            audio_after = extractor.get_audio_data_after()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_after))

            SessionManager.touch()
            payload = {
                'utterance': hyp.hypstr,
                'session': SessionManager.get().session_id,
                'pos_begin': extractor.begin,
                'pos_end': extractor.end
            }
            self.emitter.emit("recognizer_loop:wakeword", payload)

            try:
                self.transcribe([audio_before, audio_after])
            except sr.UnknownValueError:
                self.__speak("Go ahead")
                self.state.skip_wakeword = True
                self.metrics.increment("mycroft.wakeword")
Example #4
 def process_wake_up(self, audio):
     if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                             self.metrics):
         SessionManager.touch()
         self.state.sleeping = False
         self.__speak("I'm awake.")  # TODO: Localization
         self.metrics.increment("mycroft.wakeup")
Example #5
 def wake_up(self, audio):
     if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                             self.metrics):
         SessionManager.touch()
         self.state.sleeping = False
         self.__speak(mycroft.dialog.get("i am awake", self.stt.lang))
         self.metrics.increment("mycroft.wakeup")
Example #6
    def process_wake_word(self, audio, timer):
        hyp = self.mycroft_recognizer.transcribe(audio.frame_data,
                                                 self.metrics)

        if self.mycroft_recognizer.contains(hyp):
            extractor = WordExtractor(audio, self.mycroft_recognizer,
                                      self.metrics)
            timer.lap()
            extractor.calculate_range()
            self.metrics.timer("mycroft.recognizer.extractor.time_s",
                               timer.lap())
            audio_before = extractor.get_audio_data_before()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_before))
            audio_after = extractor.get_audio_data_after()
            self.metrics.timer("mycroft.recognizer.audio_extracted.length_s",
                               self._audio_length(audio_after))

            SessionManager.touch()
            payload = {
                'utterance': hyp.hypstr,
                'session': SessionManager.get().session_id,
                'pos_begin': extractor.begin,
                'pos_end': extractor.end
            }
            self.emitter.emit("recognizer_loop:wakeword", payload)

            try:
                self.transcribe([audio_before, audio_after])
            except sr.UnknownValueError:
                self.__speak("Go ahead")
                self.state.skip_wakeword = True
                self.metrics.increment("mycroft.wakeword")
Example #7
    def process(self, audio):
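        """Emit the wakeword event, transcribe the captured audio,
        forward any utterance on the bus and report STT timing.
        """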
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)
            # Report timing metrics
            report_timing(ident, 'stt', stopwatch,
                          {'transcription': transcription,
                           'stt': self.stt.__class__.__name__})
Example #8
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)
            # Report timing metrics
            report_timing(ident, 'stt', stopwatch, {
                'transcription': transcription,
                'stt': self.stt.__class__.__name__
            })
Example #9
 def process_skip_wake_word(self, audio):
     SessionManager.touch()
     try:
         self.transcribe([audio])
     except sr.UnknownValueError:
         logger.warn("Speech Recognition could not understand audio")
         self.__speak("Sorry, I didn't catch that.")
         self.metrics.increment("mycroft.recognizer.error")
     self.state.skip_wakeword = False
Example #10
 def process_skip_wake_word(self, audio):
     SessionManager.touch()
     try:
         self.transcribe([audio])
     except sr.UnknownValueError:
         logger.warn("Speech Recognition could not understand audio")
         self.__speak("Sorry, I didn't catch that.")
         self.metrics.increment("mycroft.recognizer.error")
     self.state.skip_wakeword = False
Example #11
    def _send_wakeword_info(self, emitter):
        """Send messagebus message indicating that a wakeword was received.

        Arguments:
            emitter: bus emitter to send information on.
        """
        SessionManager.touch()
        payload = {'utterance': self.wake_word_name,
                   'session': SessionManager.get().session_id}
        emitter.emit("recognizer_loop:wakeword", payload)
Example #12
 def wake_up(self, audio):
     if self.wakeup_recognizer.is_recognized(audio.frame_data,
                                             self.metrics):
         SessionManager.touch()
         self.state.sleeping = False
         lines = ["I'm awake.",
                  "System rebooted.",
                  "All systems check. I am now online.",
                  "Waking up."]
         self.__speak(choice(lines))
         self.metrics.increment("mycroft.wakeup")
Example #13
 def process_audio(self, audio):
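     """Emit the wakeword event and transcribe the captured audio."""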
     SessionManager.touch()
     payload = {
         'utterance': self.mycroft_recognizer.key_phrase,
         'session': SessionManager.get().session_id,
     }
     self.emitter.emit("recognizer_loop:wakeword", payload)
     try:
         self.transcribe([audio])
     except sr.UnknownValueError:  # TODO: Localization
         logger.warn("Speech Recognition could not understand audio")
Example #14
 def process_audio(self, audio):
     SessionManager.touch()
     payload = {
         'utterance': self.mycroft_recognizer.key_phrase,
         'session': SessionManager.get().session_id,
     }
     self.emitter.emit("recognizer_loop:wakeword", payload)
     try:
         self.transcribe([audio])
     except sr.UnknownValueError:  # TODO: Localization
         logger.warn("Speech Recognition could not understand audio")
         self.__speak("Sorry, I didn't catch that.")
Example #15
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)
Example #16
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            self.transcribe(audio)
Example #17
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
        elif connected():
            self.transcribe(audio)
        else:
            self.__speak("Mycroft seems not to be connected to the Internet")
Example #18
    def process(self, audio):
        SessionManager.touch()
        payload = {
            'utterance': self.mycroft_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warn("Audio too short to be processed")
            self.emitter.emit("recognizer_loop:tooshort", {})
        elif connected():
            self.transcribe(audio)
        else:
            self.__speak("Mycroft seems not to be connected to the Internet")
Example #19
 def __speak(self, utterance):
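     """Emit a "speak" message for the given utterance on the bus."""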
     print "going to speak " + utterance
     payload = {
         'utterance': utterance,
         'session': SessionManager.get().session_id
     }
     self.emitter.emit("speak", Message("speak", payload))
Example #20
 def transcribe(self, audio):
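     """Run STT on the audio and emit any resulting utterance.

     An HTTP 401 from the backend is answered with the pairing phrase.
     """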
     text = None
     try:
         print("aud: " + str((audio)))
         initial = self.stt.execute(audio)
         print("initial: " + initial)
         text = initial.lower().strip()
         LOG.debug("STT: " + text)
     except sr.RequestError as e:
         LOG.error("Could not request Speech Recognition {0}".format(e))
     except HTTPError as e:
         if e.response.status_code == 401:
             text = "pair my device"
             LOG.warn("Access Denied at mycroft.ai")
     except Exception as e:
         LOG.error(e)
         LOG.error("Speech Recognition could not understand audio")
         self.__speak("Sorry, I didn't catch that")
     if text:
         payload = {
             'utterances': [text],
             'session': SessionManager.get().session_id
         }
         self.emitter.emit("recognizer_loop:utterance", payload)
         self.metrics.attr('utterances', [text])
Example #21
    def transcribe(self, audio_segments):
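        """Send each audio segment to remote STT in a separate thread.

        Emits the collected utterances, or raises sr.UnknownValueError
        if nothing was transcribed.
        """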
        utterances = []
        threads = []
        if connected():
            for audio in audio_segments:
                if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                    logger.debug("Audio too short to send to STT")
                    continue

                target = self._create_remote_stt_runnable(audio, utterances)
                t = threading.Thread(target=target)
                t.start()
                threads.append(t)

            for thread in threads:
                thread.join()
            if len(utterances) > 0:
                payload = {
                    'utterances': utterances,
                    'session': SessionManager.get().session_id
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', utterances)
            else:
                raise sr.UnknownValueError
        else:  # TODO: Localization
            self.__speak("This device is not connected to the Internet")
Example #22
 def transcribe(self, audio):
     text = None
     try:
         # Invoke the STT engine on the audio clip
         text = self.stt.execute(audio).lower().strip()
         LOG.debug("STT: " + text)
     except sr.RequestError as e:
         LOG.error("Could not request Speech Recognition {0}".format(e))
     except ConnectionError as e:
         LOG.error("Connection Error: {0}".format(e))
         self.__speak("Intelora seems not to be connected to the Internet.")
     except HTTPError as e:
         if e.response.status_code == 401:
             text = "pair my device"
             LOG.warn("Access Denied at Mycroft API")
     except Exception as e:
         LOG.error(e)
         LOG.error("Speech Recognition could not understand audio")
         lines = ["Sorry, I didn't catch that.",
                  "Sorry, I didn't hear you clearly.",
                  "Can you repeat what you said, please?",
                  "Can you please say that again?"]
         self.__speak(choice(lines))
     if text:
         # STT succeeded, send the transcribed speech on for processing
         payload = {
             'utterances': [text],
             'lang': self.stt.lang,
             'session': SessionManager.get().session_id
         }
         self.emitter.emit("recognizer_loop:utterance", payload)
         self.metrics.attr('utterances', [text])
Example #23
 def transcribe(self, audio):
     text = None
     try:
         # Invoke the STT engine on the audio clip
         text = self.stt.execute(audio).lower().strip()
         LOG.debug("STT: " + text)
     except sr.RequestError as e:
         LOG.error("Could not request Speech Recognition {0}".format(e))
     except ConnectionError as e:
         LOG.error("Connection Error: {0}".format(e))
         self.emitter.emit("recognizer_loop:no_internet")
     except HTTPError as e:
         if e.response.status_code == 401:
             text = "pair my device"  # phrase to start the pairing process
             LOG.warning("Access Denied at mycroft.ai")
     except Exception as e:
         LOG.error(e)
         LOG.error("Speech Recognition could not understand audio")
     if text:
         # STT succeeded, send the transcribed speech on for processing
         payload = {
             'utterances': [text],
             'lang': self.stt.lang,
             'session': SessionManager.get().session_id
         }
         self.emitter.emit("recognizer_loop:utterance", payload)
         self.metrics.attr('utterances', [text])
Example #24
    def transcribe(self, audio_segments):
        utterances = []
        threads = []
        if connected():
            for audio in audio_segments:
                if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
                    logger.debug("Audio too short to send to STT")
                    continue

                target = self._create_remote_stt_runnable(audio, utterances)
                t = threading.Thread(target=target)
                t.start()
                threads.append(t)

            for thread in threads:
                thread.join()
            if len(utterances) > 0:
                payload = {
                    'utterances': utterances,
                    'session': SessionManager.get().session_id
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', utterances)
            else:
                raise sr.UnknownValueError
        else:  # TODO: Localization
            self.__speak("This device is not connected to the Internet")
Example #25
 def transcribe(self, audio):
     text = None
     try:
         # Invoke the STT engine on the audio clip
         text = self.stt.execute(audio).lower().strip()
         LOG.debug("STT: " + text)
     except sr.RequestError as e:
         LOG.error("Could not request Speech Recognition {0}".format(e))
     except ConnectionError as e:
         LOG.error("Connection Error: {0}".format(e))
         self.emitter.emit("recognizer_loop:no_internet")
     except HTTPError as e:
         if e.response.status_code == 401:
             text = "pair my device"  # phrase to start the pairing process
             LOG.warning("Access Denied at mycroft.ai")
     except Exception as e:
         self.emitter.emit('recognizer_loop:speech.recognition.unknown')
         LOG.error(e)
         LOG.error("Speech Recognition could not understand audio")
     if text:
         # STT succeeded, send the transcribed speech on for processing
         payload = {
             'utterances': [text],
             'lang': self.stt.lang,
             'session': SessionManager.get().session_id
         }
         self.emitter.emit("recognizer_loop:utterance", payload)
         self.metrics.attr('utterances', [text])
Example #26
 def target():
     self.emitter.emit(
         "speak",
         Message("speak",
                 metadata={
                     'utterance': utterance,
                     'session': SessionManager.get().session_id
                 }))
Example #27
 def publish(self, events):
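     """POST the events as JSON, tagging them with the session id."""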
     if 'session_id' not in events:
         session_id = SessionManager.get().session_id
         events['session_id'] = session_id
     if self.enabled:
         requests.post(self.url,
                       headers={'Content-Type': 'application/json'},
                       data=json.dumps(events),
                       verify=False)
Example #28
 def publish(self, events):
     if 'session_id' not in events:
         session_id = SessionManager.get().session_id
         events['session_id'] = session_id
     if self.enabled:
         requests.post(
             self.url,
             headers={'Content-Type': 'application/json'},
             data=json.dumps(events), verify=False)
Example #29
    def try_consume_audio(self):
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s", self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(), metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)
            if hyp and hyp.hypstr.lower().find("wake up") >= 0:
                SessionManager.touch()
                self.state.sleeping = False
                self.__speak("I'm awake.")  # TODO: Localization
                self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(), metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                extractor = WakewordExtractor(audio, self.ww_recognizer, self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s", timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer("mycroft.recognizer.audio_extracted.length_s", self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer("mycroft.recognizer.audio_extracted.length_s", self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    self.metrics.increment("mycroft.wakeword")

            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    logger.warn("Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                self.state.skip_wakeword = False
            else:
                self.metrics.clear()
        self.metrics.flush()
Example #30
 def process_audio(self, audio):
     SessionManager.touch()
     payload = {
         'utterance': self.mycroft_recognizer.key_phrase,
         'session': SessionManager.get().session_id,
     }
     self.emitter.emit("recognizer_loop:wakeword", payload)
     try:
         self.transcribe([audio])
     except sr.UnknownValueError:  # TODO: Localization
         logger.warn("Speech Recognition could not understand audio")
         self.__speak("Sorry, I didn't catch that.")
         bus = dbus.SessionBus()
         remote_object = bus.get_object(
             "com.mycroftkde.KDEPlasmoid",
             "/ComMycroftkdeKDEPlasmoidInterface")
         setText = remote_object.setText(
             "Sorry, I didn't catch that.",
             dbus_interface="com.mycroftkde.KDEPlasmoid")
Example #31
    def _compile_metadata(self):
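        """Compile the metadata that accompanies saved wake-word audio."""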
        ww_module = self.wake_word_recognizer.__class__.__name__
        if ww_module == 'PreciseHotword':
            model_path = self.wake_word_recognizer.precise_model
            with open(model_path, 'rb') as f:
                model_hash = md5(f.read()).hexdigest()
        else:
            model_hash = '0'

        return {
            'name': self.wake_word_name.replace(' ', '-'),
            'engine': md5(ww_module.encode('utf-8')).hexdigest(),
            'time': str(int(1000 * get_time())),
            'sessionId': SessionManager.get().session_id,
            'accountId': self.account_id,
            'model': str(model_hash)
        }
Example #32
    def process(self, audio):
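        """Transcribe the audio and forward any utterance.

        Adverbs such as "quickly" or "loudly" in the transcription are
        written to set_config.txt as rate/volume settings.
        """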
        path = pathlib.Path().absolute()
        settings_file = open(
            str(path) + '/mycroft/client/speech/set_config.txt', 'w')

        if self._audio_length(audio) >= self.MIN_AUDIO_SIZE:
            stopwatch = Stopwatch()
            with stopwatch:
                # transcribe() may return None; use '' so the keyword
                # checks below are safe
                transcription = self.transcribe(audio) or ''

            settings = {"rate": "  '1.0'  ", "volume": " '80%' "}
            #speed
            if "quickly" in transcription:
                settings["rate"] = "  '1.6'  "
            if "slowly" in transcription:
                settings["rate"] = "  '.6'  "
            #volume
            if "loudly" in transcription:
                settings["volume"] = "  '100%'  "
            if "softly" in transcription:
                settings["volume"] = "  '50%'  "


            settings_file.write(str(settings))
            settings_file.close()

            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
                # Report timing metrics
                report_timing(ident, 'stt', stopwatch,
                              {'transcription': transcription,
                               'stt': self.stt.__class__.__name__})
            else:
                ident = str(stopwatch.timestamp)
        else:
            LOG.warning("Audio too short to be processed")
Example #33
    def transcribe(self, audio):
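        """Run STT on the audio and emit any resulting utterance.

        A literal "tell me more" is rewritten into a query about the
        hot word stored in hotWordFile.txt.
        """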
        LOG.debug("Transcribing audio")
        text = None
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: --------->" + text)
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))
            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                text = "pair my device"  # phrase to start the pairing process
                LOG.warning("Access Denied at mycroft.ai")
        except Exception as e:
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
        if text:
            # STT succeeded, send the transcribed speech on for processing
            LOG.debug("I understood you to say: " + text)
            tellMeMore = "tell me more"
            if text == tellMeMore:
                LOG.info("found 'tell me more' in listener")
                # Rewrite the utterance as a query about the previously
                # stored hot word
                with open("hotWordFile.txt", "r+") as hotWordTemp:
                    prevHotWord = hotWordTemp.read()
                    hotWordTemp.truncate(0)
                    text = "tell me about " + prevHotWord
                    LOG.debug("rewritten utterance: " + text)

            payload = {
                'utterances': [text],
                'lang': self.stt.lang,
                'session': SessionManager.get().session_id
            }
            self.emitter.emit("recognizer_loop:utterance", payload)
            self.metrics.attr('utterances', [text])
Example #34
 def transcribe(self, audio):
     text = None
     try:
         text = self.stt.execute(audio).lower().strip()
         LOG.debug("STT: " + text)
     except sr.RequestError as e:
         LOG.error("Could not request Speech Recognition {0}".format(e))
     except HTTPError as e:
         if e.response.status_code == 401:
             text = "pair my device"
             LOG.warn("Access Denied at mycroft.ai")
     except Exception as e:
         LOG.error(e)
         LOG.error("Speech Recognition could not understand audio")
         self.__speak("Sorry, I didn't catch that")
     if text:
         payload = {
             'utterances': [text],
             'session': SessionManager.get().session_id
         }
         self.emitter.emit("recognizer_loop:utterance", payload)
         self.metrics.attr('utterances', [text])
Example #35
    def _upload_wake_word(self, audio):
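        """Upload the wake-word audio and its metadata to upload_url."""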
        ww_module = self.wake_word_recognizer.__class__.__name__
        if ww_module == 'PreciseHotword':
            model_path = self.wake_word_recognizer.precise_model
            with open(model_path, 'rb') as f:
                model_hash = md5(f.read()).hexdigest()
        else:
            model_hash = '0'

        metadata = {
            'name': self.wake_word_name.replace(' ', '-'),
            'engine': md5(ww_module.encode('utf-8')).hexdigest(),
            'time': str(int(1000 * get_time())),
            'sessionId': SessionManager.get().session_id,
            'accountId': self.account_id,
            'model': str(model_hash)
        }
        requests.post(self.upload_url,
                      files={
                          'audio': BytesIO(audio.get_wav_data()),
                          'metadata': StringIO(json.dumps(metadata))
                      })
Example #36
    def _upload_wake_word(self, audio):
        ww_module = self.wake_word_recognizer.__class__.__name__
        if ww_module == 'PreciseHotword':
            model_path = self.wake_word_recognizer.precise_model
            with open(model_path, 'rb') as f:
                model_hash = md5(f.read()).hexdigest()
        else:
            model_hash = '0'

        metadata = {
            'name': self.wake_word_name.replace(' ', '-'),
            'engine': md5(ww_module.encode('utf-8')).hexdigest(),
            'time': str(int(1000 * get_time())),
            'sessionId': SessionManager.get().session_id,
            'accountId': self.account_id,
            'model': str(model_hash)
        }
        requests.post(
            self.upload_url, files={
                'audio': BytesIO(audio.get_wav_data()),
                'metadata': StringIO(json.dumps(metadata))
            }
        )
Example #37
 def __speak(self, utterance):
     payload = {
         'utterance': utterance,
         'session': SessionManager.get().session_id
     }
     self.emitter.emit("speak", Message("speak", metadata=payload))
Example #38
    def _wait_until_wake_word(self, source, sec_per_buffer):
        """Listen continuously on source until a wake word is spoken

        Args:
            source (AudioSource):  Source producing the audio chunks
            sec_per_buffer (float):  Fractional number of seconds in each chunk
        """
        num_silent_bytes = int(self.SILENCE_SEC * source.SAMPLE_RATE *
                               source.SAMPLE_WIDTH)

        silence = '\0' * num_silent_bytes

        # bytearray to store audio in
        byte_data = silence

        buffers_per_check = self.SEC_BETWEEN_WW_CHECKS / sec_per_buffer
        buffers_since_check = 0.0

        # Max bytes for byte_data before audio is removed from the front
        max_size = self.sec_to_bytes(self.SAVED_WW_SEC, source)
        test_size = self.sec_to_bytes(self.TEST_WW_SEC, source)

        said_wake_word = False

        # Rolling buffer to track the audio energy (loudness) heard on
        # the source recently.  An average audio energy is maintained
        # based on these levels.
        energies = []
        idx_energy = 0
        avg_energy = 0.0
        energy_avg_samples = int(5 / sec_per_buffer)  # avg over last 5 secs

        counter = 0

        while not said_wake_word and not self._stop_signaled:
            if self._skip_wake_word():
                break
            chunk = self.record_sound_chunk(source)

            energy = self.calc_energy(chunk, source.SAMPLE_WIDTH)
            if energy < self.energy_threshold * self.multiplier:
                self._adjust_threshold(energy, sec_per_buffer)

            if len(energies) < energy_avg_samples:
                # build the average
                energies.append(energy)
                avg_energy += float(energy) / energy_avg_samples
            else:
                # maintain the running average and rolling buffer
                avg_energy -= float(energies[idx_energy]) / energy_avg_samples
                avg_energy += float(energy) / energy_avg_samples
                energies[idx_energy] = energy
                idx_energy = (idx_energy + 1) % energy_avg_samples

                # maintain the threshold using average
                if energy < avg_energy * 1.5:
                    if energy > self.energy_threshold:
                        # bump the threshold to just above this value
                        self.energy_threshold = energy * 1.2

            # Periodically output energy level stats.  This can be used to
            # visualize the microphone input, e.g. a needle on a meter.
            if counter % 3:
                with open(self.mic_level_file, 'w') as f:
                    f.write("Energy:  cur=" + str(energy) + " thresh=" +
                            str(self.energy_threshold))
            counter += 1

            # At first, the buffer is empty and must fill up.  After that
            # just drop the first chunk bytes to keep it the same size.
            needs_to_grow = len(byte_data) < max_size
            if needs_to_grow:
                byte_data += chunk
            else:  # Remove beginning of audio and add new chunk to end
                byte_data = byte_data[len(chunk):] + chunk

            buffers_since_check += 1.0
            if buffers_since_check > buffers_per_check:
                buffers_since_check -= buffers_per_check
                chopped = byte_data[-test_size:] \
                    if test_size < len(byte_data) else byte_data
                audio_data = chopped + silence
                said_wake_word = \
                    self.wake_word_recognizer.found_wake_word(audio_data)
                # If the wake word was successfully detected, save the
                # audio to a file.
                if self.save_wake_words and said_wake_word:
                    audio = self._create_audio_data(byte_data, source)
                    stamp = str(int(1000 * get_time()))
                    uid = SessionManager.get().session_id
                    if not isdir(self.save_wake_words_dir):
                        mkdir(self.save_wake_words_dir)

                    dr = self.save_wake_words_dir
                    ww = self.wake_word_name.replace(' ', '-')
                    filename = join(dr, ww + '.' + stamp + '.' + uid + '.wav')
                    with open(filename, 'wb') as f:
                        f.write(audio.get_wav_data())

                    if self.upload_config['enable'] or self.config['opt_in']:
                        t = Thread(target=self._upload_file, args=(filename,))
                        t.daemon = True
                        t.start()
Example #39
    def _wait_until_wake_word(self, source, sec_per_buffer, emitter):
        """Listen continuously on source until a wake word is spoken

        Args:
            source (AudioSource):  Source producing the audio chunks
            sec_per_buffer (float):  Fractional number of seconds in each chunk
        """
        num_silent_bytes = int(self.SILENCE_SEC * source.SAMPLE_RATE *
                               source.SAMPLE_WIDTH)

        silence = get_silence(num_silent_bytes)

        # bytearray to store audio in
        byte_data = silence

        buffers_per_check = self.SEC_BETWEEN_WW_CHECKS / sec_per_buffer
        buffers_since_check = 0.0

        # Max bytes for byte_data before audio is removed from the front
        max_size = self.sec_to_bytes(self.SAVED_WW_SEC, source)
        test_size = self.sec_to_bytes(self.TEST_WW_SEC, source)

        said_wake_word = False

        # Rolling buffer to track the audio energy (loudness) heard on
        # the source recently.  An average audio energy is maintained
        # based on these levels.
        energies = []
        idx_energy = 0
        avg_energy = 0.0
        energy_avg_samples = int(5 / sec_per_buffer)  # avg over last 5 secs
        counter = 0

        # These are frames immediately after wake word is detected
        # that we want to keep to send to STT
        ww_frames = deque(maxlen=7)

        while not said_wake_word and not self._stop_signaled:
            if self._skip_wake_word():
                break
            chunk = self.record_sound_chunk(source)
            ww_frames.append(chunk)

            energy = self.calc_energy(chunk, source.SAMPLE_WIDTH)
            if energy < self.energy_threshold * self.multiplier:
                self._adjust_threshold(energy, sec_per_buffer)

            if len(energies) < energy_avg_samples:
                # build the average
                energies.append(energy)
                avg_energy += float(energy) / energy_avg_samples
            else:
                # maintain the running average and rolling buffer
                avg_energy -= float(energies[idx_energy]) / energy_avg_samples
                avg_energy += float(energy) / energy_avg_samples
                energies[idx_energy] = energy
                idx_energy = (idx_energy + 1) % energy_avg_samples

                # maintain the threshold using average
                if energy < avg_energy * 1.5:
                    if energy > self.energy_threshold:
                        # bump the threshold to just above this value
                        self.energy_threshold = energy * 1.2

            # Periodically output energy level stats.  This can be used to
            # visualize the microphone input, e.g. a needle on a meter.
            if counter % 3:
                self.write_mic_level(energy, source)
            counter += 1

            # At first, the buffer is empty and must fill up.  After that
            # just drop the first chunk bytes to keep it the same size.
            needs_to_grow = len(byte_data) < max_size
            if needs_to_grow:
                byte_data += chunk
            else:  # Remove beginning of audio and add new chunk to end
                byte_data = byte_data[len(chunk):] + chunk

            buffers_since_check += 1.0
            self.wake_word_recognizer.update(chunk)
            if buffers_since_check > buffers_per_check:
                buffers_since_check -= buffers_per_check
                chopped = byte_data[-test_size:] \
                    if test_size < len(byte_data) else byte_data
                audio_data = chopped + silence
                said_wake_word = \
                    self.wake_word_recognizer.found_wake_word(audio_data)

                # Save positive wake words as appropriate
                if said_wake_word:
                    SessionManager.touch()
                    payload = {
                        'utterance': self.wake_word_name,
                        'session': SessionManager.get().session_id,
                    }
                    emitter.emit("recognizer_loop:wakeword", payload)

                    audio = None
                    mtd = None
                    if self.save_wake_words:
                        # Save wake word locally
                        audio = self._create_audio_data(byte_data, source)
                        mtd = self._compile_metadata()
                        if not isdir(self.saved_wake_words_dir):
                            os.mkdir(self.saved_wake_words_dir)
                        module = self.wake_word_recognizer.__class__.__name__

                        fn = join(
                            self.saved_wake_words_dir,
                            '_'.join(str(mtd[k])
                                     for k in sorted(mtd)) + '.wav')
                        with open(fn, 'wb') as f:
                            f.write(audio.get_wav_data())

                    if self.config['opt_in'] and not self.upload_disabled:
                        # Upload wake word for opt_in people
                        Thread(
                            target=self._upload_wake_word,
                            daemon=True,
                            args=[
                                audio
                                or self._create_audio_data(byte_data, source),
                                mtd or self._compile_metadata()
                            ]).start()
        return ww_frames
Example #40
 def wake_up(self, audio):
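     """Exit sleep mode and emit recognizer_loop:awoken."""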
     if self.wakeup_recognizer.found_wake_word(audio.frame_data):
         SessionManager.touch()
         self.state.sleeping = False
         self.emitter.emit('recognizer_loop:awoken')
         self.metrics.increment("mycroft.wakeup")
Example #41
 def wake_up(self, audio):
     if self.wakeup_recognizer.found_wake_word(audio.frame_data):
         SessionManager.touch()
         self.state.sleeping = False
         self.emitter.emit('recognizer_loop:awoken')
         self.metrics.increment("mycroft.wakeup")
Example #42
 def __speak(self, utterance):
     payload = {
         'utterance': utterance,
         'session': SessionManager.get().session_id
     }
     self.emitter.emit("speak", Message("speak", metadata=payload))
 def target():
     self.emitter.emit(
         "speak",
         Message("speak",
                 metadata={'utterance': utterance,
                           'session': SessionManager.get().session_id}))
Example #44
    def try_consume_audio(self):
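        """Take one audio chunk from the queue and route it.

        While sleeping, only the "wake up" phrase is acted on; otherwise
        look for the wake word and pass the surrounding audio to STT.
        """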
        timer = Stopwatch()
        hyp = None
        audio = self.queue.get()
        self.metrics.timer("mycroft.recognizer.audio.length_s",
                           self._audio_length(audio))
        self.queue.task_done()
        timer.start()
        if self.state.sleeping:
            hyp = self.wakeup_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)
            if hyp and hyp.hypstr:
                logger.debug("sleeping recognition: " + hyp.hypstr)
            if hyp and hyp.hypstr.lower().find("wake up") >= 0:
                SessionManager.touch()
                self.state.sleeping = False
                self.__speak("I'm awake.")  # TODO: Localization
                self.metrics.increment("mycroft.wakeup")
        else:
            if not self.state.skip_wakeword:
                hyp = self.ww_recognizer.transcribe(audio.get_wav_data(),
                                                    metrics=self.metrics)

            if hyp and hyp.hypstr.lower().find("mycroft") >= 0:
                extractor = WakewordExtractor(audio, self.ww_recognizer,
                                              self.metrics)
                timer.lap()
                extractor.calculate_range()
                self.metrics.timer("mycroft.recognizer.extractor.time_s",
                                   timer.lap())
                audio_before = extractor.get_audio_data_before()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_before))
                audio_after = extractor.get_audio_data_after()
                self.metrics.timer(
                    "mycroft.recognizer.audio_extracted.length_s",
                    self._audio_length(audio_after))

                SessionManager.touch()
                payload = {
                    'utterance': hyp.hypstr,
                    'session': SessionManager.get().session_id,
                    'pos_begin': int(extractor.range.begin),
                    'pos_end': int(extractor.range.end)
                }
                self.emitter.emit("recognizer_loop:wakeword", payload)

                try:
                    self.transcribe([audio_before, audio_after])
                except sr.UnknownValueError:
                    self.__speak("Go ahead")
                    self.state.skip_wakeword = True
                    self.metrics.increment("mycroft.wakeword")

            elif self.state.skip_wakeword:
                SessionManager.touch()
                try:
                    self.transcribe([audio])
                except sr.UnknownValueError:
                    logger.warn(
                        "Speech Recognition could not understand audio")
                    self.__speak("Sorry, I didn't catch that.")
                    self.metrics.increment("mycroft.recognizer.error")
                self.state.skip_wakeword = False
            else:
                self.metrics.clear()
        self.metrics.flush()
Example #45
    def _wait_until_wake_word(self, source, sec_per_buffer):
        """Listen continuously on source until a wake word is spoken

        Args:
            source (AudioSource):  Source producing the audio chunks
            sec_per_buffer (float):  Fractional number of seconds in each chunk
        """
        num_silent_bytes = int(self.SILENCE_SEC * source.SAMPLE_RATE *
                               source.SAMPLE_WIDTH)

        silence = '\0' * num_silent_bytes

        # bytearray to store audio in
        byte_data = silence

        buffers_per_check = self.SEC_BETWEEN_WW_CHECKS / sec_per_buffer
        buffers_since_check = 0.0

        # Max bytes for byte_data before audio is removed from the front
        max_size = self.sec_to_bytes(self.SAVED_WW_SEC, source)
        test_size = self.sec_to_bytes(self.TEST_WW_SEC, source)

        said_wake_word = False

        # Rolling buffer to track the audio energy (loudness) heard on
        # the source recently.  An average audio energy is maintained
        # based on these levels.
        energies = []
        idx_energy = 0
        avg_energy = 0.0
        energy_avg_samples = int(5 / sec_per_buffer)  # avg over last 5 secs

        counter = 0

        while not said_wake_word and not self._stop_signaled:
            if self._skip_wake_word():
                break
            chunk = self.record_sound_chunk(source)

            energy = self.calc_energy(chunk, source.SAMPLE_WIDTH)
            if energy < self.energy_threshold * self.multiplier:
                self._adjust_threshold(energy, sec_per_buffer)

            if len(energies) < energy_avg_samples:
                # build the average
                energies.append(energy)
                avg_energy += float(energy) / energy_avg_samples
            else:
                # maintain the running average and rolling buffer
                avg_energy -= float(energies[idx_energy]) / energy_avg_samples
                avg_energy += float(energy) / energy_avg_samples
                energies[idx_energy] = energy
                idx_energy = (idx_energy + 1) % energy_avg_samples

                # maintain the threshold using average
                if energy < avg_energy * 1.5:
                    if energy > self.energy_threshold:
                        # bump the threshold to just above this value
                        self.energy_threshold = energy * 1.2

            # Periodically output energy level stats.  This can be used to
            # visualize the microphone input, e.g. a needle on a meter.
            if counter % 3:
                with open(self.mic_level_file, 'w') as f:
                    f.write("Energy:  cur=" + str(energy) + " thresh=" +
                            str(self.energy_threshold))
            counter += 1

            # At first, the buffer is empty and must fill up.  After that
            # just drop the first chunk bytes to keep it the same size.
            needs_to_grow = len(byte_data) < max_size
            if needs_to_grow:
                byte_data += chunk
            else:  # Remove beginning of audio and add new chunk to end
                byte_data = byte_data[len(chunk):] + chunk

            buffers_since_check += 1.0
            if buffers_since_check > buffers_per_check:
                buffers_since_check -= buffers_per_check
                chopped = byte_data[-test_size:] \
                    if test_size < len(byte_data) else byte_data
                audio_data = chopped + silence
                said_wake_word = \
                    self.wake_word_recognizer.found_wake_word(audio_data)
                # If the wake word was successfully detected, save the
                # audio to a file.
                if self.save_wake_words and said_wake_word:
                    audio = self._create_audio_data(byte_data, source)
                    stamp = str(int(1000 * get_time()))
                    uid = SessionManager.get().session_id
                    if not isdir(self.save_wake_words_dir):
                        mkdir(self.save_wake_words_dir)

                    dr = self.save_wake_words_dir
                    ww = self.wake_word_name.replace(' ', '-')
                    filename = join(dr, ww + '.' + stamp + '.' + uid + '.wav')
                    with open(filename, 'wb') as f:
                        f.write(audio.get_wav_data())

                    if self.upload_config['enable'] or self.config['opt_in']:
                        t = Thread(target=self._upload_file, args=(filename, ))
                        t.daemon = True
                        t.start()