Example 1
    def _load_config(self):
        """Load configuration parameters from configuration."""
        config = CONFIGURATION
        self.config_core = config
        self.lang = config.get('lang')
        self.config = config.get('listener')
        rate = self.config.get('sample_rate')

        device_index = self.config.get('device_index')
        device_name = self.config.get('device_name')
        if device_index is None and device_name:
            device_index = find_input_device(device_name)

        LOG.debug('Using microphone (None = default): ' + str(device_index))

        self.microphone = MutableMicrophone(device_index,
                                            rate,
                                            mute=self.mute_calls > 0)

        # TODO - localization
        self.wakeup_recognizer = self.create_wakeup_recognizer()
        self.hotword_engines = {}
        self.create_hotword_engines()
        self.responsive_recognizer = ResponsiveRecognizer(self.hotword_engines)
        self.state = RecognizerLoopState()
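
For reference, a minimal sketch of the configuration shape this method reads, using only the keys accessed above (the values are illustrative, not project defaults):

    CONFIGURATION = {
        "lang": "en-us",
        "listener": {
            "sample_rate": 16000,     # passed to MutableMicrophone
            "device_index": None,     # explicit PyAudio device index, if known
            "device_name": "pulse"    # name/regex fallback for find_input_device
        }
    }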
Example 2
 def on_preferences_changed(self, event):
     preferences = json.loads(event["data"]["preferences"])
     for pref in preferences:
         user_id = pref["user_id"]
         category = pref["category"]
         value = pref["value"]
         LOG.debug(category + ":" + value)
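
The handler above only implies the payload shape; a hypothetical event that satisfies it (field names from the code, values invented) would be:

    import json

    event = {"data": {"preferences": json.dumps([
        {"user_id": "user123", "category": "notifications", "value": "all"}
    ])}}
    # on_preferences_changed(event) then logs "notifications:all"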
Example 3
    def execute(self, sentence, ident=None, listen=False):
        """Convert sentence to speech, preprocessing out unsupported ssml

            The method caches results if possible using the hash of the
            sentence.

            Arguments:
                sentence:   Sentence to be spoken
                ident:      Id reference to current interaction
                listen:     True if listen should be triggered at the end
                            of the utterance.
        """
        sentence = self.validate_ssml(sentence)

        chunks = self._preprocess_sentence(sentence)
        # Apply the listen flag to the last chunk, set the rest to False
        chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
                  for i in range(len(chunks))]

        for sentence, listen_flag in chunks:
            key = str(hashlib.md5(
                sentence.encode('utf-8', 'ignore')).hexdigest())
            wav_file = os.path.join(self.cache_dir,
                                    key + '.' + self.audio_ext)

            if os.path.exists(wav_file):
                LOG.debug("TTS cache hit")
                phonemes = self.load_phonemes(key)
            else:
                wav_file, phonemes = self.get_tts(sentence, wav_file)
                if phonemes:
                    self.save_phonemes(key, phonemes)

            vis = self.viseme(phonemes) if phonemes else None
            self.queue.put((self.audio_ext, wav_file, vis, ident, listen_flag))
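
The cache key is simply the MD5 hex digest of the chunk text, so identical sentences reuse the same cached audio file. In isolation:

    import hashlib

    key = hashlib.md5("hello world".encode('utf-8', 'ignore')).hexdigest()
    # key == '5eb63bbbe01eeed093cb22bb8f5acdc3'
    # -> cached as <cache_dir>/5eb63bbbe01eeed093cb22bb8f5acdc3.<audio_ext>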
Example 4
 def speak(self, utterance):
     if self.debug:
         LOG.debug("[SPEAK] " + utterance)
     if self.color:
         print('\x1b[6;34;40m MYCROFT: ' + utterance + ' \x1b[0m')
     else:
         print('MYCROFT: ' + utterance)
Example 5
 def say(self, utterance):
     if self.debug:
         LOG.debug("[UTTERANCE] " + utterance)
     if self.color:
         print('\x1b[6;33;40m YOU: ' + utterance + ' \x1b[0m')
     else:
         print('YOU: ' + utterance)
Example 6
    def on_gui_message(self, payload):
        try:
            msg = json.loads(payload)
            if self.debug:
                LOG.debug("Msg: " + str(payload))
            msg_type = msg.get("type")
            if msg_type == "mycroft.session.set":
                skill = msg.get("namespace")
                self.skill = self.skill or skill
                data = msg.get("data")
                if skill not in self.vars:
                    self.vars[skill] = {}
                for d in data:
                    self.vars[skill][d] = data[d]
                self.on_new_gui_data(data)
            elif msg_type == "mycroft.session.list.insert":
                # Insert new namespace
                self.skill = msg['data'][0]['skill_id']
                self.loaded.insert(0, [self.skill, []])
            elif msg_type == "mycroft.gui.list.insert":
                # Insert a page in an existing namespace
                self.page = msg['data'][0]['url']
                pos = msg.get('position')
                # TODO sometimes throws IndexError: list index out of range
                # not invalid json, seems like either pos is out of range or
                # "mycroft.session.list.insert" message was missed
                # NOTE: only happened once with wiki skill, can't replicate
                self.loaded[0][1].insert(pos, self.page)
                #self.skill = self.loaded[0][0]
            elif msg_type == "mycroft.session.list.move":
                # Move the namespace at "pos" to the top of the stack
                pos = msg.get('from')
                self.loaded.insert(0, self.loaded.pop(pos))
            elif msg_type == "mycroft.session.list.remove":
                pos = msg.get('position')
                skill = msg.get("namespace")
                if self.skill == skill:
                    self.skill = None
                self.loaded.pop(pos)
            elif msg_type == "mycroft.events.triggered":
                # Switch selected page of namespace
                skill = msg['namespace']
                self.skill = self.skill or skill
                pos = msg['data']['number']
                for n in self.loaded:
                    if n[0] == skill:
                        # TODO sometimes pos throws
                        #  IndexError: list index out of range
                        # occasionally happens with weather skill
                        # LOGS:
                        #   05:38:29.363 - __main__:on_gui_message:56 - DEBUG - Msg: {"type": "mycroft.events.triggered", "namespace": "mycroft-weather.mycroftai", "event_name": "page_gained_focus", "data": {"number": 1}}
                        #   05:38:29.364 - __main__:on_gui_message:90 - ERROR - list index out of range
                        self.page = n[1][pos]

            self._draw_buffer()
            self.on_message(msg)
        except Exception as e:
            if self.debug:
                LOG.exception(e)
                LOG.error("Invalid JSON: " + str(payload))
Example 7
    def _register_object(self, message, object_name, register_func):
        name = message.data['name']
        samples = message.data['samples']

        LOG.debug('Registering ' + self.engine.name + ' ' + object_name +
                  ': ' + name)

        register_func(name, samples)
        self.train_time = get_time() + self.train_delay
        self.wait_and_train()
Example 8
    def end_audio(self, listen=False):
        """Helper function for child classes to call in execute().

        Sends the recognizer_loop:audio_output_end message (indicating
        that speaking is done for the moment) as well as trigger listening
        if it has been requested. It also checks if cache directory needs
        cleaning to free up disk space.

        Arguments:
            listen (bool): indication if listening trigger should be sent.
        """

        LOG.debug("recognizer_loop:audio_output_end")
Example 9
    def load_phonemes(self, key):
        """Load phonemes from cache file.

        Arguments:
            key:    Key identifying phoneme cache
        """
        pho_file = os.path.join(self.cache_dir, key + ".pho")
        if os.path.exists(pho_file):
            try:
                with open(pho_file, "r") as cachefile:
                    phonemes = cachefile.read().strip()
                return phonemes
            except Exception:
                LOG.debug("Failed to read .PHO from cache")
        return None
Example 10
def play_ogg(uri, play_cmd="ogg123 -q %1"):
    """ Play a ogg-file.

        Returns: subprocess.Popen object
    """
    play_ogg_cmd = str(play_cmd).split(" ")
    for index, cmd in enumerate(play_ogg_cmd):
        if cmd == "%1":
            play_ogg_cmd[index] = uri
    try:
        return subprocess.Popen(play_ogg_cmd)
    except Exception as e:
        LOG.error("Failed to launch OGG: {}".format(play_ogg_cmd))
        LOG.debug("Error: {}".format(repr(e)), exc_info=True)
        return None
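
A short usage sketch (assumes the ogg123 binary from vorbis-tools is installed; the path is illustrative):

    proc = play_ogg("/tmp/chime.ogg")
    if proc is not None:
        proc.wait()  # block until playback finishes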
Example 11
    def handle_fallback(self, message):
        utt = message.data.get('utterance')
        LOG.debug(self.engine.name + " fallback attempt: " + utt)

        if not self.finished_training_event.is_set():
            LOG.debug('Waiting for training to finish...')
            self.finished_training_event.wait()

        data = self.engine.calc_intent(utt)

        if data["conf"] < 0.5:
            return False

        self.make_active()

        self.emitter.emit(message.reply(data["name"], data=data))
        return True
Example 12
    def transcribe(self, audio):
        def send_unknown_intent():
            """ Send message that nothing was transcribed. """
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')

        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio)
            if text is not None:
                text = text.lower().strip()
                LOG.debug("STT: " + text)
            else:
                send_unknown_intent()
                LOG.info('no words were transcribed')
            if self.save_utterances:
                mtd = self._compile_metadata(text)

                filename = os.path.join(self.saved_utterances_dir, mtd["name"])
                with open(filename, 'wb') as f:
                    f.write(audio.get_wav_data())

                filename = os.path.join(self.saved_utterances_dir,
                                        mtd["name"].replace(".wav", ".json"))
                with open(filename, 'w') as f:
                    json.dump(mtd, f, indent=4)

            return text
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))

            self.emitter.emit("recognizer_loop:no_internet")
        except RequestException as e:
            LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            send_unknown_intent()
            LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            return None

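        # Reached only after sr.RequestError, ConnectionError or
        # RequestException above; the success path and the generic
        # handler return before this point.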
        dialog_name = 'not connected to the internet'
        self.emitter.emit('speak', {'utterance': dialog_name})
Example 13
    def load_local(self, path):
        """
            Load local json file into self.

            Args:
                path (str): file to load
        """
        path = expanduser(path)
        if exists(path) and isfile(path):
            try:
                config = load_commented_json(path)
                for key in config:
                    self[key] = config[key]

                LOG.debug("Configuration {} loaded".format(path))
            except Exception as e:
                LOG.error("Error loading configuration '{}'".format(path))
                LOG.error(repr(e))
        else:
            LOG.debug("Configuration '{}' not defined, skipping".format(path))
Example 14
    def get_cli_input(self):
        while True:
            if self.waiting:
                sleep(0.3)
                continue
            if self.debug:
                LOG.debug("waiting for input")
            if self.color:
                line = input("\x1b[6;33;40m INPUT: \x1b[0m")
            else:
                line = input("INPUT:")
            self.say(line)

            msg = {"data": {"utterances": [line],
                            "lang": "en-us"},
                   "type": "recognizer_loop:utterance",
                   "context": {"source": self.client.peer,
                               "destination": "hive_mind",
                               "platform": platform}}
            self.send_to_hivemind_bus(msg)
            self.waiting = True
Example 15
    def _skip_wake_word(self):
        """Check if told programatically to skip the wake word

        For example when we are in a dialog with the user.
        """
        # TODO: remove startListening signal check in 20.02
        if check_for_signal('startListening') or self._listen_triggered:
            return True

        # Pressing the Mark 1 button can start recording (unless
        # it is being used to mean 'stop' instead)
        if check_for_signal('buttonPress', 1):
            # give other processes time to consume this signal if
            # it was meant to be a 'stop'
            sleep(0.25)
            if check_for_signal('buttonPress'):
                # Signal is still here, assume it was intended to
                # begin recording
                LOG.debug("Button Pressed, wakeword not needed")
                return True

        return False
Example 16
 async def event_handler(self, event):
     event = json.loads(event)
     event_type = event.get("event", "")
     if event_type == "hello":
         self.on_connect(event)
     elif event_type == "status_change":
         self.on_status_change(event)
     elif event_type == "typing":
         self.on_typing(event)
     elif event_type == "posted":
         self.on_message(event)
     elif event_type == "channel_viewed":
         self.on_viewed(event)
     elif event_type == "preferences_changed":
         self.on_preferences_changed(event)
     elif event_type == "post_deleted":
         self.on_post_deleted(event)
     elif event_type == "user_added":
         self.on_user_added(event)
     elif event_type == "user_removed":
         self.on_user_removed(event)
     else:
         LOG.debug(event)
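
As the event set grows, the elif ladder above can be collapsed into a lookup table; a sketch using the same handler names (dispatch via getattr is the only new assumption):

    EVENT_HANDLERS = {
        "hello": "on_connect",
        "status_change": "on_status_change",
        "typing": "on_typing",
        "posted": "on_message",
        "channel_viewed": "on_viewed",
        "preferences_changed": "on_preferences_changed",
        "post_deleted": "on_post_deleted",
        "user_added": "on_user_added",
        "user_removed": "on_user_removed",
    }

    async def event_handler(self, event):
        event = json.loads(event)
        name = EVENT_HANDLERS.get(event.get("event", ""))
        if name:
            getattr(self, name)(event)
        else:
            LOG.debug(event)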
Example 17
    def listen(self, source, bus, stream=None):
        """Listens for chunks of audio that Mycroft should perform STT on.

        This will listen continuously for a wake-up-word, then return the
        audio chunk containing the spoken phrase that comes immediately
        afterwards.

        Args:
            source (AudioSource):  Source producing the audio chunks
            bus (EventEmitter): Emitter for notifications of when recording
                                    begins and ends.
            stream (AudioStreamHandler): Stream target that will receive chunks
                                         of the utterance audio while it is
                                         being recorded

        Returns:
            AudioData: audio with the user's utterance, minus the wake-up-word
        """
        assert isinstance(source, AudioSource), "Source must be an AudioSource"

        # bytes_per_sec = source.SAMPLE_RATE * source.SAMPLE_WIDTH
        sec_per_buffer = float(source.CHUNK) / source.SAMPLE_RATE

        # Every time a new 'listen()' request begins, reset the threshold
        # used for silence detection.  This is as good of a reset point as
        # any, as we expect the user and Mycroft to not be talking.
        # NOTE: adjust_for_ambient_noise() doc claims it will stop early if
        #       speech is detected, but there is no code to actually do that.
        self.adjust_for_ambient_noise(source, 1.0)

        LOG.debug("Waiting for wake word...")
        self._wait_until_wake_word(source, sec_per_buffer, bus)
        self._listen_triggered = False
        if self._stop_signaled:
            return

        LOG.debug("Recording...")
        bus.emit("recognizer_loop:record_begin")

        frame_data = self._record_phrase(source, sec_per_buffer, stream)
        audio_data = self._create_audio_data(frame_data, source)
        bus.emit("recognizer_loop:record_end")

        LOG.debug("Thinking...")

        return audio_data
Example 18
def find_input_device(device_name):
    """ Find audio input device by name.

        Arguments:
            device_name: device name or regex pattern to match

        Returns: device_index (int) or None if device wasn't found
    """
    LOG.info('Searching for input device: {}'.format(device_name))
    LOG.debug('Devices: ')
    pa = pyaudio.PyAudio()
    pattern = re.compile(device_name)
    for device_index in range(pa.get_device_count()):
        dev = pa.get_device_info_by_index(device_index)
        LOG.debug('   {}'.format(dev['name']))
        if dev['maxInputChannels'] > 0 and pattern.match(dev['name']):
            LOG.debug('    ^-- matched')
            return device_index
    return None
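
A quick usage sketch tying this to the listener config from Example 1 (the device name is illustrative):

    index = find_input_device("USB.*")  # regex is matched against PyAudio device names
    if index is None:
        LOG.warning("No matching input device, falling back to default")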
Example 19
 def begin_audio(self):
     """Helper function for child classes to call in execute()"""
     # Create signals informing start of speech
     LOG.debug("recognizer_loop:audio_output_start")
Example 20
 def speak(self, utterance, channel_id, user_data):
     user = user_data["mattermost_username"]
     utterance = "@{} , ".format(user) + utterance
     LOG.debug("Sending message to channel " + channel_id)
     LOG.debug("Message: " + utterance)
     self.bot.send_message(channel_id, utterance)
Example 21
 def on_handled(self):
     if self.debug:
         LOG.debug("Request handled")
     self.waiting = False
Example 22
    def _wait_until_wake_word(self, source, sec_per_buffer, bus):
        """Listen continuously on source until a wake word is spoken

        Args:
            source (AudioSource):  Source producing the audio chunks
            sec_per_buffer (float):  Fractional number of seconds in each chunk
            bus (EventEmitter):  Emitter used to notify about detected hotwords
        """
        num_silent_bytes = int(self.SILENCE_SEC * source.SAMPLE_RATE *
                               source.SAMPLE_WIDTH)

        silence = get_silence(num_silent_bytes)

        # bytearray to store audio in
        byte_data = silence

        buffers_per_check = self.SEC_BETWEEN_WW_CHECKS / sec_per_buffer
        buffers_since_check = 0.0

        # Max bytes for byte_data before audio is removed from the front
        max_size = self.sec_to_bytes(self.SAVED_WW_SEC, source)
        test_size = self.sec_to_bytes(self.TEST_WW_SEC, source)

        said_wake_word = False

        # Rolling buffer to track the audio energy (loudness) heard on
        # the source recently.  An average audio energy is maintained
        # based on these levels.
        energies = []
        idx_energy = 0
        avg_energy = 0.0
        energy_avg_samples = int(5 / sec_per_buffer)  # avg over last 5 secs
        counter = 0

        # These are frames immediately after wake word is detected
        # that we want to keep to send to STT
        ww_frames = deque(maxlen=7)

        while not said_wake_word and not self._stop_signaled:
            if self._skip_wake_word():
                break
            chunk = self.record_sound_chunk(source)
            ww_frames.append(chunk)

            energy = self.calc_energy(chunk, source.SAMPLE_WIDTH)
            if energy < self.energy_threshold * self.multiplier:
                self._adjust_threshold(energy, sec_per_buffer)

            if len(energies) < energy_avg_samples:
                # build the average
                energies.append(energy)
                avg_energy += float(energy) / energy_avg_samples
            else:
                # maintain the running average and rolling buffer
                avg_energy -= float(energies[idx_energy]) / energy_avg_samples
                avg_energy += float(energy) / energy_avg_samples
                energies[idx_energy] = energy
                idx_energy = (idx_energy + 1) % energy_avg_samples

                # maintain the threshold using average
                if energy < avg_energy * 1.5:
                    if energy > self.energy_threshold:
                        # bump the threshold to just above this value
                        self.energy_threshold = energy * 1.2

            counter += 1

            # At first, the buffer is empty and must fill up.  After that
            # just drop the first chunk bytes to keep it the same size.
            needs_to_grow = len(byte_data) < max_size
            if needs_to_grow:
                byte_data += chunk
            else:  # Remove beginning of audio and add new chunk to end
                byte_data = byte_data[len(chunk):] + chunk

            buffers_since_check += 1.0
            self.feed_hotwords(chunk)
            if buffers_since_check > buffers_per_check:
                buffers_since_check -= buffers_per_check
                chopped = byte_data[-test_size:] \
                    if test_size < len(byte_data) else byte_data
                audio_data = chopped + silence
                said_hot_word = False
                for hotword in self.check_for_hotwords(audio_data, bus):
                    said_hot_word = True
                    engine = self.hotword_engines[hotword]["engine"]
                    sound = self.hotword_engines[hotword]["sound"]
                    utterance = self.hotword_engines[hotword]["utterance"]
                    listen = self.hotword_engines[hotword]["listen"]

                    LOG.debug("Hot Word: " + hotword)
                    # If enabled, play a short sound file to audibly indicate
                    # the hotword was detected.
                    if sound:
                        try:
                            audio_file = resolve_resource_file(sound)
                            source.mute()
                            if audio_file.endswith(".wav"):
                                play_wav(audio_file).wait()
                            elif audio_file.endswith(".mp3"):
                                play_mp3(audio_file).wait()
                            elif audio_file.endswith(".ogg"):
                                play_ogg(audio_file).wait()
                            else:
                                play_audio(audio_file).wait()
                            source.unmute()
                        except Exception as e:
                            LOG.warning(e)

                    # Hot Word succeeded
                    payload = {
                        'hotword': hotword,
                        'start_listening': listen,
                        'sound': sound,
                        "engine": engine.__class__.__name__
                    }
                    bus.emit("recognizer_loop:hotword", payload)

                    if utterance:
                        # send the transcribed word on for processing
                        payload = {'utterances': [utterance]}
                        bus.emit("recognizer_loop:utterance", payload)

                    audio = None
                    mtd = self._compile_metadata(hotword)
                    if self.save_wake_words:
                        # Save wake word locally
                        audio = self._create_audio_data(byte_data, source)

                        if not isdir(self.saved_wake_words_dir):
                            os.mkdir(self.saved_wake_words_dir)

                        fn = join(
                            self.saved_wake_words_dir,
                            '_'.join(str(mtd[k])
                                     for k in sorted(mtd)) + '.wav')
                        with open(fn, 'wb') as f:
                            f.write(audio.get_wav_data())

                        fn = join(
                            self.saved_wake_words_dir,
                            '_'.join(str(mtd[k])
                                     for k in sorted(mtd)) + '.json')
                        with open(fn, 'w') as f:
                            json.dump(mtd, f, indent=4)

                    if listen:
                        said_wake_word = True

                if said_hot_word:
                    # Reset the audio buffer, otherwise the same wake-word
                    # audio could trigger many serial detections
                    byte_data = silence
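
The energy bookkeeping above maintains a rolling mean in O(1) per chunk by subtracting the outgoing reading and adding the incoming one. A standalone illustration (zero-seeded buffer instead of the grow-then-roll warm-up used above):

    N = 4
    buf = [0.0] * N
    idx = 0
    avg = 0.0
    for sample in [2.0, 4.0, 6.0, 8.0, 10.0]:
        avg -= buf[idx] / N   # drop the oldest reading's contribution
        avg += sample / N     # add the newest
        buf[idx] = sample
        idx = (idx + 1) % N
    # avg == 7.0, the mean of the last four samples: (4 + 6 + 8 + 10) / 4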
Example 23
 def trigger_listen(self):
     """Externally trigger listening."""
     LOG.debug('Listen triggered from external source.')
     self._listen_triggered = True
Example 24
 def connect(self):
     LOG.debug("Announcing GUI")
     self.bus.on('mycroft.gui.port', self._connect_to_gui)
     self.bus.emit(Message("mycroft.gui.connected",
                           {"gui_id": self.gui_id}))
     self.connected = True
Example 25
 def on_open(self, message):
     LOG.debug("Gui connection open")