from unittest import TestCase

# Import path assumed; CyclicAudioBuffer ships with mycroft-core and may
# live in a different module depending on the version
from mycroft.client.speech.mic import CyclicAudioBuffer


class TestCyclicAudioBuffer(TestCase):
    def test_get_last(self):
        buff = CyclicAudioBuffer(3, b'abcdef')
        self.assertEqual(buff.get_last(3), b'def')

    def test_get_item(self):
        buff = CyclicAudioBuffer(6, b'abcdef')
        self.assertEqual(buff[:], b'abcdef')

    def test_append_with_full(self):
        buff = CyclicAudioBuffer(3, b'abc')
        buff.append(b'de')
        self.assertEqual(buff.get(), b'cde')
        self.assertEqual(len(buff), 3)

    def test_append_with_room_left(self):
        buff = CyclicAudioBuffer(16, b'abc')
        buff.append(b'def')
        self.assertEqual(buff.get(), b'abcdef')

    def test_init_larger_initial_data(self):
        size = 16
        buff = CyclicAudioBuffer(size, b'a' * (size + 3))
        self.assertEqual(buff.get(), b'a' * size)

    def test_init(self):
        buff = CyclicAudioBuffer(16, b'abc')
        self.assertEqual(buff.get(), b'abc')
        self.assertEqual(len(buff), 3)
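The tests above pin down the buffer's behaviour: it keeps only the newest `size` bytes and drops older audio from the front. A minimal sketch that satisfies these tests is shown below; it is an illustration only, not the mycroft-core implementation, whose storage details may differ.

class CyclicAudioBuffer:
    """Byte buffer that keeps only the most recent `size` bytes (sketch)."""

    def __init__(self, size, initial_data=b''):
        self.size = size
        # Keep at most `size` bytes of the initial data, newest last
        self._buffer = initial_data[-size:]

    def append(self, data):
        """Add new audio, dropping the oldest bytes once the buffer is full."""
        self._buffer = (self._buffer + data)[-self.size:]

    def get(self):
        """Return the full buffer contents."""
        return self._buffer

    def get_last(self, size):
        """Return the `size` most recent bytes."""
        return self._buffer[-size:]

    def __getitem__(self, key):
        return self._buffer[key]

    def __len__(self):
        return len(self._buffer)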
Example #7
    def _wait_until_wake_word(self, source, sec_per_buffer):
        """Listen continuously on source until a wake word is spoken

        Arguments:
            source (AudioSource):  Source producing the audio chunks
            sec_per_buffer (float):  Fractional number of seconds in each chunk
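
        Returns:
            WakeWordData: the audio heard, whether the wake word was found,
                whether stop was signaled, and the trailing audio frames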
        """

        # Maximum seconds of audio to keep in the rolling buffer while
        # listening; the wake word must fit within this window
        ww_duration = self.wake_word_recognizer.expected_duration
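        # Test with at least 3 seconds of audio, even for shorter wake words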
        ww_test_duration = max(3, ww_duration)

        mic_write_counter = 0
        num_silent_bytes = int(self.SILENCE_SEC * source.SAMPLE_RATE *
                               source.SAMPLE_WIDTH)

        silence = get_silence(num_silent_bytes)

        # Max bytes kept in audio_buffer before audio is removed from the front
        max_size = source.duration_to_bytes(ww_duration)
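        # Amount of audio (in bytes) handed to each wake word check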
        test_size = source.duration_to_bytes(ww_test_duration)
        audio_buffer = CyclicAudioBuffer(max_size, silence)

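        # Number of chunks to collect between wake word checks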
        buffers_per_check = self.SEC_BETWEEN_WW_CHECKS / sec_per_buffer
        buffers_since_check = 0.0

        # Rolling buffer to track the audio energy (loudness) heard on
        # the source recently.  An average audio energy is maintained
        # based on these levels.
        average_samples = int(5 / sec_per_buffer)  # average over last 5 secs
        audio_mean = RollingMean(average_samples)

        # These are frames immediately after wake word is detected
        # that we want to keep to send to STT
        ww_frames = deque(maxlen=7)

        said_wake_word = False
        audio_data = None
        while (not said_wake_word and not self._stop_signaled and
               not self._skip_wake_word()):
            chunk = self.record_sound_chunk(source)
            audio_buffer.append(chunk)
            ww_frames.append(chunk)

            energy = self.calc_energy(chunk, source.SAMPLE_WIDTH)
            audio_mean.append_sample(energy)

            if energy < self.energy_threshold * self.multiplier:
                self._adjust_threshold(energy, sec_per_buffer)
            # maintain the threshold using average
            if self.energy_threshold < energy < audio_mean.value * 1.5:
                # bump the threshold to just above this value
                self.energy_threshold = energy * 1.2

            # Periodically output energy level stats. This can be used to
            # visualize the microphone input, e.g. a needle on a meter.
            if mic_write_counter % 3:
                self._watchdog()
                self.write_mic_level(energy, source)
            mic_write_counter += 1

            buffers_since_check += 1.0
            # Send chunk to wake_word_recognizer
            self.wake_word_recognizer.update(chunk)

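            # Enough new audio collected, run the wake word check on the
            # most recent audio padded with trailing silence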
            if buffers_since_check > buffers_per_check:
                buffers_since_check -= buffers_per_check
                audio_data = audio_buffer.get_last(test_size) + silence
                said_wake_word = \
                    self.wake_word_recognizer.found_wake_word(audio_data)

        self._listen_triggered = False
        return WakeWordData(audio_data, said_wake_word,
                            self._stop_signaled, ww_frames)
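The helpers used above (RollingMean with its append_sample/value interface, and get_silence) come from the same module. A rough sketch of what they do, inferred only from how they are called here and not taken from the mycroft-core source, might look like this:

from collections import deque


def get_silence(num_bytes):
    """Sketch: silence as zeroed PCM bytes (assumed from usage above)."""
    return b'\0' * num_bytes


class RollingMean:
    """Sketch of a rolling average over the last `size` samples.

    Illustrative only; the real helper may compute the mean incrementally.
    """

    def __init__(self, size):
        self._samples = deque(maxlen=size)

    def append_sample(self, sample):
        """Add a sample, discarding the oldest once `size` is reached."""
        self._samples.append(sample)

    @property
    def value(self):
        """Average of the currently stored samples."""
        if not self._samples:
            return 0.0
        return sum(self._samples) / len(self._samples)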