Example #1
0
def test_detect_vad_active(_mock):
    """Send one speech-flagged frame through the trigger and expect no activation.

    A single random 160-sample float32 frame is processed with
    ``is_speech`` set to True; the context must still report inactive
    afterwards. The detector is closed at the end of the test.

    ``_mock`` is not used in the body; presumably it is injected by a patch
    decorator that is not visible in this excerpt.
    """
    speech_context = SpeechContext()

    trigger = WakewordTrigger(model_dir="wakeword_model")

    # One frame only: build it, mark the context as speech, then run it.
    frame = np.random.rand(160).astype(np.float32)
    speech_context.is_speech = True
    trigger(speech_context, frame)
    assert not speech_context.is_active

    trigger.close()
Example #2
0
def test_detect_vad_inactive(_mock):
    """Feed a non-speech frame to the trigger and expect the context to stay inactive.

    A random 160-sample float32 frame is processed with ``is_speech`` set
    to False, after which ``is_active`` must be falsy.

    ``_mock`` is not used in the body; presumably it is injected by a patch
    decorator that is not visible in this excerpt.
    """
    context = SpeechContext()

    detector = WakewordTrigger(model_dir="wakeword_model")
    try:
        test_frame = np.random.rand(160).astype(np.float32)
        context.is_speech = False
        detector(context, test_frame)
        assert not context.is_active
    finally:
        # Release the detector even when the assertion fails; the sibling
        # test_detect_vad_active closes its detector explicitly as well.
        detector.close()
Example #3
0
def test_detect_activate(_mock):
    """Expect the context to become active after a frame scored at 0.6.

    The detect model's return value is filled with 0.6 before a random
    512-sample float32 frame is processed with ``is_speech`` set to True.
    ``is_speech`` is then cleared and the context must still be active.

    ``_mock`` is not used directly; presumably it is injected by a patch
    decorator that is not visible in this excerpt and is what makes
    ``detector.detect_model.return_value`` configurable.
    """
    context = SpeechContext()
    detector = WakewordTrigger(model_dir="wakeword_model")
    try:
        detector.detect_model.return_value[0][:] = 0.6

        test_frame = np.random.rand(512).astype(np.float32)
        context.is_speech = True
        detector(context, test_frame)
        # Speech ending after the frame was processed must not undo activation.
        context.is_speech = False
        assert context.is_active
    finally:
        # Release the detector even when the assertion fails; the sibling
        # test_detect_vad_active closes its detector explicitly as well.
        detector.close()
Example #4
0
def test_detect_manual_min_delay(_mock):
    """Expect a manually activated context to stay active across three frames.

    The detect model's return value is filled with 1, the context is
    activated by hand, and the same random 512-sample float32 frame is
    processed three times. ``is_active`` must still be truthy afterwards.

    ``_mock`` is not used directly; presumably it is injected by a patch
    decorator that is not visible in this excerpt and is what makes
    ``detector.detect_model.return_value`` configurable.
    """
    context = SpeechContext()
    detector = WakewordTrigger(model_dir="wakeword_model")
    try:
        detector.detect_model.return_value[0][:] = 1

        context.is_active = True
        test_frame = np.random.rand(512).astype(np.float32)
        # Process the identical frame three times in a row.
        for _ in range(3):
            detector(context, test_frame)

        assert context.is_active
    finally:
        # Release the detector even when the assertion fails; the sibling
        # test_detect_vad_active closes its detector explicitly as well.
        detector.close()
    def create(
        spokestack_id: str,
        spokestack_secret: str,
        sample_rate: int = 16000,
        frame_width: int = 20,
        model_dir: str = "",
        **kwargs: Any,
    ) -> SpeechPipeline:
        """Build a microphone-driven speech pipeline with wakeword activation.

        Args:
            spokestack_id (str): Spokestack API id, passed to the cloud
                speech recognizer.
            spokestack_secret (str): Spokestack API secret, passed to the
                cloud speech recognizer.
            sample_rate (int): sample rate of the audio (Hz).
            frame_width (int): width of the audio frame: 10, 20, or 30 (ms).
            model_dir (str): Directory containing the tflite wakeword models.
            **kwargs (Any): extra keyword arguments forwarded unchanged to
                PyAudioInput, VoiceActivityDetector, WakewordTrigger,
                ActivationTimeout and CloudSpeechRecognizer.

        Returns:
            SpeechPipeline: pipeline reading from a PyAudioInput and running,
            in order, gain control, noise suppression, voice activity
            detection, the wakeword trigger, the activation timeout and the
            cloud speech recognizer.
        """
        # Construct the audio source first, then each stage in pipeline order.
        microphone = PyAudioInput(
            frame_width=frame_width, sample_rate=sample_rate, **kwargs
        )
        gain_control = AutomaticGainControl(
            sample_rate=sample_rate, frame_width=frame_width
        )
        noise_suppression = AutomaticNoiseSuppression(sample_rate=sample_rate)
        vad = VoiceActivityDetector(
            frame_width=frame_width, sample_rate=sample_rate, **kwargs
        )
        wakeword = WakewordTrigger(model_dir=model_dir, **kwargs)
        timeout = ActivationTimeout(frame_width=frame_width, **kwargs)
        recognizer = CloudSpeechRecognizer(
            spokestack_secret=spokestack_secret,
            spokestack_id=spokestack_id,
            **kwargs,
        )

        return SpeechPipeline(
            input_source=microphone,
            stages=[
                gain_control,
                noise_suppression,
                vad,
                wakeword,
                timeout,
                recognizer,
            ],
        )
Example #6
0
def main():
    """Run the spoken dialogue demo.

    Builds a pipeline from microphone input, voice activity detection, a
    wakeword trigger, Google speech recognition and an activation timeout,
    registers event handlers on it, speaks the dialogue manager's greeting,
    then starts and runs the pipeline.
    """
    pipeline = SpeechPipeline(
        PyAudioInput(frame_width=20,
                     sample_rate=16000,
                     exception_on_overflow=False),
        [
            VoiceActivityDetector(),
            WakewordTrigger(pre_emphasis=0.97, model_dir="tflite"),
            GoogleSpeechRecognizer(GOOGLE_CREDS),
            ActivationTimeout(),
        ],
    )

    dialogue_manager = DialogueManager(
        "tflite", "distilbert-base-cased-distilled-squad")
    manager = TextToSpeechManager(
        TextToSpeechClient(KEY_ID, KEY_SECRET),
        PyAudioOutput(),
    )

    @pipeline.event
    def on_activate(context):
        # Print the activation flag when the pipeline activates.
        print(context.is_active)

    @pipeline.event
    def on_recognize(context):
        # Pause while the answer is produced and spoken. Resume in a
        # finally block so a failure in the dialogue manager or synthesis
        # cannot leave the pipeline paused.
        pipeline.pause()
        try:
            answer = dialogue_manager(context.transcript)
            manager.synthesize(answer, "text", "demo-male")
        finally:
            pipeline.resume()

    @pipeline.event
    def on_deactivate(context):
        # Print the activation flag when the pipeline deactivates.
        print(context.is_active)

    manager.synthesize(dialogue_manager.greet(), "text", "demo-male")
    pipeline.start()
    pipeline.run()
Example #7
0
def main():
    """Run the NLU-driven voice demo.

    Builds a pipeline from microphone input, voice activity detection, a
    wakeword trigger, cloud speech recognition and an activation timeout.
    Each recognized transcript goes through the NLU model and the dialogue
    manager, and any response is spoken. The pipeline is stopped when the
    recognized intent is "AMAZON.StopIntent".
    """
    pipeline = SpeechPipeline(
        PyAudioInput(frame_width=20, sample_rate=16000, exception_on_overflow=False),
        [
            VoiceActivityDetector(),
            WakewordTrigger(pre_emphasis=0.97, model_dir="tflite"),
            CloudSpeechRecognizer(spokestack_id=KEY_ID, spokestack_secret=KEY_SECRET),
            ActivationTimeout(),
        ],
    )

    nlu = TFLiteNLU("tflite")
    dialogue_manager = DialogueManager()
    manager = TextToSpeechManager(
        TextToSpeechClient(KEY_ID, KEY_SECRET), PyAudioOutput(),
    )

    @pipeline.event
    def on_activate(context):
        print("active")

    @pipeline.event
    def on_recognize(context):
        # Pause while the response is produced and spoken. Resume in a
        # finally block so a failure in NLU, the dialogue manager or
        # synthesis cannot leave the pipeline paused.
        pipeline.pause()
        try:
            results = nlu(context.transcript)
            response = dialogue_manager(results)
            if response:
                manager.synthesize(response, "text", "demo-male")
        finally:
            pipeline.resume()

        # Only reached when processing succeeded, so results is always bound.
        if results.intent == "AMAZON.StopIntent":
            pipeline.stop()

    manager.synthesize(Response.WELCOME.value, "text", "demo-male")
    pipeline.start()
    pipeline.run()
Example #8
0
def test_invalid_args(_mock):
    """Constructing a trigger with fft_window_type="hamming" must raise ValueError.

    ``_mock`` is not used in the body; presumably it is injected by a patch
    decorator that is not visible in this excerpt.
    """
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor's exception matters.
        WakewordTrigger(fft_window_type="hamming")