def test_detect_vad_active(_mock):
    """Feed one speech frame to the trigger and expect no activation.

    A random 160-sample float32 frame is passed through the trigger once
    while ``is_speech`` is set; the context must remain inactive. The
    trigger is closed afterwards.
    """
    speech_context = SpeechContext()
    trigger = WakewordTrigger(model_dir="wakeword_model")

    frame = np.random.rand(160).astype(np.float32)
    speech_context.is_speech = True
    trigger(speech_context, frame)

    assert not speech_context.is_active
    trigger.close()
def test_detect_vad_inactive(_mock):
    """Feed one non-speech frame to the trigger and expect no activation.

    A random 160-sample float32 frame is passed through the trigger with
    ``is_speech`` cleared; the context must remain inactive.
    """
    speech_context = SpeechContext()
    trigger = WakewordTrigger(model_dir="wakeword_model")

    frame = np.random.rand(160).astype(np.float32)
    speech_context.is_speech = False
    trigger(speech_context, frame)

    assert not speech_context.is_active
def test_detect_activate(_mock):
    """Expect the context to become active after one speech frame.

    Every element of the detect model's first returned output is set to
    0.6 before a random 512-sample float32 frame is processed with
    ``is_speech`` set.
    """
    speech_context = SpeechContext()
    trigger = WakewordTrigger(model_dir="wakeword_model")

    # NOTE(review): 0.6 is presumably above the trigger's activation
    # threshold -- confirm against WakewordTrigger's defaults.
    trigger.detect_model.return_value[0][:] = 0.6

    frame = np.random.rand(512).astype(np.float32)
    speech_context.is_speech = True
    trigger(speech_context, frame)

    # The speech flag is cleared before the activation state is checked.
    speech_context.is_speech = False
    assert speech_context.is_active
def test_detect_manual_min_delay(_mock):
    """Expect a manually activated context to stay active over three frames.

    The context is marked active by hand, every element of the detect
    model's first returned output is set to 1, and the same random
    512-sample float32 frame is processed three times.
    """
    speech_context = SpeechContext()
    trigger = WakewordTrigger(model_dir="wakeword_model")
    trigger.detect_model.return_value[0][:] = 1

    speech_context.is_active = True
    frame = np.random.rand(512).astype(np.float32)

    # The identical frame is replayed; it is generated only once.
    for _ in range(3):
        trigger(speech_context, frame)

    assert speech_context.is_active
def create(
    spokestack_id: str,
    spokestack_secret: str,
    sample_rate: int = 16000,
    frame_width: int = 20,
    model_dir: str = "",
    **kwargs: Any,
) -> SpeechPipeline:
    """Assemble a speech pipeline for this profile.

    The pipeline reads from a PyAudio input and runs, in order: automatic
    gain control, automatic noise suppression, voice activity detection,
    the wakeword trigger, the activation timeout and the cloud speech
    recognizer.

    Args:
        spokestack_id (str): Spokestack API id.
        spokestack_secret (str): Spokestack API secret.
        sample_rate (int): sample rate of the audio (Hz).
        frame_width (int): width of the audio frame: 10, 20, or 30 (ms).
        model_dir (str): Directory containing the tflite wakeword models.
        **kwargs (Any): additional keyword arguments forwarded unchanged to
            the audio input, voice activity detector, wakeword trigger,
            activation timeout and cloud speech recognizer.

    Returns:
        SpeechPipeline: the configured pipeline instance.
    """
    audio_input = PyAudioInput(
        frame_width=frame_width, sample_rate=sample_rate, **kwargs
    )

    # Stages are constructed in the order in which they are listed below.
    gain_control = AutomaticGainControl(
        sample_rate=sample_rate, frame_width=frame_width
    )
    noise_suppression = AutomaticNoiseSuppression(sample_rate=sample_rate)
    voice_activity = VoiceActivityDetector(
        frame_width=frame_width,
        sample_rate=sample_rate,
        **kwargs,
    )
    wakeword = WakewordTrigger(model_dir=model_dir, **kwargs)
    timeout = ActivationTimeout(frame_width=frame_width, **kwargs)
    recognizer = CloudSpeechRecognizer(
        spokestack_secret=spokestack_secret,
        spokestack_id=spokestack_id,
        **kwargs,
    )

    return SpeechPipeline(
        input_source=audio_input,
        stages=[
            gain_control,
            noise_suppression,
            voice_activity,
            wakeword,
            timeout,
            recognizer,
        ],
    )
def main():
    """Run the voice demo using the Google speech recognizer.

    Builds the speech pipeline, a dialogue manager and a text-to-speech
    manager, registers the pipeline event handlers, speaks the dialogue
    manager's greeting, then starts and runs the pipeline.
    """
    audio_input = PyAudioInput(
        frame_width=20, sample_rate=16000, exception_on_overflow=False
    )
    stages = [
        VoiceActivityDetector(),
        WakewordTrigger(pre_emphasis=0.97, model_dir="tflite"),
        GoogleSpeechRecognizer(GOOGLE_CREDS),
        ActivationTimeout(),
    ]
    pipeline = SpeechPipeline(audio_input, stages)

    dialogue_manager = DialogueManager(
        "tflite", "distilbert-base-cased-distilled-squad"
    )

    tts_client = TextToSpeechClient(KEY_ID, KEY_SECRET)
    audio_output = PyAudioOutput()
    manager = TextToSpeechManager(tts_client, audio_output)

    @pipeline.event
    def on_activate(context):
        # Report the activation flag when the pipeline activates.
        print(context.is_active)

    @pipeline.event
    def on_recognize(context):
        # Pause the pipeline while the answer is produced and synthesized,
        # then resume it.
        pipeline.pause()
        manager.synthesize(
            dialogue_manager(context.transcript), "text", "demo-male"
        )
        pipeline.resume()

    @pipeline.event
    def on_deactivate(context):
        # Report the activation flag when the pipeline deactivates.
        print(context.is_active)

    # Speak the greeting before the pipeline is started.
    greeting = dialogue_manager.greet()
    manager.synthesize(greeting, "text", "demo-male")

    pipeline.start()
    pipeline.run()
def main():
    """Run the voice demo using the cloud speech recognizer and TFLite NLU.

    Builds the speech pipeline, the NLU model, a dialogue manager and a
    text-to-speech manager, registers the pipeline event handlers, speaks
    the welcome response, then starts and runs the pipeline.
    """
    audio_input = PyAudioInput(
        frame_width=20, sample_rate=16000, exception_on_overflow=False
    )
    stages = [
        VoiceActivityDetector(),
        WakewordTrigger(pre_emphasis=0.97, model_dir="tflite"),
        CloudSpeechRecognizer(spokestack_id=KEY_ID, spokestack_secret=KEY_SECRET),
        ActivationTimeout(),
    ]
    pipeline = SpeechPipeline(audio_input, stages)

    nlu = TFLiteNLU("tflite")
    dialogue_manager = DialogueManager()

    tts_client = TextToSpeechClient(KEY_ID, KEY_SECRET)
    audio_output = PyAudioOutput()
    manager = TextToSpeechManager(tts_client, audio_output)

    @pipeline.event
    def on_activate(context):
        print("active")

    @pipeline.event
    def on_recognize(context):
        # Pause the pipeline while the transcript is classified and any
        # response is synthesized, then resume it.
        pipeline.pause()
        results = nlu(context.transcript)
        response = dialogue_manager(results)
        if response:
            manager.synthesize(response, "text", "demo-male")
        pipeline.resume()

        # A stop intent ends the pipeline.
        if results.intent == "AMAZON.StopIntent":
            pipeline.stop()

    # Speak the welcome message before the pipeline is started.
    welcome = Response.WELCOME.value
    manager.synthesize(welcome, "text", "demo-male")

    pipeline.start()
    pipeline.run()
def test_invalid_args(_mock):
    """Expect ValueError when the trigger is built with fft_window_type="hamming"."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor call matters.
        WakewordTrigger(fft_window_type="hamming")