async def speak_sentence(self, sentence: str,
                         language: typing.Optional[str] = None):
        """Request text to speech for a sentence and wait for it to finish.

        A ``TtsSay`` message with a freshly generated id is handed to
        ``self.publish_wait`` together with a generator that watches the
        incoming messages for a ``TtsSayFinished`` carrying the same id.

        Args:
            sentence: Text to speak.
            language: Optional language; assigned to ``lang`` on the request
                when truthy.

        Returns:
            The first item produced by ``self.publish_wait``, or ``None`` if
            it produces nothing.
        """
        request_id = str(uuid4())

        def await_say_finished():
            """Receive pairs via ``send`` until our request is reported done."""
            while True:
                _, received = yield

                if not isinstance(received, TtsSayFinished):
                    continue

                if received.id == request_id:
                    # NOTE(review): presumably (done, result) as understood by
                    # publish_wait -- confirm against its implementation
                    return True, None

        request = TtsSay(id=request_id, text=sentence, siteId=self.siteId)
        if language:
            request.lang = language

        outgoing = [request]
        reply_topics = [TtsSayFinished.topic()]
        responses = self.publish_wait(await_say_finished(), outgoing,
                                      reply_topics)

        # Only one result is expected, so stop at the first one
        async for first_response in responses:
            return first_response
Example #2
0
    def test_http_mqtt_text_to_speech(self):
        """Exercise the text-to-speech HTTP endpoint and check its MQTT traffic.

        The messages are read from ``self.mqtt_messages`` in the order
        tts/say, audioServer/playBytes, tts/sayFinished. Afterwards the
        endpoint is called again with ``repeat=true`` and must return the
        same audio.
        """
        sentence = "This is a test."

        # Subscribe up front so none of the expected messages are missed
        for topic in (
                TtsSay.topic(),
                AudioPlayBytes.topic(site_id=self.site_id),
                TtsSayFinished.topic(),
        ):
            self.client.subscribe(topic)

        endpoint = self.api_url("text-to-speech")
        query = {"siteId": self.site_id, "sessionId": self.session_id}
        http_response = requests.post(endpoint, data=sentence, params=query)
        self.check_status(http_response)

        # The response body must be non-empty; it is compared with the
        # playBytes payload below
        audio = http_response.content
        self.assertGreater(len(audio), 0)

        # First message: tts/say echoing the request parameters
        say_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(TtsSay.is_topic(say_msg.topic))

        say = TtsSay.from_dict(json.loads(say_msg.payload))
        self.assertEqual(say.site_id, self.site_id)
        self.assertEqual(say.session_id, self.session_id)
        self.assertEqual(say.text, sentence)

        # Second message: audioServer/playBytes for our site with the audio
        play_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(AudioPlayBytes.is_topic(play_msg.topic))
        played_site_id = AudioPlayBytes.get_site_id(play_msg.topic)
        self.assertEqual(played_site_id, self.site_id)
        self.assertEqual(play_msg.payload, audio)

        # Third message: tts/sayFinished for our session
        finished_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(TtsSayFinished.is_topic(finished_msg.topic))

        finished_payload = json.loads(finished_msg.payload)
        finished = TtsSayFinished.from_dict(finished_payload)
        self.assertEqual(finished.session_id, self.session_id)

        # Asking for a repeat must return the same audio again
        http_response = requests.post(
            self.api_url("text-to-speech"),
            params={"repeat": "true"},
        )
        self.check_status(http_response)
        self.assertEqual(audio, http_response.content)
Example #3
0
    def on_message(self, client, userdata, msg):
        """Handle a message received from the MQTT broker.

        Only messages on the ``TtsSay`` topic are acted upon: the JSON
        payload (an empty payload counts as ``{}``) is checked with
        ``self._check_siteId`` and, if accepted, passed to
        ``self.handle_say`` as a ``TtsSay``. Any exception is logged
        instead of being raised.
        """
        try:
            payload_size = len(msg.payload)
            _LOGGER.debug("Received %s byte(s) on %s", payload_size, msg.topic)

            # Everything except tts/say is ignored
            if msg.topic != TtsSay.topic():
                return

            payload_dict = json.loads(msg.payload or "{}")
            if self._check_siteId(payload_dict):
                self.handle_say(TtsSay(**payload_dict))
        except Exception:
            _LOGGER.exception("on_message")
    async def handle_intent(
            self, nlu_intent: NluIntent) -> typing.AsyncIterable[TtsSay]:
        """Forward a recognized intent to Home Assistant.

        Depending on ``self.handle_type`` the intent is sent either as an
        event or as an intent. For the intent case, a ``TtsSay`` is yielded
        when the response contains non-empty text under
        ``speech -> plain -> speech``. Any exception (including an
        unsupported ``handle_type``) is logged rather than raised.
        """
        try:
            if self.handle_type == HandleType.EVENT:
                await self.handle_home_assistant_event(nlu_intent)
                return

            if self.handle_type == HandleType.INTENT:
                response_dict = await self.handle_home_assistant_intent(
                    nlu_intent)
                assert response_dict, f"No response from {self.url}"

                # Drill down to the spoken text, tolerating missing levels
                speech = response_dict.get("speech", {})
                plain = speech.get("plain", {})
                tts_text = plain.get("speech", "")
                if not tts_text:
                    return

                # Hand the response text over to the TTS system
                yield TtsSay(
                    text=tts_text,
                    id=str(uuid4()),
                    site_id=nlu_intent.site_id,
                    session_id=nlu_intent.session_id,
                )
                return

            raise ValueError(
                f"Unsupported handle_type (got {self.handle_type})")
        except Exception:
            _LOGGER.exception("handle_intent")
    async def say(
        self,
        text: str,
        site_id="default",
        session_id="",
        request_id: typing.Optional[str] = None,
        block: bool = True,
    ) -> typing.AsyncIterable[
        typing.Union[
            TtsSay, HotwordToggleOn, HotwordToggleOff, AsrToggleOn, AsrToggleOff
        ]
    ]:
        """Send text to TTS system and optionally wait for it to finish.

        Yields, in order: ``HotwordToggleOff`` and ``AsrToggleOff`` for the
        site, the ``TtsSay`` request, and finally ``HotwordToggleOn`` and
        ``AsrToggleOn``. Errors raised while sending or waiting are logged,
        not propagated.

        Args:
            text: Sentence to speak.
            site_id: Site the toggle messages and the request are addressed to.
            session_id: Session id copied into the request.
            request_id: Id of the request; a random UUID is used when omitted.
            block: When true, wait for the finished event registered under
                the request id. The timeout is at least 10 seconds and grows
                with the text length when ``self.say_chars_per_second`` is
                positive.
        """
        finished_event = asyncio.Event()
        finished_id = request_id or str(uuid4())
        self.message_events[TtsSayFinished][finished_id] = finished_event

        # Disable ASR/hotword at site
        yield HotwordToggleOff(site_id=site_id, reason=HotwordToggleReason.TTS_SAY)
        yield AsrToggleOff(site_id=site_id, reason=AsrToggleReason.TTS_SAY)

        # Wait for messages to be delivered
        await asyncio.sleep(self.toggle_delay)

        try:
            # Forward to TTS
            _LOGGER.debug("Say: %s", text)
            yield TtsSay(
                id=finished_id, site_id=site_id, session_id=session_id, text=text
            )

            if block:
                # Wait for finished event
                say_finished_timeout = 10.0
                if self.say_chars_per_second > 0:
                    # Estimate timeout based on text length
                    say_finished_timeout = max(
                        say_finished_timeout, len(text) / self.say_chars_per_second
                    )

                _LOGGER.debug(
                    "Waiting for sayFinished (id=%s, timeout=%s)",
                    finished_id,
                    say_finished_timeout,
                )
                await asyncio.wait_for(
                    finished_event.wait(), timeout=say_finished_timeout
                )
        except asyncio.TimeoutError:
            _LOGGER.warning("Did not receive sayFinished before timeout")
        except Exception:
            _LOGGER.exception("say")
        finally:
            if block:
                # We have stopped waiting, so the event registered above is no
                # longer needed. Remove it so requests whose sayFinished never
                # arrived (timeout) do not accumulate in the registry. pop()
                # with a default is a no-op if the entry is already gone.
                # NOTE(review): assumes message_events[TtsSayFinished] is a
                # dict-like mapping -- confirm where it is created.
                self.message_events[TtsSayFinished].pop(finished_id, None)

            # Wait for audio to finish play
            await asyncio.sleep(self.toggle_delay)

            # Re-enable ASR/hotword at site
            yield HotwordToggleOn(site_id=site_id, reason=HotwordToggleReason.TTS_SAY)
            yield AsrToggleOn(site_id=site_id, reason=AsrToggleReason.TTS_SAY)
Example #6
0
 def on_connect(self, client, userdata, flags, rc):
     """Subscribe to the ``TtsSay`` topic once connected to the MQTT broker.

     The subscription is made through ``self.client``; the callback
     arguments are not used. Any exception is logged instead of raised.
     """
     try:
         say_topic = TtsSay.topic()
         self.client.subscribe(say_topic)
         _LOGGER.debug("Subscribed to %s", say_topic)
     except Exception:
         _LOGGER.exception("on_connect")
Example #7
0
    async def say_and_wait(self, text: str) -> typing.AsyncIterable[TtsSay]:
        """Send text to TTS system and wait for reply.

        This is an async generator: it yields a single ``TtsSay`` for the
        current session and, once resumed, waits for
        ``self.say_finished_event`` for up to ``self.say_finished_timeout``.
        A timeout is logged and otherwise ignored.

        Args:
            text: Sentence to speak.

        Raises:
            AssertionError: If there is no active session.
        """
        assert self.session, "No session"

        # Reset the event so a previous completion is not mistaken for ours
        self.say_finished_event.clear()

        # Forward to TTS
        _LOGGER.debug("Say: %s", text)
        yield TtsSay(siteId=self.siteId,
                     sessionId=self.session.sessionId,
                     text=text)

        # Wait for finished response (with timeout)
        try:
            await asyncio.wait_for(self.say_finished_event.wait(),
                                   timeout=self.say_finished_timeout)
        except asyncio.TimeoutError:
            _LOGGER.exception("say_and_wait")
Example #8
0
    def test_no_play(self):
        """Exercise the text-to-speech HTTP endpoint with ``play=false``.

        The messages are read from ``self.mqtt_messages`` in the order
        audioServer/toggleOff, tts/say, audioServer/playBytes,
        tts/sayFinished, audioServer/toggleOn.
        """
        sentence = "This is a test."

        # Subscribe up front so none of the expected messages are missed
        for topic in (
                TtsSay.topic(),
                AudioPlayBytes.topic(site_id=self.site_id),
                TtsSayFinished.topic(),
                AudioToggleOff.topic(),
                AudioToggleOn.topic(),
        ):
            self.client.subscribe(topic)

        endpoint = self.api_url("text-to-speech")
        query = {
            "siteId": self.site_id,
            "sessionId": self.session_id,
            "play": "false",
        }
        http_response = requests.post(endpoint, data=sentence, params=query)
        self.check_status(http_response)

        # The response body must be non-empty; it is compared with the
        # playBytes payload below
        audio = http_response.content
        self.assertGreater(len(audio), 0)

        # First message: audioServer/toggleOff for our site
        toggle_off_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(AudioToggleOff.is_topic(toggle_off_msg.topic))

        toggle_off = AudioToggleOff.from_dict(
            json.loads(toggle_off_msg.payload))
        self.assertEqual(toggle_off.site_id, self.site_id)

        # Second message: tts/say echoing the request parameters
        say_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(TtsSay.is_topic(say_msg.topic))

        say = TtsSay.from_dict(json.loads(say_msg.payload))
        self.assertEqual(say.site_id, self.site_id)
        self.assertEqual(say.session_id, self.session_id)
        self.assertEqual(say.text, sentence)

        # Third message: audioServer/playBytes (will be ignored by audio
        # output system)
        play_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(AudioPlayBytes.is_topic(play_msg.topic))
        played_site_id = AudioPlayBytes.get_site_id(play_msg.topic)
        self.assertEqual(played_site_id, self.site_id)
        self.assertEqual(play_msg.payload, audio)

        # Fourth message: tts/sayFinished for our site and session
        finished_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(TtsSayFinished.is_topic(finished_msg.topic))

        finished_payload = json.loads(finished_msg.payload)
        finished = TtsSayFinished.from_dict(finished_payload)
        self.assertEqual(finished.site_id, self.site_id)
        self.assertEqual(finished.session_id, self.session_id)

        # Fifth message: audioServer/toggleOn for our site
        toggle_on_msg = self.mqtt_messages.get(timeout=5)
        self.assertTrue(AudioToggleOn.is_topic(toggle_on_msg.topic))

        toggle_on = AudioToggleOn.from_dict(json.loads(toggle_on_msg.payload))
        self.assertEqual(toggle_on.site_id, self.site_id)
    async def speak_sentence(
        self,
        sentence: str,
        language: typing.Optional[str] = None,
        capture_audio: bool = False,
        wait_play_finished: bool = True,
        site_id: typing.Optional[str] = None,
        session_id: str = "",
    ) -> typing.Tuple[TtsSayFinished, typing.Optional[AudioPlayBytes]]:
        """Speak a sentence with the text to speech system and await the outcome.

        A ``TtsSay`` with a fresh id is sent through ``self.publish_wait``
        and the replies are collected until the request is complete.

        Args:
            sentence: Text to speak.
            language: Optional language; assigned to ``lang`` on the request
                when truthy.
            capture_audio: When true, completion additionally requires an
                ``AudioPlayBytes`` whose request id matches (or an
                ``AudioPlayError``).
            wait_play_finished: Initial value of the internal "playback done"
                flag is its negation. NOTE(review): in this block the flag is
                always set together with the finished message, so it does not
                change when the wait completes -- confirm this is intended.
            site_id: Target site; falls back to ``self.site_id``.
            session_id: Session id copied into the request.

        Returns:
            The ``(say_finished, play_bytes)`` tuple produced by the reply
            handler. When ``capture_audio`` is false the second element may be
            the placeholder ``True`` rather than a message.

        Raises:
            TtsException: If no text to speech system is configured, or a
                ``TtsError`` was received.
            AudioServerException: If an ``AudioPlayError`` was received.
        """
        uses_dummy_sound = self.sound_system == "dummy"
        if uses_dummy_sound and not self.satellite_site_ids["text_to_speech"]:
            raise TtsException("No text to speech system configured")

        target_site_id = site_id or self.site_id
        say_id = str(uuid4())

        def collect_replies():
            """Gather replies until the say request counts as complete."""
            finished: typing.Optional[TtsSayFinished] = None

            # ``True`` is a placeholder meaning "no audio has to be captured"
            audio: typing.Optional[AudioPlayBytes] = (
                True if not capture_audio else None
            )
            playback_done = not wait_play_finished

            while True:
                topic, message = yield

                if isinstance(message, TtsSayFinished) and (message.id == say_id):
                    finished = message
                    playback_done = True
                elif isinstance(message, TtsError):
                    # An error implies no audio was played, so stop waiting
                    # for anything else
                    finished = message
                    audio = True
                    playback_done = True
                elif isinstance(message, AudioPlayBytes):
                    # Only audio belonging to our own request is kept
                    played_id = AudioPlayBytes.get_request_id(topic)
                    if played_id == say_id:
                        audio = message
                elif isinstance(message, AudioPlayError):
                    audio = message

                if not (finished and audio and playback_done):
                    continue

                return (finished, audio)

        request = TtsSay(
            id=say_id,
            text=sentence,
            site_id=target_site_id,
            session_id=session_id,
        )
        if language:
            request.lang = language

        outgoing = [request]
        reply_types: typing.List[typing.Type[Message]] = [
            TtsSayFinished,
            TtsError,
            AudioPlayBytes,
            AudioPlayError,
        ]

        # A single result is expected; the last one received is kept
        result = None
        async for reply in self.publish_wait(
            collect_replies(), outgoing, reply_types
        ):
            result = reply

        assert isinstance(result, tuple), f"Expected tuple, got {result}"
        say_response, play_response = result

        # A TTS failure takes precedence over any playback problem
        if isinstance(say_response, TtsError):
            _LOGGER.error(say_response)
            raise TtsException(say_response.error)

        assert isinstance(say_response, TtsSayFinished), say_response

        if isinstance(play_response, AudioPlayError):
            _LOGGER.error(play_response)
            raise AudioServerException(play_response.error)

        if capture_audio:
            assert isinstance(play_response, AudioPlayBytes), play_response

        return typing.cast(
            typing.Tuple[TtsSayFinished, typing.Optional[AudioPlayBytes]], result
        )
Example #10
0
def test_tts_say():
    """Check the MQTT topic reported by TtsSay."""
    expected_topic = "hermes/tts/say"
    assert TtsSay.topic() == expected_topic
Example #11
0
    async def handle_intent(
        self, intent: NluIntent
    ) -> typing.AsyncIterable[typing.Union[TtsSay]]:
        """Pass an intent to a remote server or a local command.

        The intent is converted to a dict (with ``site_id`` added) and either
        POSTed as JSON to ``self.handle_url`` or written as JSON to the
        standard input of ``self.handle_command``. If the JSON response
        contains non-empty text under ``speech -> text``, a ``TtsSay`` is
        yielded. Nothing happens when handling is disabled. Any exception is
        logged rather than raised.
        """
        try:
            if not self.handle_enabled:
                _LOGGER.debug("Intent handling is disabled")
                return

            tts_text = ""
            intent_dict = intent.to_rhasspy_dict()

            # The handler also needs to know which site the intent came from
            intent_dict["site_id"] = intent.site_id

            if self.handle_url:
                # Handled by a remote HTTP server
                _LOGGER.debug(self.handle_url)

                async with self.http_session.post(
                    self.handle_url, json=intent_dict, ssl=self.ssl_context
                ) as response:
                    response.raise_for_status()
                    response_dict = await response.json()

                # Look for text to speak in the response
                speech = response_dict.get("speech", {})
                tts_text = speech.get("text", "")
            elif self.handle_command:
                intent_json = json.dumps(intent_dict)

                # Handled by a local program
                _LOGGER.debug(self.handle_command)

                proc = await asyncio.create_subprocess_exec(
                    *self.handle_command,
                    stdin=asyncio.subprocess.PIPE,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )

                # The intent goes in on stdin; the response comes out on stdout
                stdout_data, stderr_data = await proc.communicate(
                    intent_json.encode()
                )

                if stderr_data:
                    _LOGGER.debug(stderr_data.decode())

                try:
                    response_dict = json.loads(stdout_data)
                except json.JSONDecodeError as err:
                    if stdout_data:
                        # Empty output is fine; only complain about real output
                        _LOGGER.warning("Failed to parse output as JSON: %s", err)
                        _LOGGER.warning("Output: %s", stdout_data)
                else:
                    # Look for text to speak in the response
                    speech = response_dict.get("speech", {})
                    tts_text = speech.get("text", "")
            else:
                _LOGGER.warning("Can't handle intent. No handle URL or command.")

            if tts_text:
                # Hand the response text over to the TTS system
                yield TtsSay(
                    text=tts_text,
                    id=str(uuid4()),
                    site_id=intent.site_id,
                    session_id=intent.session_id,
                )
        except Exception:
            _LOGGER.exception("handle_intent")