async def speak_sentence(self, sentence: str, language: typing.Optional[str] = None):
    """Publish a TtsSay request for ``sentence`` and wait for its completion.

    A fresh request id is generated for the TtsSay message. The first value
    produced by ``self.publish_wait`` is returned; if it produces nothing,
    the coroutine falls through and returns ``None``.

    ``language``, when truthy, is stored on the TtsSay message as ``lang``.
    """
    request_id = str(uuid4())

    def wait_for_say_finished():
        """Receive (topic, message) pairs until the matching sayFinished."""
        while True:
            _topic, incoming = yield
            if not isinstance(incoming, TtsSayFinished):
                continue
            if incoming.id == request_id:
                return True, None

    say_message = TtsSay(id=request_id, text=sentence, siteId=self.siteId)
    if language:
        say_message.lang = language

    outgoing = [say_message]
    reply_topics = [TtsSayFinished.topic()]

    # Only a single result is expected, so stop at the first one
    async for outcome in self.publish_wait(
        wait_for_say_finished(), outgoing, reply_topics
    ):
        return outcome

    return None
def test_http_mqtt_text_to_speech(self):
    """POST text to the text-to-speech endpoint and verify the MQTT traffic.

    Expects, in order: tts/say, audioServer/playBytes and tts/sayFinished
    messages, then checks that a "repeat" request returns the same audio.
    """
    sentence = "This is a test."

    subscriptions = (
        TtsSay.topic(),
        AudioPlayBytes.topic(site_id=self.site_id),
        TtsSayFinished.topic(),
    )
    for mqtt_topic in subscriptions:
        self.client.subscribe(mqtt_topic)

    http_response = requests.post(
        self.api_url("text-to-speech"),
        data=sentence,
        params={"siteId": self.site_id, "sessionId": self.session_id},
    )
    self.check_status(http_response)

    audio = http_response.content
    self.assertGreater(len(audio), 0)

    # First message: tts/say
    say_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(TtsSay.is_topic(say_raw.topic))

    say_parsed = TtsSay.from_dict(json.loads(say_raw.payload))
    self.assertEqual(say_parsed.site_id, self.site_id)
    self.assertEqual(say_parsed.session_id, self.session_id)
    self.assertEqual(say_parsed.text, sentence)

    # Second message: audioServer/playBytes
    play_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(AudioPlayBytes.is_topic(play_raw.topic))
    self.assertEqual(AudioPlayBytes.get_site_id(play_raw.topic), self.site_id)
    self.assertEqual(play_raw.payload, audio)

    # Third message: tts/sayFinished
    finished_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(TtsSayFinished.is_topic(finished_raw.topic))

    finished_parsed = TtsSayFinished.from_dict(json.loads(finished_raw.payload))
    self.assertEqual(finished_parsed.session_id, self.session_id)

    # A repeat request must return the same audio as before
    http_response = requests.post(
        self.api_url("text-to-speech"), params={"repeat": "true"}
    )
    self.check_status(http_response)
    self.assertEqual(audio, http_response.content)
def on_message(self, client, userdata, msg):
    """MQTT callback: dispatch an incoming tts/say message to handle_say.

    Messages on other topics, and messages rejected by ``_check_siteId``,
    are ignored. Any exception is logged instead of propagating.
    """
    try:
        _LOGGER.debug("Received %s byte(s) on %s", len(msg.payload), msg.topic)

        if msg.topic != TtsSay.topic():
            return

        # An empty payload is treated as an empty JSON object
        payload_dict = json.loads(msg.payload or "{}")
        if self._check_siteId(payload_dict):
            self.handle_say(TtsSay(**payload_dict))
    except Exception:
        _LOGGER.exception("on_message")
async def handle_intent(
    self, nlu_intent: NluIntent
) -> typing.AsyncIterable[TtsSay]:
    """Handle intent with Home Assistant.

    Depending on ``self.handle_type`` the intent is forwarded either as an
    event or as an intent. For the intent case, a non-empty
    ``speech.plain.speech`` entry in the response is yielded as a TtsSay
    message carrying the intent's site and session ids.

    Errors (including an empty response or an unsupported handle type) are
    logged via ``_LOGGER.exception`` and not propagated to the caller.
    """
    try:
        if self.handle_type == HandleType.EVENT:
            await self.handle_home_assistant_event(nlu_intent)
        elif self.handle_type == HandleType.INTENT:
            response_dict = await self.handle_home_assistant_intent(nlu_intent)

            # Explicit check instead of `assert`: assertions are removed when
            # Python runs with -O, which would lose this diagnostic.
            if not response_dict:
                raise RuntimeError(f"No response from {self.url}")

            # Check for speech response
            tts_text = (
                response_dict.get("speech", {}).get("plain", {}).get("speech", "")
            )

            if tts_text:
                # Forward to TTS system
                yield TtsSay(
                    text=tts_text,
                    id=str(uuid4()),
                    site_id=nlu_intent.site_id,
                    session_id=nlu_intent.session_id,
                )
        else:
            raise ValueError(f"Unsupported handle_type (got {self.handle_type})")
    except Exception:
        _LOGGER.exception("handle_intent")
async def say(
    self,
    text: str,
    site_id="default",
    session_id="",
    request_id: typing.Optional[str] = None,
    block: bool = True,
) -> typing.AsyncIterable[
    typing.Union[
        TtsSay, HotwordToggleOn, HotwordToggleOff, AsrToggleOn, AsrToggleOff
    ]
]:
    """Yield the messages needed to speak ``text`` and optionally wait.

    Yields, in order: hotword/ASR toggle-off messages, the TtsSay request,
    and finally hotword/ASR toggle-on messages. An ``asyncio.Event`` is
    registered under ``self.message_events[TtsSayFinished]`` keyed by the
    request id (``request_id`` or a fresh UUID); when ``block`` is true the
    generator waits on that event with a timeout of at least 10 seconds,
    extended by text length when ``self.say_chars_per_second`` is positive.

    Timeouts are logged as warnings and other errors are logged; the
    toggle-on messages are yielded in either case.
    """
    done_event = asyncio.Event()
    say_id = request_id or str(uuid4())
    self.message_events[TtsSayFinished][say_id] = done_event

    # Turn off hotword detection and ASR at the site while speaking
    yield HotwordToggleOff(site_id=site_id, reason=HotwordToggleReason.TTS_SAY)
    yield AsrToggleOff(site_id=site_id, reason=AsrToggleReason.TTS_SAY)

    # Give the toggle messages time to be delivered
    await asyncio.sleep(self.toggle_delay)

    try:
        # Hand the text to the TTS system
        _LOGGER.debug("Say: %s", text)
        yield TtsSay(
            id=say_id,
            site_id=site_id,
            session_id=session_id,
            text=text,
        )

        if block:
            # Default timeout, lengthened for long texts when a rate is set
            timeout_seconds = 10.0
            if self.say_chars_per_second > 0:
                estimated_seconds = len(text) / self.say_chars_per_second
                timeout_seconds = max(timeout_seconds, estimated_seconds)

            _LOGGER.debug(
                "Waiting for sayFinished (id=%s, timeout=%s)",
                say_id,
                timeout_seconds,
            )
            await asyncio.wait_for(done_event.wait(), timeout=timeout_seconds)
    except asyncio.TimeoutError:
        _LOGGER.warning("Did not receive sayFinished before timeout")
    except Exception:
        _LOGGER.exception("say")
    finally:
        # Let audio playback finish before listening again
        await asyncio.sleep(self.toggle_delay)

        # Turn hotword detection and ASR back on at the site
        yield HotwordToggleOn(site_id=site_id, reason=HotwordToggleReason.TTS_SAY)
        yield AsrToggleOn(site_id=site_id, reason=AsrToggleReason.TTS_SAY)
def on_connect(self, client, userdata, flags, rc):
    """MQTT callback: subscribe to the tts/say topic once connected.

    Any exception raised while subscribing is logged, not propagated.
    """
    try:
        for wanted_topic in (TtsSay.topic(),):
            self.client.subscribe(wanted_topic)
            _LOGGER.debug("Subscribed to %s", wanted_topic)
    except Exception:
        _LOGGER.exception("on_connect")
async def say_and_wait(self, text: str) -> typing.AsyncIterable[TtsSay]:
    """Send text to TTS system and wait for reply.

    Yields a single TtsSay message for the current session, then waits for
    ``self.say_finished_event`` to be set, for at most
    ``self.say_finished_timeout`` seconds. A timeout is logged and otherwise
    ignored.

    This is an async generator, so the return annotation is
    ``typing.AsyncIterable`` (it must be consumed with ``async for``).

    Raises:
        AssertionError: if there is no active session (``self.session``).
    """
    assert self.session, "No session"

    # Reset the event so a previous sayFinished does not end the wait early
    self.say_finished_event.clear()

    # Forward to TTS
    _LOGGER.debug("Say: %s", text)
    yield TtsSay(siteId=self.siteId, sessionId=self.session.sessionId, text=text)

    # Wait for finished response (with timeout)
    try:
        await asyncio.wait_for(
            self.say_finished_event.wait(), timeout=self.say_finished_timeout
        )
    except asyncio.TimeoutError:
        _LOGGER.exception("say_and_wait")
def test_no_play(self):
    """POST to the text-to-speech endpoint with play=false and check MQTT.

    Expects, in order: audioServer/toggleOff, tts/say, audioServer/playBytes,
    tts/sayFinished and audioServer/toggleOn.
    """
    sentence = "This is a test."

    subscriptions = (
        TtsSay.topic(),
        AudioPlayBytes.topic(site_id=self.site_id),
        TtsSayFinished.topic(),
        AudioToggleOff.topic(),
        AudioToggleOn.topic(),
    )
    for mqtt_topic in subscriptions:
        self.client.subscribe(mqtt_topic)

    http_response = requests.post(
        self.api_url("text-to-speech"),
        data=sentence,
        params={
            "siteId": self.site_id,
            "sessionId": self.session_id,
            "play": "false",
        },
    )
    self.check_status(http_response)

    audio = http_response.content
    self.assertGreater(len(audio), 0)

    # 1. audioServer/toggleOff
    toggle_off_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(AudioToggleOff.is_topic(toggle_off_raw.topic))

    toggle_off = AudioToggleOff.from_dict(json.loads(toggle_off_raw.payload))
    self.assertEqual(toggle_off.site_id, self.site_id)

    # 2. tts/say
    say_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(TtsSay.is_topic(say_raw.topic))

    say_parsed = TtsSay.from_dict(json.loads(say_raw.payload))
    self.assertEqual(say_parsed.site_id, self.site_id)
    self.assertEqual(say_parsed.session_id, self.session_id)
    self.assertEqual(say_parsed.text, sentence)

    # 3. audioServer/playBytes (will be ignored by audio output system)
    play_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(AudioPlayBytes.is_topic(play_raw.topic))
    self.assertEqual(AudioPlayBytes.get_site_id(play_raw.topic), self.site_id)
    self.assertEqual(play_raw.payload, audio)

    # 4. tts/sayFinished
    finished_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(TtsSayFinished.is_topic(finished_raw.topic))

    finished_parsed = TtsSayFinished.from_dict(json.loads(finished_raw.payload))
    self.assertEqual(finished_parsed.site_id, self.site_id)
    self.assertEqual(finished_parsed.session_id, self.session_id)

    # 5. audioServer/toggleOn
    toggle_on_raw = self.mqtt_messages.get(timeout=5)
    self.assertTrue(AudioToggleOn.is_topic(toggle_on_raw.topic))

    toggle_on = AudioToggleOn.from_dict(json.loads(toggle_on_raw.payload))
    self.assertEqual(toggle_on.site_id, self.site_id)
async def speak_sentence(
    self,
    sentence: str,
    language: typing.Optional[str] = None,
    capture_audio: bool = False,
    wait_play_finished: bool = True,
    site_id: typing.Optional[str] = None,
    session_id: str = "",
) -> typing.Tuple[TtsSayFinished, typing.Optional[AudioPlayBytes]]:
    """Speak a sentence using text to speech.

    Publishes a TtsSay message with a fresh request id and collects replies
    through ``self.publish_wait`` until the inner ``handle_finished``
    generator reports completion.

    Args:
        sentence: Text placed in the TtsSay message.
        language: When truthy, stored on the TtsSay message as ``lang``.
        capture_audio: When true, completion additionally requires an
            AudioPlayBytes message whose request id matches this request
            (or an AudioPlayError / TtsError).
        wait_play_finished: Initial value of the "play finished" flag is
            ``not wait_play_finished``; see the note inside
            ``handle_finished``.
        site_id: Site for the request; falls back to ``self.site_id``.
        session_id: Session id placed in the TtsSay message.

    Returns:
        The ``(say_finished, play_bytes)`` tuple produced by
        ``handle_finished``.
        NOTE(review): when ``capture_audio`` is false the second element
        can be the literal ``True`` sentinel rather than ``None`` or an
        AudioPlayBytes message - confirm that callers do not rely on it.

    Raises:
        TtsException: if the sound system is "dummy" and no text-to-speech
            satellite site ids are configured, or if a TtsError is received.
        AudioServerException: if an AudioPlayError is received.
        AssertionError: if ``publish_wait`` did not produce a tuple, or the
            received messages have unexpected types.
    """
    if (self.sound_system == "dummy") and (
        not self.satellite_site_ids["text_to_speech"]
    ):
        raise TtsException("No text to speech system configured")

    site_id = site_id or self.site_id
    tts_id = str(uuid4())

    def handle_finished():
        # Generator driven with (topic, message) pairs; it returns once all
        # three of say_finished / play_bytes / play_finished are truthy.
        say_finished: typing.Optional[TtsSayFinished] = None
        # `True` is used as an "already satisfied" sentinel when the audio
        # does not need to be captured.
        play_bytes: typing.Optional[
            AudioPlayBytes
        ] = None if capture_audio else True
        play_finished = not wait_play_finished

        while True:
            topic, message = yield

            if isinstance(message, TtsSayFinished) and (message.id == tts_id):
                say_finished = message
                # NOTE(review): play_finished is set unconditionally here, so
                # a matching sayFinished also satisfies the play-finished
                # condition regardless of wait_play_finished.
                play_finished = True
            elif isinstance(message, TtsError):
                # Assume audio playback didn't happen
                # (no id comparison is made for TtsError messages)
                say_finished = message
                play_bytes = True
                play_finished = True
            elif isinstance(message, AudioPlayBytes):
                # Only accept audio whose topic carries this request's id
                request_id = AudioPlayBytes.get_request_id(topic)
                if request_id == tts_id:
                    play_bytes = message
            elif isinstance(message, AudioPlayError):
                # The error stands in for the audio; it is re-raised below
                play_bytes = message

            if say_finished and play_bytes and play_finished:
                return (say_finished, play_bytes)

    say = TtsSay(id=tts_id, text=sentence, site_id=site_id, session_id=session_id)
    if language:
        say.lang = language

    messages = [say]
    message_types: typing.List[typing.Type[Message]] = [
        TtsSayFinished,
        TtsError,
        AudioPlayBytes,
        AudioPlayError,
    ]

    # Expecting only a single result
    # (if several are produced, the last one is kept)
    result = None
    async for response in self.publish_wait(
        handle_finished(), messages, message_types
    ):
        result = response

    assert isinstance(result, tuple), f"Expected tuple, got {result}"
    say_response, play_response = result

    # Translate error messages received over MQTT into exceptions
    if isinstance(say_response, TtsError):
        _LOGGER.error(say_response)
        raise TtsException(say_response.error)

    assert isinstance(say_response, TtsSayFinished), say_response

    if isinstance(play_response, AudioPlayError):
        _LOGGER.error(play_response)
        raise AudioServerException(play_response.error)

    if capture_audio:
        assert isinstance(play_response, AudioPlayBytes), play_response

    # cast() only informs the type checker; `result` is returned unchanged
    return typing.cast(
        typing.Tuple[TtsSayFinished, typing.Optional[AudioPlayBytes]], result
    )
def test_tts_say():
    """Check the MQTT topic reported by TtsSay."""
    expected_topic = "hermes/tts/say"
    assert TtsSay.topic() == expected_topic
async def handle_intent(
    self, intent: NluIntent
) -> typing.AsyncIterable[typing.Union[TtsSay]]:
    """Pass an intent to a remote server or a local command.

    The intent is converted with ``to_rhasspy_dict`` and extended with its
    ``site_id``. It is then POSTed as JSON to ``self.handle_url`` or, if no
    URL is set, written as JSON to the stdin of ``self.handle_command``.
    A non-empty ``speech.text`` entry in the JSON reply is yielded as a
    TtsSay message carrying the intent's site and session ids.

    Nothing is done when ``self.handle_enabled`` is false. All exceptions
    are logged rather than propagated.
    """
    try:
        if not self.handle_enabled:
            _LOGGER.debug("Intent handling is disabled")
            return

        spoken_reply = ""
        payload = intent.to_rhasspy_dict()

        # Include the originating site in the payload
        payload["site_id"] = intent.site_id

        if self.handle_url:
            # Handler is a remote HTTP server
            _LOGGER.debug(self.handle_url)

            async with self.http_session.post(
                self.handle_url, json=payload, ssl=self.ssl_context
            ) as http_response:
                http_response.raise_for_status()
                reply = await http_response.json()

            # Pull the optional speech text out of the reply
            spoken_reply = reply.get("speech", {}).get("text", "")
        elif self.handle_command:
            payload_json = json.dumps(payload)

            # Handler is a local program fed through stdin
            _LOGGER.debug(self.handle_command)

            handler_proc = await asyncio.create_subprocess_exec(
                *self.handle_command,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )

            stdout_bytes, stderr_bytes = await handler_proc.communicate(
                payload_json.encode()
            )

            if stderr_bytes:
                _LOGGER.debug(stderr_bytes.decode())

            try:
                reply = json.loads(stdout_bytes)
            except json.JSONDecodeError as decode_error:
                # Empty output is not treated as an error
                if stdout_bytes:
                    _LOGGER.warning(
                        "Failed to parse output as JSON: %s", decode_error
                    )
                    _LOGGER.warning("Output: %s", stdout_bytes)
            else:
                # Pull the optional speech text out of the reply
                spoken_reply = reply.get("speech", {}).get("text", "")
        else:
            _LOGGER.warning("Can't handle intent. No handle URL or command.")

        if spoken_reply:
            # Hand the reply text to the TTS system
            yield TtsSay(
                text=spoken_reply,
                id=str(uuid4()),
                site_id=intent.site_id,
                session_id=intent.session_id,
            )
    except Exception:
        _LOGGER.exception("handle_intent")