async def handle_get_voices(
    self, get_voices: GetVoices
) -> typing.AsyncIterable[typing.Union[Voices, TtsError]]:
    """Publish list of available voices.

    Runs ``self.voices_command`` through the shell and parses its standard
    output, one voice per line. Each non-blank line must start with a voice
    ID, optionally followed by whitespace and a free-form description.

    Args:
        get_voices: Request whose ``id`` and ``site_id`` are echoed back in
            the messages yielded here.

    Yields:
        A ``TtsError`` if the command is not configured, exits with an error,
        or its output cannot be decoded; then, in every case, a ``Voices``
        message holding whatever voices were collected (possibly none).
    """
    voices: typing.List[Voice] = []

    try:
        # Explicit check instead of ``assert`` so the guard still holds when
        # Python runs with -O (which strips assert statements).
        if not self.voices_command:
            raise ValueError("No voices command")

        _LOGGER.debug(self.voices_command)

        # The command string is executed by the shell (shell=True), so it
        # must only ever come from a trusted source.
        output = subprocess.check_output(self.voices_command, shell=True)

        # Read a voice on each line.
        for line in output.decode().splitlines():
            line = line.strip()
            if not line:
                continue

            # "<ID> <description with whitespace>"
            parts = line.split(maxsplit=1)
            voice = Voice(voice_id=parts[0])
            if len(parts) > 1:
                voice.description = parts[1]

            voices.append(voice)
    except Exception as e:
        _LOGGER.exception("handle_get_voices")
        yield TtsError(error=str(e), context=get_voices.id, site_id=get_voices.site_id)

    # Publish response (also after an error, with the voices gathered so far)
    yield Voices(voices=voices, id=get_voices.id, site_id=get_voices.site_id)
async def handle_say(
    self, say: TtsSay
) -> typing.AsyncIterable[
    typing.Union[typing.Tuple[AudioPlayBytes, TopicArgs], TtsSayFinished, TtsError]
]:
    """Do text to speech using a remote HTTP server.

    When ``self.tts_url`` is set, the text of ``say`` is POSTed to it with
    ``play=false`` (plus ``language=<lang>`` when ``say.lang`` is set) and
    the response body is forwarded as audio to be played.

    Args:
        say: Request carrying the text, optional language, and the
            ``id``/``site_id``/``session_id`` echoed back in the results.

    Yields:
        An ``(AudioPlayBytes, topic-args)`` tuple when audio was received,
        a ``TtsError`` if anything raised, and always a final
        ``TtsSayFinished``.
    """
    try:
        if self.tts_url:
            # Remote text to speech server
            _LOGGER.debug(self.tts_url)

            params = {"play": "false"}
            if say.lang:
                # Add ?language=<lang> query parameter
                params["language"] = say.lang

            async with self.http_session.post(
                self.tts_url, data=say.text, params=params, ssl=self.ssl_context
            ) as response:
                response.raise_for_status()

                # A missing header must not abort the request: an unexpected
                # content type is only worth a warning. Compare the bare media
                # type so parameters ("audio/wav; ...") and letter case do not
                # trigger spurious warnings.
                content_type = response.headers.get("Content-Type", "")
                media_type = content_type.split(";", 1)[0].strip().lower()
                if media_type != "audio/wav":
                    _LOGGER.warning(
                        "Expected audio/wav content type, got %s", content_type
                    )

                wav_bytes = await response.read()

            # The response is released before yielding so the connection is
            # not held open while the consumer processes the audio.
            if wav_bytes:
                yield (
                    AudioPlayBytes(wav_bytes=wav_bytes),
                    {"site_id": say.site_id, "request_id": say.id},
                )
            else:
                _LOGGER.error("Received empty response")
    except Exception as e:
        _LOGGER.exception("handle_say")
        yield TtsError(
            error=str(e),
            context=say.id,
            site_id=say.site_id,
            session_id=say.session_id,
        )
    finally:
        # Always signal completion, whether or not audio was produced
        yield TtsSayFinished(
            id=say.id, site_id=say.site_id, session_id=say.session_id
        )
async def handle_get_voices(
    self, get_voices: GetVoices
) -> typing.AsyncIterable[typing.Union[Voices, TtsError]]:
    """Report the voices held in ``self.voices``.

    One ``Voice`` is built per item obtained by iterating ``self.voices``,
    using that item as the voice ID.

    Args:
        get_voices: Request whose ``id`` and ``site_id`` are echoed back in
            the messages yielded here.

    Yields:
        A ``TtsError`` if building the list raised, then in every case a
        ``Voices`` message with the voices gathered up to that point.
    """
    available: typing.List[Voice] = []

    try:
        # Items are added one at a time, so anything collected before a
        # failure is still reported below.
        available.extend(Voice(voice_id=name) for name in self.voices)
    except Exception as err:
        _LOGGER.exception("handle_get_voices")
        yield TtsError(
            error=str(err),
            context=get_voices.id,
            site_id=get_voices.site_id,
        )

    # The response is sent regardless of whether an error was reported
    yield Voices(
        voices=available,
        id=get_voices.id,
        site_id=get_voices.site_id,
    )
async def handle_say(
    self, say: TtsSay
) -> typing.AsyncIterable[
    typing.Union[
        TtsSayFinished,
        typing.Tuple[AudioPlayBytes, TopicArgs],
        TtsError,
        AudioPlayError,
    ]
]:
    """Run TTS system and publish WAV data.

    The TTS command is built from ``self.tts_command``, either rendered as a
    Jinja2 template or via ``str.format``, with ``lang`` (and ``file`` when a
    temporary WAV file is used) as arguments. Text is passed on standard
    input or as a trailing command-line argument; WAV audio is read from the
    temporary file or from the command's standard output. The audio is then
    played with ``self.play_command`` or published as ``AudioPlayBytes``.

    Args:
        say: Request carrying the text, optional language and volume, and
            the ``id``/``site_id``/``session_id`` echoed back in the results.

    Yields:
        An ``(AudioPlayBytes, topic-args)`` tuple when no local play command
        is configured, an ``AudioPlayError`` if the local play command fails,
        a ``TtsError`` if synthesis fails, and always a final
        ``TtsSayFinished``.
    """
    wav_bytes: typing.Optional[bytes] = None
    temp_wav_path: typing.Optional[str] = None

    try:
        language = say.lang or self.language
        format_args = {"lang": language}

        if self.use_temp_wav:
            # WAV audio will be stored in a temporary file. Only its path is
            # needed, so our own handle is closed immediately rather than
            # being left open for the duration of the request.
            with tempfile.NamedTemporaryFile(
                suffix=".wav", delete=False
            ) as temp_wav_file:
                temp_wav_path = temp_wav_file.name

            # Path to WAV file
            format_args["file"] = temp_wav_path

        if self.use_jinja2:
            # Interpret TTS command as a Jinja2 template (compiled once)
            if not self.jinja2_template:
                from jinja2 import Environment

                self.jinja2_template = Environment().from_string(self.tts_command)

            tts_command_str = self.jinja2_template.render(**format_args)
        else:
            # Interpret TTS command as a formatted string
            tts_command_str = self.tts_command.format(**format_args)

        say_command = shlex.split(tts_command_str)
        if not self.text_on_stdin:
            # Text as command-line argument
            say_command += [say.text]

        _LOGGER.debug(say_command)

        # Text goes to standard input only when requested; WAV audio comes
        # from standard output unless a temporary file is used.
        proc_input: typing.Optional[bytes] = (
            say.text.encode() if self.text_on_stdin else None
        )
        proc_stdout: typing.Optional[int] = (
            None if self.use_temp_wav else subprocess.PIPE
        )

        # Run TTS process
        result = subprocess.run(
            say_command, input=proc_input, stdout=proc_stdout, check=False
        )

        # Explicit checks instead of ``assert`` so they survive python -O
        if result.returncode != 0:
            raise RuntimeError(f"Non-zero exit code: {result.returncode}")

        wav_bytes = result.stdout
        if self.use_temp_wav and temp_wav_path:
            with open(temp_wav_path, "rb") as wav_file:
                wav_bytes = wav_file.read()

        if not wav_bytes:
            raise RuntimeError("No WAV data received")

        _LOGGER.debug("Got %s byte(s) of WAV data", len(wav_bytes))

        volume = self.volume
        if say.volume is not None:
            # Override with message volume
            volume = say.volume

        if volume is not None:
            wav_bytes = TtsHermesMqtt.change_volume(wav_bytes, volume)

        finished_event = asyncio.Event()

        # Play WAV
        if self.play_command:
            try:
                # Play locally.
                # NOTE(review): formats with say.lang rather than the resolved
                # ``language`` fallback above -- confirm this is intended.
                play_command = shlex.split(self.play_command.format(lang=say.lang))
                _LOGGER.debug(play_command)

                subprocess.run(play_command, input=wav_bytes, check=True)
            except Exception as e:
                _LOGGER.exception("play_command")
                yield AudioPlayError(
                    error=str(e),
                    context=say.id,
                    site_id=say.site_id,
                    session_id=say.session_id,
                )

            # Local playback is over whether it succeeded or failed, and
            # nothing else will set this event, so never wait on it.
            finished_event.set()
        else:
            # Publish playBytes
            request_id = say.id or str(uuid4())
            self.play_finished_events[request_id] = finished_event

            yield (
                AudioPlayBytes(wav_bytes=wav_bytes),
                {"site_id": say.site_id, "request_id": request_id},
            )

        try:
            # Wait for audio to finish playing or timeout
            wav_duration = get_wav_duration(wav_bytes)
            wav_timeout = wav_duration + self.finished_timeout_extra

            _LOGGER.debug("Waiting for play finished (timeout=%s)", wav_timeout)
            await asyncio.wait_for(finished_event.wait(), timeout=wav_timeout)
        except asyncio.TimeoutError:
            _LOGGER.warning("Did not receive playFinished before timeout")
    except Exception as e:
        _LOGGER.exception("handle_say")
        yield TtsError(
            error=str(e),
            context=say.id,
            site_id=say.site_id,
            session_id=say.session_id,
        )
    finally:
        # Remove the temporary file before yielding: if the generator is
        # closed while suspended at the yield below, code after it would
        # never run and the file would be left behind.
        if temp_wav_path:
            try:
                os.unlink(temp_wav_path)
            except OSError:
                # Best effort only
                _LOGGER.debug("Could not remove temporary WAV file: %s", temp_wav_path)

        yield TtsSayFinished(
            id=say.id, site_id=say.site_id, session_id=say.session_id
        )
async def handle_say(
    self, say: TtsSay
) -> typing.AsyncIterable[
    typing.Union[
        TtsSayFinished,
        typing.Tuple[AudioPlayBytes, TopicArgs],
        TtsError,
        AudioPlayError,
    ]
]:
    """Run TTS system and publish WAV data, using the WAV cache if enabled.

    The voice is looked up in ``self.voices`` by ``say.lang`` (falling back
    to ``self.default_voice``). When ``self.cache_dir`` is set, a WAV file
    named after the sentence hash is reused if present; otherwise the text
    is synthesized and the unmodified (pre-volume) audio is written to the
    cache. The audio is then played with ``self.play_command`` or published
    as ``AudioPlayBytes``.

    Args:
        say: Request carrying the text, optional language and volume, and
            the ``id``/``site_id``/``session_id`` echoed back in the results.

    Yields:
        An ``(AudioPlayBytes, topic-args)`` tuple when no local play command
        is configured, an ``AudioPlayError`` if the local play command fails,
        a ``TtsError`` if anything else fails, and always a final
        ``TtsSayFinished``.
    """
    wav_bytes: typing.Optional[bytes] = None

    try:
        voice_name = say.lang or self.default_voice
        voice = self.voices.get(voice_name)

        # Explicit checks instead of ``assert`` so they survive python -O
        if voice is None:
            raise ValueError(f"No voice named {voice_name}")

        # Try to pull WAV from cache first
        sentence_hash = TtsHermesMqtt.get_sentence_hash(voice.cache_id, say.text)
        from_cache = False
        cached_wav_path = None

        if self.cache_dir:
            # Create cache directory in profile if it doesn't exist
            self.cache_dir.mkdir(parents=True, exist_ok=True)

            # Load from cache
            cached_wav_path = self.cache_dir / f"{sentence_hash.hexdigest()}.wav"

            if cached_wav_path.is_file():
                wav_bytes = cached_wav_path.read_bytes()
                if wav_bytes:
                    # Use WAV file from cache
                    _LOGGER.debug("Using WAV from cache: %s", cached_wav_path)
                    from_cache = True
                else:
                    # An empty file is not a cache hit. Leaving from_cache
                    # False lets the freshly synthesized audio overwrite it
                    # below instead of the entry staying broken forever.
                    _LOGGER.warning("Ignoring empty cached WAV: %s", cached_wav_path)

        if not wav_bytes:
            # Run text to speech
            _LOGGER.debug("Synthesizing '%s' (voice=%s)", say.text, voice_name)
            wav_bytes = self.synthesize(voice, say.text)

        if not wav_bytes:
            raise RuntimeError("No WAV data synthesized")

        _LOGGER.debug("Got %s byte(s) of WAV data", len(wav_bytes))

        # Adjust volume
        volume = self.volume
        if say.volume is not None:
            # Override with message volume
            volume = say.volume

        # The cache stores the audio as synthesized, before any volume change
        original_wav_bytes = wav_bytes
        if volume is not None:
            wav_bytes = TtsHermesMqtt.change_volume(wav_bytes, volume)

        finished_event = asyncio.Event()

        # Play WAV
        if self.play_command:
            try:
                # Play locally.
                # NOTE(review): formats with say.lang rather than the resolved
                # ``voice_name`` fallback above -- confirm this is intended.
                play_command = shlex.split(self.play_command.format(lang=say.lang))
                _LOGGER.debug(play_command)

                subprocess.run(play_command, input=wav_bytes, check=True)
            except Exception as e:
                _LOGGER.exception("play_command")
                yield AudioPlayError(
                    error=str(e),
                    context=say.id,
                    site_id=say.site_id,
                    session_id=say.session_id,
                )

            # Local playback is over whether it succeeded or failed, and
            # nothing else will set this event, so never wait on it.
            finished_event.set()
        else:
            # Publish playBytes
            request_id = say.id or str(uuid4())
            self.play_finished_events[request_id] = finished_event

            yield (
                AudioPlayBytes(wav_bytes=wav_bytes),
                {"site_id": say.site_id, "request_id": request_id},
            )

        # Save to cache
        if (not from_cache) and cached_wav_path:
            with open(cached_wav_path, "wb") as cached_wav_file:
                cached_wav_file.write(original_wav_bytes)

        try:
            # Wait for audio to finish playing or timeout
            wav_duration = TtsHermesMqtt.get_wav_duration(wav_bytes)
            wav_timeout = wav_duration + self.finished_timeout_extra

            _LOGGER.debug("Waiting for play finished (timeout=%s)", wav_timeout)
            await asyncio.wait_for(finished_event.wait(), timeout=wav_timeout)
        except asyncio.TimeoutError:
            _LOGGER.warning("Did not receive playFinished before timeout")
    except Exception as e:
        _LOGGER.exception("handle_say")
        yield TtsError(
            error=str(e),
            context=say.id,
            site_id=say.site_id,
            session_id=say.session_id,
        )
    finally:
        # Always signal completion, whether or not audio was produced
        yield TtsSayFinished(
            id=say.id, site_id=say.site_id, session_id=say.session_id
        )