예제 #1
0
    async def handle_get_voices(
        self, get_voices: GetVoices
    ) -> typing.AsyncIterable[typing.Union[Voices, TtsError]]:
        """Publish the list of voices reported by ``self.voices_command``.

        The command is run through the shell and its standard output is
        parsed one voice per non-empty line: the first whitespace-separated
        token is the voice ID and the remainder of the line, if any, is the
        description.

        Args:
            get_voices: Request whose ``id`` and ``site_id`` are echoed back
                in the response (and in the error, as ``context``).

        Yields:
            A ``TtsError`` if running or parsing the command fails, followed
            in every case by a ``Voices`` message holding whatever voices
            were collected before the failure (possibly none).
        """
        voices: typing.List[Voice] = []
        try:
            # Explicit check instead of ``assert``: assertions are stripped
            # under ``python -O``, which would let an empty command through.
            if not self.voices_command:
                raise ValueError("No voices command")

            _LOGGER.debug(self.voices_command)

            # NOTE: shell=True executes the string as-is; the command is read
            # from this object's own settings, not from the incoming message.
            output = subprocess.check_output(self.voices_command,
                                             shell=True).decode()

            # Read a voice on each line.
            # The line must start with a voice ID, optionally followed by
            # whitespace and a description.
            for line in output.splitlines():
                line = line.strip()
                if not line:
                    continue

                # ID description with whitespace
                parts = line.split(maxsplit=1)
                voice = Voice(voice_id=parts[0])
                if len(parts) > 1:
                    voice.description = parts[1]

                voices.append(voice)
        except Exception as e:
            _LOGGER.exception("handle_get_voices")
            yield TtsError(error=str(e),
                           context=get_voices.id,
                           site_id=get_voices.site_id)

        # Publish response (also after an error, with the voices read so far)
        yield Voices(voices=voices,
                     id=get_voices.id,
                     site_id=get_voices.site_id)
예제 #2
0
    async def handle_say(
        self, say: TtsSay
    ) -> typing.AsyncIterable[
        typing.Union[typing.Tuple[AudioPlayBytes, TopicArgs], TtsSayFinished, TtsError]
    ]:
        """Do text to speech using the remote server at ``self.tts_url``.

        The text is POSTed to the server and the response body is forwarded
        as audio to play. When ``self.tts_url`` is not set, no request is
        made and only the final ``TtsSayFinished`` is produced.

        Args:
            say: Request with the text to speak; ``say.lang``, when set, is
                sent as the ``language`` query parameter.

        Yields:
            An ``(AudioPlayBytes, topic args)`` tuple when the server returns
            a non-empty body, a ``TtsError`` if the request fails, and always
            a closing ``TtsSayFinished``.
        """
        try:
            if self.tts_url:
                # Remote text to speech server
                _LOGGER.debug(self.tts_url)

                # NOTE(review): "play=false" presumably tells the server not
                # to play the audio itself -- confirm against the server API.
                params = {"play": "false"}
                if say.lang:
                    # Add ?language=<lang> query parameter
                    params["language"] = say.lang

                async with self.http_session.post(
                    self.tts_url, data=say.text, params=params, ssl=self.ssl_context
                ) as response:
                    response.raise_for_status()

                    # The content type is only advisory here, so a missing
                    # header must not abort the request (no KeyError), and
                    # parameters such as "; charset=..." or differing case
                    # must not trigger a spurious warning.
                    content_type = response.headers.get("Content-Type")
                    media_type = (content_type or "").split(";", 1)[0].strip().lower()
                    if media_type != "audio/wav":
                        _LOGGER.warning(
                            "Expected audio/wav content type, got %s", content_type
                        )

                    wav_bytes = await response.read()
                    if wav_bytes:
                        yield (
                            AudioPlayBytes(wav_bytes=wav_bytes),
                            {"site_id": say.site_id, "request_id": say.id},
                        )
                    else:
                        _LOGGER.error("Received empty response")
        except Exception as e:
            _LOGGER.exception("handle_say")
            yield TtsError(
                error=str(e),
                context=say.id,
                site_id=say.site_id,
                session_id=say.session_id,
            )
        finally:
            # Always report completion, whether or not audio was produced
            yield TtsSayFinished(
                id=say.id, site_id=say.site_id, session_id=say.session_id
            )
예제 #3
0
    async def handle_get_voices(
        self, get_voices: GetVoices
    ) -> typing.AsyncIterable[typing.Union[Voices, TtsError]]:
        """Publish one ``Voice`` per entry of ``self.voices``.

        Args:
            get_voices: Request whose ``id`` and ``site_id`` are echoed back
                in the response (and in the error, as ``context``).

        Yields:
            A ``TtsError`` if building the list fails, followed in every case
            by a ``Voices`` message with the voices collected so far.
        """
        voices: typing.List[Voice] = []
        try:
            # extend() appends item by item, so voices created before a
            # failure are kept, exactly as with an explicit append loop.
            voices.extend(Voice(voice_id=name) for name in self.voices)
        except Exception as err:
            _LOGGER.exception("handle_get_voices")
            yield TtsError(
                error=str(err),
                context=get_voices.id,
                site_id=get_voices.site_id,
            )

        # The response is sent even after an error
        response = Voices(
            voices=voices,
            id=get_voices.id,
            site_id=get_voices.site_id,
        )
        yield response
예제 #4
0
    async def handle_say(
        self, say: TtsSay
    ) -> typing.AsyncIterable[typing.Union[TtsSayFinished, typing.Tuple[
            AudioPlayBytes, TopicArgs], TtsError, AudioPlayError, ]]:
        """Run the TTS command for ``say`` and play or publish the WAV data.

        The command line comes from ``self.tts_command``, rendered as a
        Jinja2 template when ``self.use_jinja2`` is set and with
        ``str.format`` otherwise. The text is passed on standard input
        (``self.text_on_stdin``) or appended as the last argument. The audio
        is read from the command's standard output, or from a temporary file
        whose path is given to the command as ``file``
        (``self.use_temp_wav``).

        Args:
            say: Request with the text to speak; ``say.lang`` and
                ``say.volume`` override ``self.language`` and
                ``self.volume`` when set.

        Yields:
            ``AudioPlayError`` if the local play command fails, an
            ``(AudioPlayBytes, topic args)`` tuple when no play command is
            configured, ``TtsError`` if anything else fails, and always a
            closing ``TtsSayFinished``.
        """
        wav_bytes: typing.Optional[bytes] = None
        temp_wav_path: typing.Optional[str] = None

        try:
            language = say.lang or self.language
            format_args = {"lang": language}

            if self.use_temp_wav:
                # WAV audio will be stored in a temporary file. Only its path
                # is needed, so close our own handle right away instead of
                # leaking it; the file itself is removed in ``finally``.
                with tempfile.NamedTemporaryFile(suffix=".wav",
                                                 delete=False) as temp_wav_file:
                    temp_wav_path = temp_wav_file.name

                # Path to WAV file
                format_args["file"] = temp_wav_path

            if self.use_jinja2:
                # Interpret TTS command as a Jinja2 template (compiled once
                # and cached on the instance)
                if not self.jinja2_template:
                    from jinja2 import Environment

                    self.jinja2_template = Environment().from_string(
                        self.tts_command)

                tts_command_str = self.jinja2_template.render(**format_args)
            else:
                # Interpret TTS command as a formatted string
                tts_command_str = self.tts_command.format(**format_args)

            say_command = shlex.split(tts_command_str)

            if not self.text_on_stdin:
                # Text as command-line arguments
                say_command += [say.text]

            _LOGGER.debug(say_command)

            # Default: WAV audio on stdout, text as command-line argument
            proc_stdin: typing.Optional[int] = None
            proc_stdout: typing.Optional[int] = subprocess.PIPE
            proc_input: typing.Optional[bytes] = None

            if self.use_temp_wav:
                # WAV audio from file
                proc_stdout = None

            if self.text_on_stdin:
                # Text from standard in
                proc_stdin = subprocess.PIPE
                proc_input = say.text.encode()

            # Run TTS process; communicate() waits for it to exit
            proc = subprocess.Popen(say_command,
                                    stdin=proc_stdin,
                                    stdout=proc_stdout)
            wav_bytes, _ = proc.communicate(input=proc_input)

            # Explicit checks instead of ``assert``, which is stripped
            # under ``python -O``
            if proc.returncode != 0:
                raise RuntimeError(f"Non-zero exit code: {proc.returncode}")

            if self.use_temp_wav and temp_wav_path:
                with open(temp_wav_path, "rb") as wav_file:
                    wav_bytes = wav_file.read()

            if not wav_bytes:
                raise RuntimeError("No WAV data received")

            _LOGGER.debug("Got %s byte(s) of WAV data", len(wav_bytes))

            volume = self.volume
            if say.volume is not None:
                # Override with message volume
                volume = say.volume

            if volume is not None:
                wav_bytes = TtsHermesMqtt.change_volume(wav_bytes, volume)

            finished_event = asyncio.Event()

            # Play WAV
            if self.play_command:
                # Local playback is synchronous and the event is not
                # registered anywhere, so there is never a playFinished to
                # wait for -- not even when the player fails.
                finished_event.set()

                try:
                    # Play locally
                    # NOTE(review): this formats with say.lang rather than
                    # the resolved ``language`` above, so "{lang}" renders as
                    # "None" when the message has no language -- confirm
                    # that this is intended.
                    play_command = shlex.split(
                        self.play_command.format(lang=say.lang))
                    _LOGGER.debug(play_command)

                    subprocess.run(play_command,
                                   input=wav_bytes,
                                   check=True)
                except Exception as e:
                    _LOGGER.exception("play_command")
                    yield AudioPlayError(
                        error=str(e),
                        context=say.id,
                        site_id=say.site_id,
                        session_id=say.session_id,
                    )
            else:
                # Publish playBytes; the event is registered under the
                # request id in self.play_finished_events
                request_id = say.id or str(uuid4())
                self.play_finished_events[request_id] = finished_event

                yield (
                    AudioPlayBytes(wav_bytes=wav_bytes),
                    {
                        "site_id": say.site_id,
                        "request_id": request_id
                    },
                )

            try:
                # Wait for audio to finish playing or timeout
                wav_duration = get_wav_duration(wav_bytes)
                wav_timeout = wav_duration + self.finished_timeout_extra

                _LOGGER.debug("Waiting for play finished (timeout=%s)",
                              wav_timeout)
                await asyncio.wait_for(finished_event.wait(),
                                       timeout=wav_timeout)
            except asyncio.TimeoutError:
                _LOGGER.warning(
                    "Did not receive playFinished before timeout")

        except Exception as e:
            _LOGGER.exception("handle_say")
            yield TtsError(
                error=str(e),
                context=say.id,
                site_id=say.site_id,
                session_id=say.session_id,
            )
        finally:
            # Clean up before the final yield: code after a yield only runs
            # if the consumer resumes the generator, so the file could
            # otherwise be left behind.
            if temp_wav_path:
                try:
                    os.unlink(temp_wav_path)
                except Exception:
                    # Best effort only, but leave a trace for debugging
                    _LOGGER.debug("Could not remove temporary WAV file %s",
                                  temp_wav_path,
                                  exc_info=True)

            yield TtsSayFinished(id=say.id,
                                 site_id=say.site_id,
                                 session_id=say.session_id)
예제 #5
0
    async def handle_say(
        self, say: TtsSay
    ) -> typing.AsyncIterable[typing.Union[TtsSayFinished, typing.Tuple[
            AudioPlayBytes, TopicArgs], TtsError, AudioPlayError, ]]:
        """Synthesize ``say.text`` (or load it from cache) and play or publish it.

        The voice is looked up in ``self.voices`` by ``say.lang``, falling
        back to ``self.default_voice``. When ``self.cache_dir`` is set, the
        WAV is loaded from / saved to a file there named after the sentence
        hash; the cached copy is the audio before volume adjustment.

        Args:
            say: Request with the text to speak; ``say.volume`` overrides
                ``self.volume`` when set.

        Yields:
            ``AudioPlayError`` if the local play command fails, an
            ``(AudioPlayBytes, topic args)`` tuple when no play command is
            configured, ``TtsError`` if anything else fails, and always a
            closing ``TtsSayFinished``.
        """
        wav_bytes: typing.Optional[bytes] = None

        try:
            voice_name = say.lang or self.default_voice
            voice = self.voices.get(voice_name)
            # Explicit check instead of ``assert``, which is stripped under
            # ``python -O``
            if voice is None:
                raise ValueError(f"No voice named {voice_name}")

            # Try to pull WAV from cache first
            sentence_hash = TtsHermesMqtt.get_sentence_hash(
                voice.cache_id, say.text)
            from_cache = False
            cached_wav_path = None

            if self.cache_dir:
                # Create cache directory in profile if it doesn't exist
                self.cache_dir.mkdir(parents=True, exist_ok=True)

                # Load from cache
                cached_wav_path = self.cache_dir / f"{sentence_hash.hexdigest()}.wav"

                if cached_wav_path.is_file():
                    wav_bytes = cached_wav_path.read_bytes()
                    if wav_bytes:
                        # Use WAV file from cache
                        _LOGGER.debug("Using WAV from cache: %s",
                                      cached_wav_path)
                        from_cache = True
                    else:
                        # An empty file is a cache miss: leaving from_cache
                        # False lets the fresh audio overwrite it below
                        # instead of re-synthesizing on every request.
                        _LOGGER.warning("Ignoring empty WAV in cache: %s",
                                        cached_wav_path)

            if not wav_bytes:
                # Run text to speech
                _LOGGER.debug("Synthesizing '%s' (voice=%s)", say.text,
                              voice_name)
                wav_bytes = self.synthesize(voice, say.text)

                if not wav_bytes:
                    raise RuntimeError("No WAV data synthesized")

                _LOGGER.debug("Got %s byte(s) of WAV data", len(wav_bytes))

            # Adjust volume
            volume = self.volume
            if say.volume is not None:
                # Override with message volume
                volume = say.volume

            # Keep the unmodified audio so the cache is volume-independent
            original_wav_bytes = wav_bytes
            if volume is not None:
                wav_bytes = TtsHermesMqtt.change_volume(wav_bytes, volume)

            finished_event = asyncio.Event()

            # Play WAV
            if self.play_command:
                # Local playback is synchronous and the event is not
                # registered anywhere, so there is never a playFinished to
                # wait for -- not even when the player fails.
                finished_event.set()

                try:
                    # Play locally
                    play_command = shlex.split(
                        self.play_command.format(lang=say.lang))
                    _LOGGER.debug(play_command)

                    subprocess.run(play_command, input=wav_bytes, check=True)
                except Exception as e:
                    _LOGGER.exception("play_command")
                    yield AudioPlayError(
                        error=str(e),
                        context=say.id,
                        site_id=say.site_id,
                        session_id=say.session_id,
                    )
            else:
                # Publish playBytes; the event is registered under the
                # request id in self.play_finished_events
                request_id = say.id or str(uuid4())
                self.play_finished_events[request_id] = finished_event

                yield (
                    AudioPlayBytes(wav_bytes=wav_bytes),
                    {
                        "site_id": say.site_id,
                        "request_id": request_id
                    },
                )

            # Save to cache
            if (not from_cache) and cached_wav_path:
                cached_wav_path.write_bytes(original_wav_bytes)

            try:
                # Wait for audio to finish playing or timeout
                wav_duration = TtsHermesMqtt.get_wav_duration(wav_bytes)
                wav_timeout = wav_duration + self.finished_timeout_extra

                _LOGGER.debug("Waiting for play finished (timeout=%s)",
                              wav_timeout)
                await asyncio.wait_for(finished_event.wait(),
                                       timeout=wav_timeout)
            except asyncio.TimeoutError:
                _LOGGER.warning("Did not receive playFinished before timeout")

        except Exception as e:
            _LOGGER.exception("handle_say")
            yield TtsError(
                error=str(e),
                context=say.id,
                site_id=say.site_id,
                session_id=say.session_id,
            )
        finally:
            # Always report completion, whether or not audio was produced
            yield TtsSayFinished(id=say.id,
                                 site_id=say.site_id,
                                 session_id=say.session_id)