def text_to_speech(self, text: str, slow: bool = False, use_cache: bool = True) -> str:
    """
    Convert ``text`` to speech using gTTS and return the path to the resulting audio file.

    :param text: Text to synthesize.
    :param slow: Passed through to ``gTTS`` as its ``slow`` argument.
    :param use_cache: Passed to the cache helpers which determine the output file path and
                      whether an already existing file can be re-used.
    :return: Path to the audio file, or an empty string if no valid file could be produced.
    :raises ValueError: Any ``ValueError`` raised while generating the audio which is not the
                        "Unable to find token seed" error.
    """
    from gtts import gTTS

    file_path = self._get_cache_file_path(text=text, use_cache=use_cache)

    if self._is_valid_cached_file(file_path=file_path, use_cache=use_cache):
        LOG.debug("Using existing cached file: %s" % (file_path))
        return file_path

    LOG.trace('Performing TTS on text "%s" and saving result to %s' % (text, file_path))

    lang = "en-US"

    # Set only once an attempt both saved the file and passed the validity check below.
    # Without it, three corrupted responses in a row would still leave a non-empty file on
    # disk and that corrupted file would be returned as if it was a success.
    succeeded = False

    # Sometimes API returns "Unable to find token seed" error so we retry up to 3 times
    for index in range(0, 3):
        try:
            audio_file = gTTS(text=text, lang=lang, slow=slow, lang_check=False)
            audio_file.save(file_path)
            # Even on successful response API sometimes returns a corrupted response so we need
            # to retry on such scenario as well
            # TODO: Could we just perform file size check to speed the operation up?
            get_audio_file_duration(file_path)
            succeeded = True
            break
        except ValueError as e:
            if "Unable to find token seed" not in str(e):
                # Not a known transient failure - propagate with the original traceback
                raise

            LOG.debug(
                "Retrying gtts call due to failure: %s (attempt=%s)" % (str(e), (index + 1)))
        except mutagen.mp3.HeaderNotFoundError as e:
            LOG.debug(
                "Retrying gtts call due to failure: %s (attempt=%s)" % (str(e), (index + 1)))

    if not succeeded or not os.path.isfile(file_path) or os.stat(file_path).st_size == 0:
        # NOTE(review): A corrupted file from the last attempt may still exist at file_path.
        # Presumably _is_valid_cached_file() rejects it on the next call - confirm.
        LOG.error('Failed to perform TTS on text "%s"' % (text))
        return ""

    return file_path
def test_text_to_speech_gtts(self):
    """
    Synthesize a short phrase with the "gtts" implementation and verify that the audio file
    is created at the cache path and is at least 1.5 seconds long.
    """
    phrase = "Hello World. Test."
    engine = TextToSpeech(implementation="gtts")

    audio_path = engine._tts._get_cache_file_path(text=phrase, use_cache=True)
    self.assertFalse(os.path.isfile(audio_path))

    # This test is sometimes flaky when running on CI and "Unable to find token seed" error is
    # thrown. To try to mitigate this, we retry on failure.
    for _attempt in range(3):
        try:
            engine.text_to_speech(text=phrase)
        except ValueError as e:
            if "Unable to find token seed" in str(e):
                print("Retrying gtts call due to failure: %s" % (str(e)))
                time.sleep(random.randint(2, 5))
                continue

            # Any other ValueError is a real failure
            raise e
        else:
            break

    self.assertTrue(os.path.isfile(audio_path))
    self.assertTrue(get_audio_file_duration(file_path=audio_path) >= 1.5)
def duration(self):
    """
    Return the duration for this item, computing and caching it on first use.

    A truthy value already stored in ``self._duration`` is returned as-is. Otherwise the value
    is derived from ``self.type``: text based types use
    ``CronSayPlugin.calculate_duration_for_text`` and "file" uses ``get_audio_file_duration``.
    For any other type ``self._duration`` is returned unchanged.
    """
    if self._duration:
        return self._duration

    item_type = self.type

    if item_type in ("text", "text_to_morse", "morse"):
        # TODO: Use more accurate estimation for morse code ("morse" currently re-uses the
        # plain text estimation)
        self._duration = CronSayPlugin.calculate_duration_for_text(text=self.value)
    elif item_type == "file":
        self._duration = get_audio_file_duration(file_path=self.value)

    return self._duration
def test_text_to_speech_speak(self):
    """
    Synthesize a short phrase with the "espeak" implementation and verify that the audio file
    is created at the cache path and is at least 1.5 seconds long.
    """
    phrase = "Hello World. Test."
    engine = TextToSpeech(implementation="espeak")

    audio_path = engine._tts._get_cache_file_path(text=phrase, use_cache=True)

    # File must not exist before the call
    self.assertFalse(os.path.isfile(audio_path))

    engine.text_to_speech(text=phrase)

    self.assertTrue(os.path.isfile(audio_path))
    self.assertTrue(get_audio_file_duration(file_path=audio_path) >= 1.5)
def test_text_to_speech_govornik(self):
    """
    Synthesize a short phrase with the "govornik" implementation (language "sl_SI") and verify
    that the audio file is created at the cache path and is at least 1.5 seconds long.
    """
    phrase = "Dober dan. Dobro jutri vsi."
    engine = TextToSpeech(implementation="govornik")

    audio_path = engine._tts._get_cache_file_path(text=phrase, use_cache=True)
    self.assertFalse(os.path.isfile(audio_path))

    # Up to 3 attempts; any exception triggers a retry after a random 2-5 second pause
    for _attempt in range(3):
        try:
            engine.text_to_speech(text=phrase, language="sl_SI")
        except Exception as e:
            print("Retrying govornik call due to failure: %s" % (str(e)))
            time.sleep(random.randint(2, 5))
        else:
            break

    self.assertTrue(os.path.isfile(audio_path))
    self.assertTrue(get_audio_file_duration(file_path=audio_path) >= 1.5)
def test_get_audio_file_duration(self):
    """
    Verify that ``get_audio_file_duration`` reports 6.048 for both the MP3 and the WAV
    fixture file.
    """
    expected_duration = 6.048

    for fixture_path in (MP3_FILE_PATH, WAV_FILE_PATH):
        self.assertEqual(get_audio_file_duration(fixture_path), expected_duration)