Example #1
0
    def check_ffmpeg(cls):
        """
        Check whether ``ffmpeg`` can be called.

        The file ``tools/res/audio.mp3`` (resolved against ``__file__``)
        is converted to a temporary WAVE file, which is always removed
        afterwards, even if the conversion raises.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.ffmpegwrapper import FFMPEGWrapper
            input_file_path = gf.absolute_path(u"tools/res/audio.mp3",
                                               __file__)
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            try:
                result = FFMPEGWrapper().convert(input_file_path,
                                                 output_file_path)
            finally:
                # do not leave the temporary file behind when ffmpeg fails
                gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"ffmpeg         OK")
                return False
        except Exception:
            # any ordinary failure is reported below;
            # KeyboardInterrupt and SystemExit are allowed to propagate
            pass
        gf.print_error(u"ffmpeg         ERROR")
        gf.print_info(
            u"  Please make sure you have ffmpeg installed correctly")
        gf.print_info(
            u"  and that its path is in your PATH environment variable")
        return True
Example #2
0
 def synthesize_multiple(self,
                         text_file,
                         ofp=None,
                         quit_after=None,
                         backwards=False,
                         zero_length=False):
     """
     Synthesize ``text_file`` with ``FESTIVALWrapper`` and check the
     total time it reports.

     The output file is removed whether or not the synthesis succeeds;
     any exception raised by the wrapper propagates to the caller.

     :param text_file: the text to synthesize
     :param ofp: output file path; if ``None``, a temporary file is used
     :param quit_after: passed through to the wrapper
     :param backwards: passed through to the wrapper
     :param bool zero_length: if ``True``, expect a total time of ``0.0``,
                              otherwise expect a positive total time
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         _, total_time, _ = tts_engine.synthesize_multiple(
             text_file, output_file_path, quit_after, backwards)
     finally:
         # clean up on every exit path, not only for selected exceptions
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(total_time, 0.0)
     else:
         self.assertGreater(total_time, 0.0)
Example #3
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        A short English sentence is synthesized to a temporary WAVE file,
        which is always removed afterwards, even if the synthesis raises.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.espeakwrapper import ESPEAKWrapper
            text = u"From fairest creatures we desire increase,"
            language = u"eng"
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            try:
                result = ESPEAKWrapper().synthesize_single(
                    text, language, output_file_path)
            finally:
                # do not leave the temporary file behind when espeak fails
                gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"espeak         OK")
                return False
        except Exception:
            # any ordinary failure is reported below;
            # KeyboardInterrupt and SystemExit are allowed to propagate
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(
            u"  Please make sure you have espeak installed correctly")
        gf.print_info(
            u"  and that its path is in your PATH environment variable")
        gf.print_info(
            u"  You might also want to check that the espeak-data directory")
        gf.print_info(
            u"  is set up correctly, for example, it has the correct permissions"
        )
        return True
Example #4
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        A short English sentence is synthesized to a temporary WAVE file,
        which is always removed afterwards, even if the synthesis raises.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.espeakwrapper import ESPEAKWrapper
            text = u"From fairest creatures we desire increase,"
            language = u"eng"
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            try:
                result = ESPEAKWrapper().synthesize_single(
                    text,
                    language,
                    output_file_path
                )
            finally:
                # do not leave the temporary file behind when espeak fails
                gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"espeak         OK")
                return False
        except Exception:
            # any ordinary failure is reported below;
            # KeyboardInterrupt and SystemExit are allowed to propagate
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(u"  Please make sure you have espeak installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        gf.print_info(u"  You might also want to check that the espeak-data directory")
        gf.print_info(u"  is set up correctly, for example, it has the correct permissions")
        return True
 def synthesize_multiple(self, text_file, ofp=None, quit_after=None, backwards=False, zero_length=False):
     """
     Synthesize ``text_file`` with ``FESTIVALWrapper`` and check the
     total time it reports.

     The output file is removed whether or not the synthesis succeeds;
     any exception raised by the wrapper propagates to the caller.

     :param text_file: the text to synthesize
     :param ofp: output file path; if ``None``, a temporary file is used
     :param quit_after: passed through to the wrapper
     :param backwards: passed through to the wrapper
     :param bool zero_length: if ``True``, expect a total time of ``0.0``,
                              otherwise expect a positive total time
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         _, total_time, _ = tts_engine.synthesize_multiple(
             text_file,
             output_file_path,
             quit_after,
             backwards
         )
     finally:
         # clean up on every exit path, not only for selected exceptions
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(total_time, 0.0)
     else:
         self.assertGreater(total_time, 0.0)
Example #6
0
 def inner(c_ext, cew_subprocess, cache):
     """
     Run ``synthesize_multiple`` on ``self.TTS_CLASS`` for one combination
     of runtime flags and check the reported total time.

     ``ofp``, ``text_file``, ``quit_after``, ``backwards``, ``zero_length``,
     ``expected_exc`` and ``self`` are taken from the enclosing scope.

     :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
     :param cew_subprocess: value stored under
                            ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
     :param cache: value stored under ``RuntimeConfiguration.TTS_CACHE``;
                   if true, the engine cache is cleared afterwards
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     # bind the name before the try block: the except clause tests it, and
     # the failure may happen before the engine has been created
     tts_engine = None
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = self.TTS
         rconf[RuntimeConfiguration.TTS_PATH] = self.TTS_PATH
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
         rconf[RuntimeConfiguration.TTS_CACHE] = cache
         tts_engine = self.TTS_CLASS(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file,
             output_file_path,
             quit_after,
             backwards
         )
         gf.delete_file(handler, output_file_path)
         if cache:
             tts_engine.clear_cache()
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         if (cache) and (tts_engine is not None):
             tts_engine.clear_cache()
         # re-raise inside assertRaises to verify the exception type
         with self.assertRaises(expected_exc):
             raise exc
Example #7
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        A one-fragment ``TextFile`` is synthesized to a temporary WAVE file,
        which is always removed afterwards, even if the synthesis raises.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.textfile import TextFile
            from aeneas.textfile import TextFragment
            from aeneas.ttswrappers.espeakttswrapper import ESPEAKTTSWrapper
            text = u"From fairest creatures we desire increase,"
            text_file = TextFile()
            text_file.add_fragment(TextFragment(language=u"eng", lines=[text], filtered_lines=[text]))
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            try:
                ESPEAKTTSWrapper().synthesize_multiple(text_file, output_file_path)
            finally:
                # do not leave the temporary file behind when espeak fails
                gf.delete_file(handler, output_file_path)
            gf.print_success(u"espeak         OK")
            return False
        except Exception:
            # any ordinary failure is reported below;
            # KeyboardInterrupt and SystemExit are allowed to propagate
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(u"  Please make sure you have espeak installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        gf.print_info(u"  You might also want to check that the espeak-data directory")
        gf.print_info(u"  is set up correctly, for example, it has the correct permissions")
        return True
Example #8
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        A one-fragment ``TextFile`` is synthesized to a temporary WAVE file,
        which is always removed afterwards, even if the synthesis raises.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.textfile import TextFile
            from aeneas.textfile import TextFragment
            from aeneas.ttswrappers.espeakttswrapper import ESPEAKTTSWrapper
            text = u"From fairest creatures we desire increase,"
            text_file = TextFile()
            text_file.add_fragment(TextFragment(language=u"eng", lines=[text], filtered_lines=[text]))
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            try:
                ESPEAKTTSWrapper().synthesize_multiple(text_file, output_file_path)
            finally:
                # do not leave the temporary file behind when espeak fails
                gf.delete_file(handler, output_file_path)
            gf.print_success(u"espeak         OK")
            return False
        except Exception:
            # any ordinary failure is reported below;
            # KeyboardInterrupt and SystemExit are allowed to propagate
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(u"  Please make sure you have espeak installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        gf.print_info(u"  You might also want to check that the espeak-data directory")
        gf.print_info(u"  is set up correctly, for example, it has the correct permissions")
        return True
Example #9
0
 def inner(c_ext, cew_subprocess, cache):
     """
     Run ``synthesize_multiple`` on ``self.TTS_CLASS`` for one combination
     of runtime flags and check the reported total time.

     ``ofp``, ``text_file``, ``quit_after``, ``backwards``, ``zero_length``,
     ``expected_exc`` and ``self`` are taken from the enclosing scope.

     :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
     :param cew_subprocess: value stored under
                            ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
     :param cache: value stored under ``RuntimeConfiguration.TTS_CACHE``;
                   if true, the engine cache is cleared afterwards
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     # bind the name before the try block: the except clause tests it, and
     # the failure may happen before the engine has been created
     tts_engine = None
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = self.TTS
         rconf[RuntimeConfiguration.TTS_PATH] = self.TTS_PATH
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.
               CEW_SUBPROCESS_ENABLED] = cew_subprocess
         rconf[RuntimeConfiguration.TTS_CACHE] = cache
         tts_engine = self.TTS_CLASS(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file, output_file_path, quit_after, backwards)
         gf.delete_file(handler, output_file_path)
         if cache:
             tts_engine.clear_cache()
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         if (cache) and (tts_engine is not None):
             tts_engine.clear_cache()
         # re-raise inside assertRaises to verify the exception type
         with self.assertRaises(expected_exc):
             raise exc
Example #10
0
 def inner(c_ext, cew_subprocess):
     """
     Run ``ESPEAKWrapper.synthesize_multiple`` for one combination of
     runtime flags and check the reported total time.

     ``ofp``, ``text_file``, ``quit_after``, ``backwards``, ``zero_length``
     and ``self`` are taken from the enclosing scope. The output file is
     removed on every exit path; exceptions propagate to the caller.

     :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
     :param cew_subprocess: value stored under
                            ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
         tts_engine = ESPEAKWrapper(rconf=rconf)
         _, total_time, _ = tts_engine.synthesize_multiple(
             text_file,
             output_file_path,
             quit_after,
             backwards
         )
     finally:
         # clean up on every exit path, not only for selected exceptions
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(total_time, 0.0)
     else:
         self.assertGreater(total_time, 0.0)
Example #11
0
 def test_close_file_handler(self):
     """
     Closing the handler of a temporary file must leave the file in place;
     the file disappears only after ``gf.delete_file`` is called.
     """
     fd, tmp_path = gf.tmp_file()
     exists_after_creation = gf.file_exists(tmp_path)
     self.assertTrue(exists_after_creation)

     gf.close_file_handler(fd)
     exists_after_close = gf.file_exists(tmp_path)
     self.assertTrue(exists_after_close)

     gf.delete_file(fd, tmp_path)
     exists_after_delete = gf.file_exists(tmp_path)
     self.assertFalse(exists_after_delete)
 def test_read_file_bytes(self):
     """
     Write ``u"Foo bar"`` to a temporary file and check that
     ``gf.read_file_bytes`` returns 7 bytes.

     The temporary file is removed even when an assertion fails.
     """
     handler, path = gf.tmp_file()
     try:
         with io.open(path, "w", encoding="utf-8") as tmp_file:
             tmp_file.write(u"Foo bar")
         contents = gf.read_file_bytes(path)
         self.assertTrue(gf.is_bytes(contents))
         self.assertEqual(len(contents), 7)
     finally:
         gf.delete_file(handler, path)
Example #13
0
 def test_read_file_bytes(self):
     """
     Write ``u"Foo bar"`` to a temporary file and check that
     ``gf.read_file_bytes`` returns 7 bytes.

     The temporary file is removed even when an assertion fails.
     """
     handler, path = gf.tmp_file()
     try:
         with io.open(path, "w", encoding="utf-8") as tmp_file:
             tmp_file.write(u"Foo bar")
         contents = gf.read_file_bytes(path)
         self.assertTrue(gf.is_bytes(contents))
         self.assertEqual(len(contents), 7)
     finally:
         gf.delete_file(handler, path)
Example #14
0
 def test_write(self):
     """
     Write a loaded audio file to a temporary WAVE file, load the copy,
     and check that the samples of the copy equal the original ones.

     The temporary file is removed even when loading or an assertion fails.
     """
     audiofile = self.load(self.AUDIO_FILE_WAVE, rs=True)
     data = audiofile.audio_samples
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         audiofile.write(output_file_path)
         audiocopy = self.load(output_file_path)
         datacopy = audiocopy.audio_samples
         self.assertTrue((datacopy == data).all())
     finally:
         gf.delete_file(handler, output_file_path)
Example #15
0
 def test_write(self):
     """
     Write a loaded audio file to a temporary WAVE file, load the copy,
     and check that the samples of the copy equal the original ones.

     The temporary file is removed even when loading or an assertion fails.
     """
     audiofile = self.load(self.AUDIO_FILE_WAVE, rs=True)
     data = audiofile.audio_samples
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         audiofile.write(output_file_path)
         audiocopy = self.load(output_file_path)
         datacopy = audiocopy.audio_samples
         self.assertTrue((datacopy == data).all())
     finally:
         gf.delete_file(handler, output_file_path)
Example #16
0
 def clear(self):
     """
     Delete every file referenced by the cache, then reset the cache.

     Each cache value is a ``(handler, path)`` pair that is handed to
     ``gf.delete_file``; ``_initialize_cache`` is called afterwards.
     """
     self.log(u"Clearing cache...")
     for entry in self.cache.values():
         cached_handler, cached_path = entry
         self.log([u"  Removing file '%s'", cached_path])
         gf.delete_file(cached_handler, cached_path)
     self._initialize_cache()
     self.log(u"Clearing cache... done")
Example #17
0
 def test_compress_file(self):
     """
     For every format in ``self.FILES`` other than ``UNPACKED``, compress
     the unpacked input into a temporary container and check that the
     container exists and lists ``self.EXPECTED_ENTRIES``.

     Each temporary container is removed even when an assertion fails.
     """
     input_path = self.FILES["unpacked"]["path"]
     for key in self.FILES:
         fmt = self.FILES[key]["format"]
         if fmt != ContainerFormat.UNPACKED:
             handler, output_path = gf.tmp_file(suffix="." + fmt)
             try:
                 cont = Container(output_path, fmt)
                 cont.compress(input_path)
                 self.assertTrue(os.path.isfile(output_path))
                 copy = Container(output_path, fmt)
                 self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
             finally:
                 gf.delete_file(handler, output_path)
Example #18
0
 def inner(c_ext, cew_subprocess):
     """
     Synthesize the plain text file at ``path`` with a ``Synthesizer``
     configured with the given flags, then check the result.

     ``path``, ``logger``, ``quit_after``, ``backwards``, ``expected``,
     ``expected2`` and ``self`` come from the enclosing scope.
     """
     wave_handler, wave_path = gf.tmp_file(suffix=".wav")
     plain_text = TextFile(gf.absolute_path(path, __file__), TextFileFormat.PLAIN)
     plain_text.set_language(Language.ENG)
     synthesizer = Synthesizer(logger=logger)
     synthesizer.rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
     synthesizer.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
     outcome = synthesizer.synthesize(
         plain_text,
         wave_path,
         quit_after=quit_after,
         backwards=backwards
     )
     gf.delete_file(wave_handler, wave_path)
     self.assertEqual(len(outcome[0]), expected)
     if expected2 is None:
         return
     self.assertAlmostEqual(outcome[1], expected2, places=0)
Example #19
0
 def test_compress_file(self):
     """
     For every format in ``self.FILES`` other than ``UNPACKED``, compress
     the unpacked input into a temporary container and check that the
     container exists and lists ``self.EXPECTED_ENTRIES``.

     Each temporary container is removed even when an assertion fails.
     """
     input_path = self.FILES["unpacked"]["path"]
     for key in self.FILES:
         fmt = self.FILES[key]["format"]
         if fmt != ContainerFormat.UNPACKED:
             handler, output_path = gf.tmp_file(suffix="." + fmt)
             try:
                 cont = Container(output_path, fmt)
                 cont.compress(input_path)
                 self.assertTrue(os.path.isfile(output_path))
                 copy = Container(output_path, fmt)
                 self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
             finally:
                 gf.delete_file(handler, output_path)
Example #20
0
 def test_output_sync_map(self):
     """
     Output a dummy sync map in TXT format to a temporary file and check
     that ``output_sync_map_file`` returns the path it was given.

     The temporary file is removed even when an assertion fails.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         gf.delete_file(handler, output_file_path)
 def test_output_sync_map(self):
     """
     Output a dummy sync map in TXT format to a temporary file and check
     that ``output_sync_map_file`` returns the path it was given.

     The temporary file is removed even when an assertion fails.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         gf.delete_file(handler, output_file_path)
Example #22
0
 def inner(c_ext, cew_subprocess):
     """
     Synthesize the plain text file at ``path`` with a ``Synthesizer``
     configured with the given flags, then check the result.

     ``path``, ``logger``, ``quit_after``, ``backwards``, ``expected``,
     ``expected2`` and ``self`` come from the enclosing scope.
     """
     wave_handler, wave_path = gf.tmp_file(suffix=".wav")
     source_path = gf.absolute_path(path, __file__)
     plain_text = TextFile(source_path, TextFileFormat.PLAIN)
     plain_text.set_language(Language.ENG)
     synthesizer = Synthesizer(logger=logger)
     synthesizer.rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
     synthesizer.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
     outcome = synthesizer.synthesize(
         plain_text,
         wave_path,
         quit_after=quit_after,
         backwards=backwards
     )
     gf.delete_file(wave_handler, wave_path)
     self.assertEqual(len(outcome[0]), expected)
     if expected2 is None:
         return
     self.assertAlmostEqual(outcome[1], expected2, places=0)
Example #23
0
 def execute(self, config_string, audio_path, text_path):
     """
     Execute a task built from the given configuration and input files,
     output its sync map to a temporary file, and check that the file
     was written at the requested path and is not empty.

     The temporary file is removed even when the execution or an
     assertion fails.

     :param config_string: the task configuration string
     :param audio_path: audio file path, resolved against ``__file__``
     :param text_path: text file path, resolved against ``__file__``
     """
     handler, tmp_path = gf.tmp_file()
     try:
         task = Task(config_string)
         task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
         task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
         executor = ExecuteTask(task)
         executor.execute()
         task.sync_map_file_path_absolute = tmp_path
         result_path = task.output_sync_map_file()
         self.assertIsNotNone(result_path)
         self.assertEqual(result_path, tmp_path)
         self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
     finally:
         gf.delete_file(handler, tmp_path)
Example #24
0
 def download(
     self,
     expected_size,
     download_format=None,
     largest_audio=True,
 ):
     """
     Download the audio of ``self.URL_VALID`` with no explicit output path,
     check that the resulting file is readable and has ``expected_size``
     bytes, then delete it.
     """
     request_options = {
         "download": True,
         "output_file_path": None,
         "download_format": download_format,
         "largest_audio": largest_audio,
     }
     downloaded_path = self.audio_from_youtube(self.URL_VALID, **request_options)
     is_readable = gf.file_can_be_read(downloaded_path)
     self.assertTrue(is_readable)
     actual_size = gf.file_size(downloaded_path)
     self.assertEqual(actual_size, expected_size)
     gf.delete_file(None, downloaded_path)
 def perform_run(self, audio_file_path, text_file_path, config_string, rconf_string):
     """
     Run ``ExecuteTaskCLI`` on the given inputs, writing to a temporary
     output file that is deleted afterwards.

     Return the ``logger`` attribute of the CLI object.
     """
     tmp_handler, tmp_path = gf.tmp_file()
     cli = ExecuteTaskCLI(use_sys=False)
     if self.verbose:
         verbosity_flag = "-v"
     else:
         verbosity_flag = ""
     cli_arguments = [
         "dummy placeholder for aeneas.tools.execute_task",
         audio_file_path,
         text_file_path,
         config_string,
         tmp_path,
         "-r=\"%s\"" % rconf_string,
         verbosity_flag,
     ]
     cli.run(arguments=cli_arguments)
     gf.delete_file(tmp_handler, tmp_path)
     return cli.logger
Example #26
0
 def test_cew_synthesize_single(self):
     """
     Synthesize a single fragment through ``aeneas.cew.cew`` and check the
     returned sample rate, begin and end values.

     If ``aeneas.cew.cew`` cannot be imported, nothing is checked.
     The temporary file is removed even when an assertion fails.
     """
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         import aeneas.cew.cew
         sr, begin, end = aeneas.cew.cew.synthesize_single(
             output_file_path,
             u"en",                      # NOTE cew requires the actual eSpeak voice code
             u"Dummy"
         )
         self.assertEqual(sr, 22050)
         self.assertEqual(begin, 0)
         self.assertGreater(end, 0)
     except ImportError:
         # the module is not importable here: nothing to test
         pass
     finally:
         gf.delete_file(handler, output_file_path)
Example #27
0
 def download(
         self,
         expected_size,
         download_format=None,
         largest_audio=True,
 ):
     """
     Download the audio of ``self.URL_VALID`` with no explicit output path,
     check that the resulting file is readable and has ``expected_size``
     bytes, then delete it.
     """
     request_options = {
         "download": True,
         "output_file_path": None,
         "download_format": download_format,
         "largest_audio": largest_audio,
     }
     downloaded_path = self.audio_from_youtube(self.URL_VALID, **request_options)
     is_readable = gf.file_can_be_read(downloaded_path)
     self.assertTrue(is_readable)
     actual_size = gf.file_size(downloaded_path)
     self.assertEqual(actual_size, expected_size)
     gf.delete_file(None, downloaded_path)
Example #28
0
    def _execute_inner(self,
                       audio_file_mfcc,
                       text_file,
                       adjust_boundaries=True,
                       log=True):
        """
        Align a subinterval of the given AudioFileMFCC
        with the given TextFile.

        Return the computed time map, as a list of intervals.

        The begin and end positions inside the AudioFileMFCC
        must have been set ahead by the caller.

        The text fragments being aligned are the vchildren of ``text_file``.

        The synthesized audio file is deleted as soon as its MFCCs
        have been extracted, even if the extraction raises.

        :param audio_file_mfcc: the audio file MFCC representation
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param text_file: the text file subtree to align
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param bool adjust_boundaries: if ``True``, execute the adjust boundary algorithm
        :param bool log: if ``True``, log steps
        :rtype: list
        """
        self._step_begin(u"synthesize text", log=log)
        synt_handler, synt_path, synt_anchors, synt_mono = self._synthesize(
            text_file)
        self._step_end(log=log)

        self._step_begin(u"extract MFCC synt wave", log=log)
        try:
            synt_wave_mfcc = self._extract_mfcc(file_path=synt_path,
                                                file_path_is_mono_wave=synt_mono)
        finally:
            # the synthesized file is not used after this step:
            # remove it on failure too, so it is not left on disk
            gf.delete_file(synt_handler, synt_path)
        self._step_end(log=log)

        self._step_begin(u"align waves", log=log)
        indices = self._align_waves(audio_file_mfcc, synt_wave_mfcc,
                                    synt_anchors)
        self._step_end(log=log)

        self._step_begin(u"adjust boundaries", log=log)
        time_map = self._adjust_boundaries(audio_file_mfcc, text_file, indices,
                                           adjust_boundaries)
        self._step_end(log=log)

        return time_map
Example #29
0
 def convert(self, input_file_path, ofp=None, runtime_configuration=None):
     """
     Convert ``input_file_path`` with ``FFMPEGWrapper`` and check that
     the wrapper returns the output file path.

     The output location is removed on every exit path: the temporary
     directory when ``ofp`` is ``None``, the file ``ofp`` otherwise.
     Exceptions raised by the wrapper propagate to the caller.

     :param input_file_path: input path, resolved against ``__file__``
     :param ofp: output file path; if ``None``, ``audio.wav`` inside
                 a temporary directory is used
     :param runtime_configuration: passed to the wrapper as ``rconf``
     """
     # stays None when the caller supplies ofp, so that the cleanup below
     # never touches an unbound name
     output_path = None
     if ofp is None:
         output_path = gf.tmp_directory()
         output_file_path = os.path.join(output_path, "audio.wav")
     else:
         output_file_path = ofp
     try:
         converter = FFMPEGWrapper(rconf=runtime_configuration)
         result = converter.convert(
             gf.absolute_path(input_file_path, __file__), output_file_path)
         self.assertEqual(result, output_file_path)
     finally:
         if output_path is not None:
             gf.delete_directory(output_path)
         else:
             gf.delete_file(None, ofp)
Example #30
0
        def synthesize_and_clean(text, voice_code):
            """
            Synthesize a single fragment via subprocess,
            and immediately remove the temporary file.

            The temporary file is removed even if the synthesis raises.

            :param text: the text to synthesize; a trailing space is appended
            :param voice_code: the voice code passed to the subprocess call
            :rtype: tuple (duration, sample_rate, encoding, samples)
            """
            self.log(u"Synthesizing text...")
            handler, tmp_destination = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            try:
                # only the data part of the returned pair is used
                _, data = self._synthesize_single_subprocess(
                    text=(text + u" "),
                    voice_code=voice_code,
                    output_file_path=tmp_destination
                )
            finally:
                self.log([u"Removing temporary file '%s'", tmp_destination])
                gf.delete_file(handler, tmp_destination)
            self.log(u"Synthesizing text... done")
            return data
Example #31
0
    def _execute_inner(self, audio_file_mfcc, text_file, sync_root=None, force_aba_auto=False, log=True, leaf_level=False):
        """
        Align a subinterval of the given AudioFileMFCC
        with the given TextFile.

        The computed indices are handed to ``_adjust_boundaries``
        together with ``sync_root``; this method has no ``return``
        statement, so it returns ``None``.

        The begin and end positions inside the AudioFileMFCC
        must have been set ahead by the caller.

        The text fragments being aligned are the vchildren of ``text_file``.

        The synthesized audio file is deleted as soon as its MFCCs
        have been extracted, even if the extraction raises.

        :param audio_file_mfcc: the audio file MFCC representation
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param text_file: the text file subtree to align
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param sync_root: the tree node to which fragments should be appended
        :type  sync_root: :class:`~aeneas.tree.Tree`
        :param bool force_aba_auto: if ``True``, do not run aba algorithm
        :param bool log: if ``True``, log steps
        :param bool leaf_level: alert aba if the computation is at a leaf level
        """
        self._step_begin(u"synthesize text", log=log)
        synt_handler, synt_path, synt_anchors, synt_format = self._synthesize(text_file)
        self._step_end(log=log)

        self._step_begin(u"extract MFCC synt wave", log=log)
        try:
            synt_wave_mfcc = self._extract_mfcc(
                file_path=synt_path,
                file_format=synt_format,
            )
        finally:
            # the synthesized file is not used after this step:
            # remove it on failure too, so it is not left on disk
            gf.delete_file(synt_handler, synt_path)
        self._step_end(log=log)

        self._step_begin(u"align waves", log=log)
        indices = self._align_waves(audio_file_mfcc, synt_wave_mfcc, synt_anchors)
        self._step_end(log=log)

        self._step_begin(u"adjust boundaries", log=log)
        self._adjust_boundaries(indices, text_file, audio_file_mfcc, sync_root, force_aba_auto, leaf_level)
        self._step_end(log=log)
Example #32
0
 def inner(c_ext, cew_subprocess):
     """
     Run ``ESPEAKWrapper.synthesize_single`` for one combination of
     runtime flags and check the value it returns.

     ``ofp``, ``text``, ``language``, ``zero_length`` and ``self`` are
     taken from the enclosing scope. The output file is removed on every
     exit path; exceptions propagate to the caller.

     :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
     :param cew_subprocess: value stored under
                            ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
         tts_engine = ESPEAKWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language, output_file_path)
     finally:
         # clean up on every exit path, not only for selected exceptions
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(result, 0)
     else:
         self.assertGreater(result, 0)
 def synthesize_single(self, text, language, ofp=None, zero_length=False):
     """
     Synthesize ``text`` with ``FESTIVALWrapper`` and check the value
     returned by ``synthesize_single``.

     The output file is removed whether or not the synthesis succeeds;
     any exception raised by the wrapper propagates to the caller.

     :param text: the text to synthesize
     :param language: the language passed to the wrapper
     :param ofp: output file path; if ``None``, a temporary file is used
     :param bool zero_length: if ``True``, expect a result of ``0``,
                              otherwise expect a positive result
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language, output_file_path)
     finally:
         # clean up on every exit path, not only for selected exceptions
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(result, 0)
     else:
         self.assertGreater(result, 0)
Example #34
0
 def convert(self, input_file_path, ofp=None, runtime_configuration=None):
     """
     Convert ``input_file_path`` with ``FFMPEGWrapper`` and check that
     the wrapper returns the output file path.

     The output location is removed on every exit path: the temporary
     directory when ``ofp`` is ``None``, the file ``ofp`` otherwise.
     Exceptions raised by the wrapper propagate to the caller.

     :param input_file_path: input path, resolved against ``__file__``
     :param ofp: output file path; if ``None``, ``audio.wav`` inside
                 a temporary directory is used
     :param runtime_configuration: passed to the wrapper as ``rconf``
     """
     # stays None when the caller supplies ofp, so that the cleanup below
     # never touches an unbound name
     output_path = None
     if ofp is None:
         output_path = gf.tmp_directory()
         output_file_path = os.path.join(output_path, "audio.wav")
     else:
         output_file_path = ofp
     try:
         converter = FFMPEGWrapper(rconf=runtime_configuration)
         result = converter.convert(
             gf.absolute_path(input_file_path, __file__),
             output_file_path
         )
         self.assertEqual(result, output_file_path)
     finally:
         if output_path is not None:
             gf.delete_directory(output_path)
         else:
             gf.delete_file(None, ofp)
Example #35
0
 def _compose_output_file_path(self, extension, output_file_path=None):
     """
     Return the path to be used for the output file.

     When ``output_file_path`` is given, it is checked with
     ``gf.file_can_be_written`` and a failure is reported through
     ``log_exc`` with ``OSError``. When it is ``None``, a temporary file
     with suffix ``.<extension>`` is created under the configured
     temporary path and deleted right away, and its path is returned.
     """
     self.log(u"Determining output file path...")
     if output_file_path is not None:
         self.log(u"output_file_path is not None: cheking that file can be written")
         writable = gf.file_can_be_written(output_file_path)
         if not writable:
             self.log_exc(u"Path '%s' cannot be written. Wrong permissions?" % (output_file_path), None, True, OSError)
     else:
         self.log(u"output_file_path is None: creating temp file")
         tmp_root = self.rconf[RuntimeConfiguration.TMP_PATH]
         tmp_suffix = ".%s" % extension
         tmp_handler, output_file_path = gf.tmp_file(root=tmp_root, suffix=tmp_suffix)
         gf.delete_file(tmp_handler, output_file_path)
     self.log(u"Determining output file path... done")
     self.log([u"Output file path is '%s'", output_file_path])
     return output_file_path
Example #36
0
 def synthesize_single(self, text, language, ofp=None, zero_length=False):
     """
     Synthesize ``text`` with ``FESTIVALWrapper`` and check the value
     returned by ``synthesize_single``.

     The output file is removed whether or not the synthesis succeeds;
     any exception raised by the wrapper propagates to the caller.

     :param text: the text to synthesize
     :param language: the language passed to the wrapper
     :param ofp: output file path; if ``None``, a temporary file is used
     :param bool zero_length: if ``True``, expect a result of ``0``,
                              otherwise expect a positive result
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language,
                                               output_file_path)
     finally:
         # clean up on every exit path, not only for selected exceptions
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(result, 0)
     else:
         self.assertGreater(result, 0)
Example #37
0
 def test_cew_synthesize_multiple_lang(self):
     """
     Synthesize three fragments in two languages through
     ``aeneas.cew.cew`` and check the returned sample rate, number of
     synthesized fragments and number of intervals.

     If ``aeneas.cew.cew`` cannot be imported, nothing is checked.
     The temporary file is removed even when an assertion fails.
     """
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         c_quit_after = 0.0
         c_backwards = 0
         # NOTE cew requires the actual eSpeak voice code
         c_text = [
             (u"en", u"Dummy 1"),
             (u"it", u"Segnaposto 2"),
             (u"en", u"Dummy 3"),
         ]
         import aeneas.cew.cew
         sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
             output_file_path, c_quit_after, c_backwards, c_text)
         self.assertEqual(sr, 22050)
         self.assertEqual(sf, 3)
         self.assertEqual(len(intervals), 3)
     except ImportError:
         # the module is not importable here: nothing to test
         pass
     finally:
         gf.delete_file(handler, output_file_path)
Example #38
0
    def _execute_inner(self, audio_file_mfcc, text_file, adjust_boundaries=True, log=True):
        """
        Align a subinterval of the given AudioFileMFCC
        with the given TextFile.

        Return the computed time map, as a list of intervals.

        The begin and end positions inside the AudioFileMFCC
        must have been set ahead by the caller.

        The text fragments being aligned are the vchildren of ``text_file``.

        The synthesized audio file is deleted as soon as its MFCCs
        have been extracted, even if the extraction raises.

        :param audio_file_mfcc: the audio file MFCC representation
        :type  audio_file_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        :param text_file: the text file subtree to align
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param bool adjust_boundaries: if ``True``, execute the adjust boundary algorithm
        :param bool log: if ``True``, log steps
        :rtype: list
        """
        self._step_begin(u"synthesize text", log=log)
        synt_handler, synt_path, synt_anchors, synt_mono = self._synthesize(text_file)
        self._step_end(log=log)

        self._step_begin(u"extract MFCC synt wave", log=log)
        try:
            synt_wave_mfcc = self._extract_mfcc(file_path=synt_path, file_path_is_mono_wave=synt_mono)
        finally:
            # the synthesized file is not used after this step:
            # remove it on failure too, so it is not left on disk
            gf.delete_file(synt_handler, synt_path)
        self._step_end(log=log)

        self._step_begin(u"align waves", log=log)
        indices = self._align_waves(audio_file_mfcc, synt_wave_mfcc, synt_anchors)
        self._step_end(log=log)

        self._step_begin(u"adjust boundaries", log=log)
        time_map = self._adjust_boundaries(audio_file_mfcc, text_file, indices, adjust_boundaries)
        self._step_end(log=log)

        return time_map
Example #39
0
 def test_cew_synthesize_multiple_lang(self):
     """
     Call ``aeneas.cew.cew.synthesize_multiple`` on three fragments
     (English, Italian, English) and check the three returned values.

     The checks are silently skipped if an ``ImportError`` is raised.
     The temporary output file is removed in every case.
     """
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         c_quit_after = 0.0
         c_backwards = 0
         c_text = [
             (u"en", u"Dummy 1"),        # NOTE cew requires the actual eSpeak voice code
             (u"it", u"Segnaposto 2"),   # NOTE cew requires the actual eSpeak voice code
             (u"en", u"Dummy 3"),        # NOTE cew requires the actual eSpeak voice code
         ]
         import aeneas.cew.cew
         sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
             output_file_path,
             c_quit_after,
             c_backwards,
             c_text
         )
         self.assertEqual(sr, 22050)
         self.assertEqual(sf, 3)
         self.assertEqual(len(intervals), 3)
     except ImportError:
         pass
     finally:
         # a failing assertion must not leave the temporary file behind
         gf.delete_file(handler, output_file_path)
Example #40
0
    def check_ffmpeg(cls):
        """
        Check whether ``ffmpeg`` can be called.

        The check converts a bundled audio file into a temporary WAVE file,
        which is always removed afterwards.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.ffmpegwrapper import FFMPEGWrapper
            input_file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            try:
                converter = FFMPEGWrapper()
                result = converter.convert(input_file_path, output_file_path)
            finally:
                # do not leave the temporary file behind if the conversion raises
                gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"ffmpeg         OK")
                return False
        except Exception:
            # any failure means that ffmpeg is not usable;
            # KeyboardInterrupt and SystemExit are deliberately not swallowed
            pass
        gf.print_error(u"ffmpeg         ERROR")
        gf.print_info(u"  Please make sure you have ffmpeg installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        return True
Example #41
0
 def _compose_output_file_path(self, extension, output_file_path=None):
     """
     Return the path where the output should be written.

     If ``output_file_path`` is not ``None``, it is returned as is,
     after checking that it can be written:
     a failed check is reported through ``self.log_exc``,
     passing ``OSError`` as the exception type.

     If ``output_file_path`` is ``None``, a temporary file
     with suffix ``.<extension>`` is created under the configured
     temporary directory and immediately deleted,
     so that only its path is returned.
     """
     self.log(u"Determining output file path...")
     target_path = output_file_path
     if target_path is not None:
         self.log(
             u"output_file_path is not None: cheking that file can be written"
         )
         writable = gf.file_can_be_written(target_path)
         if not writable:
             message = u"Path '%s' cannot be written. Wrong permissions?" % (target_path)
             self.log_exc(message, None, True, OSError)
     else:
         self.log(u"output_file_path is None: creating temp file")
         tmp_root = self.rconf[RuntimeConfiguration.TMP_PATH]
         tmp_suffix = ".%s" % extension
         tmp_handler, target_path = gf.tmp_file(root=tmp_root, suffix=tmp_suffix)
         # only the generated path is kept: the file itself is removed right away
         gf.delete_file(tmp_handler, target_path)
     self.log(u"Determining output file path... done")
     self.log([u"Output file path is '%s'", target_path])
     return target_path
Example #42
0
    def audio_from_youtube(self,
                           source_url,
                           download=True,
                           output_file_path=None,
                           preferred_index=None,
                           largest_audio=True,
                           preferred_format=None):
        """
        Download an audio stream from a YouTube video,
        and save it to file.

        If ``download`` is ``False``, return the list
        of available audiostreams but do not download.

        Otherwise, download the audio stream best matching
        the provided parameters, as follows.
        If ``preferred_index`` is not ``None``,
        download the audio stream at that index.
        If ``largest_audio`` is ``True``,
        download the largest audiostream;
        otherwise, download the smallest audiostream.
        If ``preferred_format`` is not ``None``,
        download the audiostream having that format.
        The latter option works in combination with ``largest_audio``.

        Return the path of the downloaded file.

        :param string source_url: the URL of the YouTube video
        :param bool download: if ``True``, download the audio stream
                              best matching ``preferred_index`` or ``preferred_format``
                              and ``largest_audio``;
                              if ``False``, return the list of available audio streams
        :param string output_file_path: the path where the downloaded audio should be saved;
                                        if ``None``, create a temporary file
        :param int preferred_index: preferably download this audio stream
        :param bool largest_audio: if ``True``, download the largest audio stream available;
                                   if ``False``, download the smallest one.
        :param string preferred_format: preferably download this audio format
        :rtype: string or list of pafy audio streams
        :raises: ImportError: if ``pafy`` is not installed
        :raises: OSError: if ``output_file_path`` cannot be written
        :raises: ValueError: if ``source_url`` is not a valid YouTube URL
        """
        def select_audiostream(audiostreams):
            """ Select the audiostream best matching the given parameters. """
            if preferred_index is not None:
                if preferred_index in range(len(audiostreams)):
                    self.log([
                        u"Selecting audiostream with index %d", preferred_index
                    ])
                    return audiostreams[preferred_index]
                else:
                    self.log_warn([
                        u"Audio stream index '%d' not allowed", preferred_index
                    ])
                    self.log_warn(u"Ignoring the requested audio stream index")
            # selecting by preferred format
            streams = audiostreams
            if preferred_format is not None:
                self.log([
                    u"Selecting audiostreams by preferred format %s",
                    preferred_format
                ])
                streams = [
                    audiostream for audiostream in streams
                    if audiostream.extension == preferred_format
                ]
                if len(streams) < 1:
                    self.log([
                        u"No audiostream with preferred format %s",
                        preferred_format
                    ])
                    streams = audiostreams
            # sort by size, using a key function so that two streams
            # with the same size are never compared with each other
            streams = sorted(streams, key=lambda audio: audio.get_filesize())
            if largest_audio:
                self.log(u"Selecting largest audiostream")
                selected = streams[-1]
            else:
                self.log(u"Selecting smallest audiostream")
                selected = streams[0]
            return selected

        try:
            import pafy
        except ImportError as exc:
            self.log_exc(u"Python module pafy is not installed", exc, True,
                         ImportError)

        try:
            video = pafy.new(source_url)
        except (IOError, OSError, ValueError) as exc:
            self.log_exc(
                u"The specified source URL '%s' is not a valid YouTube URL or you are offline"
                % (source_url), exc, True, ValueError)

        if not download:
            self.log(u"Returning the list of audio streams")
            return video.audiostreams

        output_path = output_file_path
        handler = None
        if output_file_path is None:
            self.log(u"output_path is None: creating temp file")
            handler, output_path = gf.tmp_file(
                root=self.rconf[RuntimeConfiguration.TMP_PATH])
        else:
            if not gf.file_can_be_written(output_path):
                self.log_exc(
                    u"Path '%s' cannot be written. Wrong permissions?" %
                    (output_path), None, True, OSError)

        try:
            audiostream = select_audiostream(video.audiostreams)
        finally:
            # the temporary file only provides a unique path:
            # remove it in every case, so that it is not leaked
            # if the stream selection fails
            if output_file_path is None:
                gf.delete_file(handler, output_path)
        if output_file_path is None:
            output_path += "." + audiostream.extension

        self.log([u"output_path is '%s'", output_path])
        self.log(u"Downloading...")
        audiostream.download(filepath=output_path, quiet=True)
        self.log(u"Downloading... done")
        return output_path
Example #43
0
    def read_samples_from_file(self):
        """
        Load the audio samples from file into memory.

        If ``self.file_format`` is ``None`` or,
        when ``self.rconf.safety_checks`` is enabled, it is not
        ``("pcm_s16le", 1, self.rconf.sample_rate)``,
        the file will be first converted
        to a temporary PCM16 mono WAVE file.
        Audio data will be read from this temporary file,
        which will be then deleted from disk immediately,
        also when the conversion or the reading fails.

        Otherwise,
        the audio data will be read directly
        from the given file,
        which will not be deleted from disk.

        :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Loading audio data...")

        # check the file can be read
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # determine if we need to convert the audio file
        convert_audio_file = (
            (self.file_format is None) or
            (
                (self.rconf.safety_checks) and
                (self.file_format != ("pcm_s16le", 1, self.rconf.sample_rate))
            )
        )

        # convert the audio file if needed
        if convert_audio_file:
            # convert file to PCM16 mono WAVE with correct sample rate
            self.log(u"self.file_format is None or not good => converting self.file_path")
            tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
            try:
                self.log(u"Converting audio file to mono...")
                converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
                converter.convert(self.file_path, tmp_file_path)
                self.file_format = ("pcm_s16le", 1, self.rconf.sample_rate)
                self.log(u"Converting audio file to mono... done")
            except FFMPEGPathError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
            except OSError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)
            except Exception:
                # unexpected failure: do not leak the temporary file, then propagate
                gf.delete_file(tmp_handler, tmp_file_path)
                raise
        else:
            # read the file directly
            if self.rconf.safety_checks:
                self.log(u"self.file_format is good => reading self.file_path directly")
            else:
                self.log_warn(u"Safety checks disabled => reading self.file_path directly")
            tmp_handler = None
            tmp_file_path = self.file_path

        # TODO allow calling C extension cwave to read samples faster
        try:
            self.audio_format = "pcm16"
            self.audio_channels = 1
            self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
            # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
            # so we convert it to a float64 in [-1, 1]
            self.__samples = self.__samples.astype("float64") / 32768
            self.__samples_capacity = len(self.__samples)
            self.__samples_length = self.__samples_capacity
            self._update_length()
        except ValueError:
            self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
        finally:
            # if we converted the audio file, delete the temporary converted audio file,
            # no matter whether reading it succeeded or not
            if convert_audio_file:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log([u"Deleted temporary audio file: '%s'", tmp_file_path])

        self._update_length()
        self.log([u"Sample length:  %.3f", self.audio_length])
        self.log([u"Sample rate:    %d", self.audio_sample_rate])
        self.log([u"Audio format:   %s", self.audio_format])
        self.log([u"Audio channels: %d", self.audio_channels])
        self.log(u"Loading audio data... done")
 def test_delete_file_existing(self):
     """ A freshly created temporary file exists, and no longer exists after ``gf.delete_file``. """
     tmp_handler, tmp_path = gf.tmp_file()
     exists_before = gf.file_exists(tmp_path)
     self.assertTrue(exists_before)
     gf.delete_file(tmp_handler, tmp_path)
     exists_after = gf.file_exists(tmp_path)
     self.assertFalse(exists_after)
 def test_file_size_zero(self):
     """ A freshly created temporary file has size zero. """
     handler, path = gf.tmp_file()
     try:
         self.assertEqual(gf.file_size(path), 0)
     finally:
         # remove the temporary file even if the assertion fails
         gf.delete_file(handler, path)
Example #46
0
    def read_samples_from_file(self):
        """
        Load the audio samples from file into memory.

        If ``self.is_mono_wave`` is ``False``,
        the file will be first converted
        to a temporary PCM16 mono WAVE file.
        Audio data will be read from this temporary file,
        which will be then deleted from disk immediately,
        also when the conversion or the reading fails.

        If ``self.is_mono_wave`` is ``True``,
        the audio data will be read directly
        from the given file,
        which will not be deleted from disk.

        :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Loading audio data...")

        # check the file can be read
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # convert file to PCM16 mono WAVE
        if self.is_mono_wave:
            self.log(u"is_mono_wave=True => reading self.file_path directly")
            tmp_handler = None
            tmp_file_path = self.file_path
        else:
            self.log(u"is_mono_wave=False => converting self.file_path")
            tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
            try:
                self.log(u"Converting audio file to mono...")
                converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
                converter.convert(self.file_path, tmp_file_path)
                self.log(u"Converting audio file to mono... done")
            except FFMPEGPathError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
            except OSError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)
            except Exception:
                # unexpected failure: do not leak the temporary file, then propagate
                gf.delete_file(tmp_handler, tmp_file_path)
                raise

        # TODO allow calling C extension cwave to read samples faster
        try:
            self.audio_format = "pcm16"
            self.audio_channels = 1
            self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
            # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
            # so we convert it to a float64 in [-1, 1]
            self.__samples = self.__samples.astype("float64") / 32768
            self.__samples_capacity = len(self.__samples)
            self.__samples_length = self.__samples_capacity
            self._update_length()
        except ValueError:
            self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
        finally:
            # the temporary converted file is removed in every case,
            # no matter whether reading it succeeded or not
            if not self.is_mono_wave:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log([u"Deleted temporary PCM16 mono WAVE file: '%s'", tmp_file_path])

        self._update_length()
        self.log([u"Sample length:  %.3f", self.audio_length])
        self.log([u"Sample rate:    %d", self.audio_sample_rate])
        self.log([u"Audio format:   %s", self.audio_format])
        self.log([u"Audio channels: %d", self.audio_channels])
        self.log(u"Loading audio data... done")
 def test_delete_file_not_existing(self):
     """ Calling ``gf.delete_file`` with no handler on a missing path leaves the path missing. """
     missing_path = "/foo/bar/baz"
     self.assertFalse(gf.file_exists(missing_path))
     gf.delete_file(None, missing_path)
     self.assertFalse(gf.file_exists(missing_path))
Example #48
0
 def test_delete_file_existing(self):
     """ ``gf.delete_file`` removes a temporary file created by ``gf.tmp_file``. """
     created = gf.tmp_file()
     tmp_handler, tmp_path = created
     self.assertTrue(gf.file_exists(tmp_path))
     gf.delete_file(tmp_handler, tmp_path)
     still_there = gf.file_exists(tmp_path)
     self.assertFalse(still_there)
Example #49
0
 def test_delete_file_not_existing(self):
     """ Deleting a path that does not exist, without a handler, keeps it non-existing. """
     no_handler = None
     absent_path = "/foo/bar/baz"
     existed_before = gf.file_exists(absent_path)
     self.assertFalse(existed_before)
     gf.delete_file(no_handler, absent_path)
     exists_after = gf.file_exists(absent_path)
     self.assertFalse(exists_after)
Example #50
0
    def synthesize_multiple(self, audio_file_path, c_quit_after, c_backwards, u_text):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        The synthesis is delegated to a subprocess running ``aeneas.cewsubprocess``:
        the fragments are passed through a temporary text file,
        and the results are read back from a temporary data file.
        Both temporary files are removed in every case,
        also when the subprocess call or the parsing of its output fails.

        :param string audio_file_path: the path to the output audio file
        :param float c_quit_after: stop synthesizing as soon as
                                   reaching this many seconds
        :param bool c_backwards: synthesizing from the end of the text file
        :param object u_text: a list of ``(voice_code, text)`` tuples
        :rtype: tuple ``(sample_rate, synthesized, intervals)``
        """
        self.log([u"Audio file path: '%s'", audio_file_path])
        self.log([u"c_quit_after: '%.3f'", c_quit_after])
        self.log([u"c_backwards: '%d'", c_backwards])

        text_file_handler, text_file_path = gf.tmp_file()
        data_file_handler, data_file_path = gf.tmp_file()
        try:
            self.log([u"Temporary text file path: '%s'", text_file_path])
            self.log([u"Temporary data file path: '%s'", data_file_path])

            self.log(u"Populating the text file...")
            with io.open(text_file_path, "w", encoding="utf-8") as tmp_text_file:
                for f_voice_code, f_text in u_text:
                    tmp_text_file.write(u"%s %s\n" % (f_voice_code, f_text))
            self.log(u"Populating the text file... done")

            arguments = [
                self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_PATH],
                "-m",
                "aeneas.cewsubprocess",
                "%.3f" % c_quit_after,
                "%d" % c_backwards,
                text_file_path,
                audio_file_path,
                data_file_path,
            ]
            self.log([u"Calling with arguments '%s'", u" ".join(arguments)])
            proc = subprocess.Popen(
                arguments, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
            )
            proc.communicate()

            self.log(u"Reading output data...")
            with io.open(data_file_path, "r", encoding="utf-8") as data_file:
                lines = data_file.read().splitlines()
                # first line: sample rate; second line: second element of the returned tuple;
                # remaining lines: one "begin end" pair per interval
                sr = int(lines[0])
                sf = int(lines[1])
                intervals = []
                for line in lines[2:]:
                    values = line.split(u" ")
                    if len(values) == 2:
                        intervals.append((TimeValue(values[0]), TimeValue(values[1])))
            self.log(u"Reading output data... done")
        finally:
            # never leave the temporary files behind, even on failure
            self.log(u"Deleting text and data files...")
            gf.delete_file(text_file_handler, text_file_path)
            gf.delete_file(data_file_handler, data_file_path)
            self.log(u"Deleting text and data files... done")

        return (sr, sf, intervals)
Example #51
0
    def _synthesize_single_subprocess_helper(self,
                                             text,
                                             voice_code,
                                             output_file_path=None,
                                             return_audio_data=True):
        """
        This is an helper function to synthesize a single text fragment via ``subprocess``.

        If ``output_file_path`` is ``None``,
        the audio data will not persist to file at the end of the method:
        a temporary output file is created, and it is removed
        before returning, also when the synthesis fails.

        The temporary text file, created when the TTS engine reads
        its input from file, is removed in every case as well.

        If ``return_audio_data`` is ``True``,
        return the audio data at the end of the function call;
        if ``False``, just return ``(True, None)`` in case of success.

        In case of failure, ``(False, None)`` is returned.

        :rtype: tuple (result, (duration, sample_rate, codec, data)) or (result, None)
        """
        # return zero if text is the empty string
        if len(text) == 0:
            #
            # NOTE sample_rate, codec, data do not matter
            #      if the duration is 0.000 => set them to None
            #
            self.log(u"len(text) is zero: returning 0.000")
            return (True, (TimeValue("0.000"), None, None, None))

        # create a temporary output file if needed
        synt_tmp_file = (output_file_path is None)
        output_file_handler = None
        if synt_tmp_file:
            self.log(
                u"Synthesizer helper called with output_file_path=None => creating temporary output file"
            )
            output_file_handler, output_file_path = gf.tmp_file(
                suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary output file path is '%s'", output_file_path])

        def _remove_tmp_output():
            """ Remove the output file, if it was created by this call. """
            if synt_tmp_file:
                self.log([
                    u"Removing temporary output file path '%s'", output_file_path
                ])
                gf.delete_file(output_file_handler, output_file_path)

        tmp_text_file_handler = None
        tmp_text_file_path = None
        try:
            try:
                # if the TTS engine reads text from file,
                # write the text into a temporary file
                if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                    self.log(u"TTS engine reads text from file")
                    tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(
                        suffix=u".txt",
                        root=self.rconf[RuntimeConfiguration.TMP_PATH])
                    self.log([
                        u"Creating temporary text file '%s'...", tmp_text_file_path
                    ])
                    with io.open(tmp_text_file_path, "w",
                                 encoding="utf-8") as tmp_text_file:
                        tmp_text_file.write(text)
                    self.log([
                        u"Creating temporary text file '%s'... done",
                        tmp_text_file_path
                    ])
                else:
                    self.log(u"TTS engine reads text from stdin")

                # copy all relevant arguments
                self.log(u"Creating arguments list...")
                arguments = []
                for arg in self.subprocess_arguments:
                    if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                        arguments.extend(
                            self._voice_code_to_subprocess(voice_code))
                    elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                        arguments.append(voice_code)
                    elif arg == self.CLI_PARAMETER_TEXT_PATH:
                        arguments.append(tmp_text_file_path)
                    elif arg == self.CLI_PARAMETER_WAVE_PATH:
                        arguments.append(output_file_path)
                    elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                        # placeholder, do not append
                        pass
                    elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                        # placeholder, do not append
                        pass
                    else:
                        arguments.append(arg)
                self.log(u"Creating arguments list... done")

                # actual call via subprocess
                self.log(u"Calling TTS engine...")
                self.log([u"Calling with arguments '%s'", arguments])
                self.log([u"Calling with text '%s'", text])
                proc = subprocess.Popen(arguments,
                                        stdout=subprocess.PIPE,
                                        stdin=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        universal_newlines=True)
                if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                    self.log(u"Passing text via stdin...")
                    if gf.PY2:
                        (stdoutdata,
                         stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                    else:
                        (stdoutdata, stderrdata) = proc.communicate(input=text)
                    self.log(u"Passing text via stdin... done")
                else:
                    self.log(u"Passing text via file...")
                    (stdoutdata, stderrdata) = proc.communicate()
                    self.log(u"Passing text via file... done")
                proc.stdout.close()
                proc.stdin.close()
                proc.stderr.close()

                if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                    self.log(u"TTS engine wrote audio data to stdout")
                    self.log(
                        [u"Writing audio data to file '%s'...", output_file_path])
                    # NOTE(review): the pipes are opened with universal_newlines=True,
                    #               so on Python 3 stdoutdata is text while the file
                    #               is opened in binary mode -- confirm this branch
                    #               works as intended on Python 3
                    with io.open(output_file_path, "wb") as output_file:
                        output_file.write(stdoutdata)
                    self.log([
                        u"Writing audio data to file '%s'... done",
                        output_file_path
                    ])
                else:
                    self.log(u"TTS engine wrote audio data to file")
            finally:
                # the temporary text file is no longer needed,
                # no matter whether the call succeeded or not
                if tmp_text_file_path is not None:
                    self.log(
                        [u"Delete temporary text file '%s'", tmp_text_file_path])
                    gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(
                u"An unexpected error occurred while calling TTS engine via subprocess",
                exc, False, None)
            _remove_tmp_output()
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            # clean up first, so that the temporary output file is not leaked
            # whatever ``log_exc`` does
            _remove_tmp_output()
            self.log_exc(
                u"Output file '%s' cannot be read" % (output_file_path), None,
                True, None)
            return (False, None)

        # read audio data;
        # if the output file was temporary, remove it in every case
        try:
            ret = self._read_audio_data(
                output_file_path) if return_audio_data else (True, None)
        finally:
            _remove_tmp_output()

        # return audio data or (True, None)
        return ret
Example #52
0
 def test_output_html_for_tuning(self):
     """
     Read an XML sync map and output the HTML file for tuning
     to a temporary file, which is removed in every case.
     """
     syn = self.read(SyncMapFormat.XML, multiline=True, utf8=True)
     handler, output_file_path = gf.tmp_file(suffix=".html")
     try:
         audio_file_path = "foo.mp3"
         syn.output_html_for_tuning(audio_file_path, output_file_path, None)
     finally:
         # remove the temporary file even if the call above raises
         gf.delete_file(handler, output_file_path)
Example #53
0
    def _detect(self, min_length, max_length, tail=False):
        """
        Detect the head or tail within ``min_length`` and ``max_length`` duration.

        If detecting the tail, the real wave MFCC and the query are reversed
        so that the tail detection problem reduces to a head detection problem.

        Return the duration of the head or tail, in seconds.

        :param min_length: estimated minimum length
        :type  min_length: :class:`~aeneas.timevalue.TimeValue`
        :param max_length: estimated maximum length
        :type  max_length: :class:`~aeneas.timevalue.TimeValue`
        :rtype: :class:`~aeneas.timevalue.TimeValue`
        :raises: TypeError: if one of the parameters is not ``None`` or a number
        :raises: ValueError: if one of the parameters is negative
        """
        def _sanitize(value, default, name):
            if value is None:
                value = default
            try:
                value = TimeValue(value)
            except (TypeError, ValueError, InvalidOperation) as exc:
                self.log_exc(u"The value of %s is not a number" % (name), exc, True, TypeError)
            if value < 0:
                self.log_exc(u"The value of %s is negative" % (name), None, True, ValueError)
            return value

        min_length = _sanitize(min_length, self.MIN_LENGTH, "min_length")
        max_length = _sanitize(max_length, self.MAX_LENGTH, "max_length")
        mws = self.rconf.mws
        min_length_frames = int(min_length / mws)
        max_length_frames = int(max_length / mws)
        self.log([u"MFCC window shift s:     %.3f", mws])
        self.log([u"Min start length s:      %.3f", min_length])
        self.log([u"Min start length frames: %d", min_length_frames])
        self.log([u"Max start length s:      %.3f", max_length])
        self.log([u"Max start length frames: %d", max_length_frames])
        self.log([u"Tail?:                   %s", str(tail)])

        self.log(u"Synthesizing query...")
        synt_duration = max_length * self.QUERY_FACTOR
        self.log([u"Synthesizing at least %.3f seconds", synt_duration])
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        synt = Synthesizer(rconf=self.rconf, logger=self.logger)
        anchors, total_time, synthesized_chars = synt.synthesize(
            self.text_file,
            tmp_file_path,
            quit_after=synt_duration,
            backwards=tail
        )
        self.log(u"Synthesizing query... done")

        self.log(u"Extracting MFCCs for query...")
        query_mfcc = AudioFileMFCC(tmp_file_path, rconf=self.rconf, logger=self.logger)
        self.log(u"Extracting MFCCs for query... done")

        self.log(u"Cleaning up...")
        gf.delete_file(tmp_handler, tmp_file_path)
        self.log(u"Cleaning up... done")

        search_window = max_length * self.AUDIO_FACTOR
        search_window_end = min(int(search_window / mws), self.real_wave_mfcc.all_length)
        self.log([u"Query MFCC length (frames): %d", query_mfcc.all_length])
        self.log([u"Real MFCC length (frames):  %d", self.real_wave_mfcc.all_length])
        self.log([u"Search window end (s):      %.3f", search_window])
        self.log([u"Search window end (frames): %d", search_window_end])

        if tail:
            self.log(u"Tail => reversing real_wave_mfcc and query_mfcc")
            self.real_wave_mfcc.reverse()
            query_mfcc.reverse()

        # NOTE: VAD will be run here, if not done before
        speech_intervals = self.real_wave_mfcc.intervals(speech=True, time=False)
        if len(speech_intervals) < 1:
            self.log(u"No speech intervals, hence no start found")
            if tail:
                self.real_wave_mfcc.reverse()
            return TimeValue("0.000")

        # generate a list of begin indices
        search_end = None
        candidates_begin = []
        for interval in speech_intervals:
            if (interval[0] >= min_length_frames) and (interval[0] <= max_length_frames):
                candidates_begin.append(interval[0])
            search_end = interval[1]
            if search_end >= search_window_end:
                break

        # for each begin index, compute the acm cost
        # to match the query
        # note that we take the min over the last column of the acm
        # meaning that we allow to match the entire query wave
        # against a portion of the real wave
        candidates = []
        for candidate_begin in candidates_begin:
            self.log([u"Candidate interval starting at %d == %.3f", candidate_begin, candidate_begin * mws])
            try:
                rwm = AudioFileMFCC(
                    mfcc_matrix=self.real_wave_mfcc.all_mfcc[:, candidate_begin:search_end],
                    rconf=self.rconf,
                    logger=self.logger
                )
                dtw = DTWAligner(
                    real_wave_mfcc=rwm,
                    synt_wave_mfcc=query_mfcc,
                    rconf=self.rconf,
                    logger=self.logger
                )
                acm = dtw.compute_accumulated_cost_matrix()
                last_column = acm[:, -1]
                min_value = numpy.min(last_column)
                min_index = numpy.argmin(last_column)
                self.log([u"Candidate interval: %d %d == %.3f %.3f", candidate_begin, search_end, candidate_begin * mws, search_end * mws])
                self.log([u"  Min value: %.6f", min_value])
                self.log([u"  Min index: %d == %.3f", min_index, min_index * mws])
                candidates.append((min_value, candidate_begin, min_index))
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while running _detect", exc, False, None)

        # reverse again the real wave
        if tail:
            self.log(u"Tail => reversing real_wave_mfcc again")
            self.real_wave_mfcc.reverse()

        # return
        if len(candidates) < 1:
            self.log(u"No candidates found")
            return TimeValue("0.000")
        self.log(u"Candidates:")
        for candidate in candidates:
            self.log([u"  Value: %.6f Begin Time: %.3f Min Index: %d", candidate[0], candidate[1] * mws, candidate[2]])
        best = sorted(candidates)[0][1]
        self.log([u"Best candidate: %d == %.3f", best, best * mws])
        return best * mws
 def write(self, fmt, multiline=False, utf8=False, parameters=PARAMETERS):
     """
     Read a sync map in XML format via ``self.read`` and write it
     in the format ``fmt`` to a temporary file.

     The temporary file is always deleted,
     even if writing the sync map raises an exception.

     :param fmt: the output format; it is also used,
                 prefixed by a dot, as the suffix of the temporary file
     :param multiline: passed through to ``self.read``
     :param utf8: passed through to ``self.read``
     :param parameters: passed as third argument to the ``write`` method
                        of the object returned by ``self.read``
     """
     suffix = "." + fmt
     syn = self.read(SyncMapFormat.XML, multiline, utf8, self.PARAMETERS)
     handler, output_file_path = gf.tmp_file(suffix=suffix)
     try:
         syn.write(fmt, output_file_path, parameters)
     finally:
         # do not leave the temporary file behind if write() fails
         gf.delete_file(handler, output_file_path)
Example #55
0
    def _synthesize_single_subprocess(self, text, voice_code, output_file_path):
        """
        Synthesize a single text fragment via ``subprocess``.

        The argument list of the call is built from
        ``self.subprocess_arguments``, replacing the placeholders
        for the voice code, the text file path and the wave file path
        with their actual values, and dropping the placeholders
        for text-from-stdin and wave-to-stdout.

        If the TTS engine reads the text from file,
        a temporary text file is created; it is always deleted
        before returning, even if calling the TTS engine fails.

        Return ``(False, None)`` if the TTS engine cannot be called,
        or if the output file cannot be read.

        :param text: the text to be synthesized
        :param voice_code: the voice code to be passed to the TTS engine
        :param output_file_path: the path to the output audio file
        :rtype: tuple (result, (duration, sample_rate, encoding, samples))
        """
        self.log(u"Synthesizing using pure Python...")
        # set before entering the try blocks, so that the cleanup code
        # can tell whether a temporary text file has been created
        tmp_text_file_handler = None
        tmp_text_file_path = None
        try:
            try:
                # if the TTS engine reads text from file,
                # write the text into a temporary file
                if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                    self.log(u"TTS engine reads text from file")
                    tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
                    self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
                    with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                        tmp_text_file.write(text)
                    self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
                else:
                    self.log(u"TTS engine reads text from stdin")

                # copy all relevant arguments
                self.log(u"Creating arguments list...")
                arguments = []
                for arg in self.subprocess_arguments:
                    if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                        arguments.extend(self._voice_code_to_subprocess(voice_code))
                    elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                        arguments.append(voice_code)
                    elif arg == self.CLI_PARAMETER_TEXT_PATH:
                        arguments.append(tmp_text_file_path)
                    elif arg == self.CLI_PARAMETER_WAVE_PATH:
                        arguments.append(output_file_path)
                    elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                        # placeholder, do not append
                        pass
                    elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                        # placeholder, do not append
                        pass
                    else:
                        arguments.append(arg)
                self.log(u"Creating arguments list... done")

                # actual call via subprocess
                self.log(u"Calling TTS engine...")
                self.log([u"Calling with arguments '%s'", arguments])
                self.log([u"Calling with text '%s'", text])
                proc = subprocess.Popen(
                    arguments,
                    stdout=subprocess.PIPE,
                    stdin=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True
                )
                if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                    self.log(u"Passing text via stdin...")
                    if gf.PY2:
                        (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                    else:
                        (stdoutdata, stderrdata) = proc.communicate(input=text)
                    self.log(u"Passing text via stdin... done")
                else:
                    self.log(u"Passing text via file...")
                    (stdoutdata, stderrdata) = proc.communicate()
                    self.log(u"Passing text via file... done")
                proc.stdout.close()
                proc.stdin.close()
                proc.stderr.close()

                if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                    self.log(u"TTS engine wrote audio data to stdout")
                    self.log([u"Writing audio data to file '%s'...", output_file_path])
                    with io.open(output_file_path, "wb") as output_file:
                        output_file.write(stdoutdata)
                    self.log([u"Writing audio data to file '%s'... done", output_file_path])
                else:
                    self.log(u"TTS engine wrote audio data to file")
            finally:
                # always remove the temporary text file, if any,
                # including when one of the steps above has failed;
                # an error raised here is still handled
                # by the outer except clause
                if tmp_text_file_path is not None:
                    self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
                    gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
            return (False, None)

        # return the duration of the output file
        try:
            # if we know the TTS outputs to PCM16 mono WAVE,
            # we can read samples directly from it,
            # without an intermediate conversion through ffmpeg
            audio_file = AudioFile(
                file_path=output_file_path,
                is_mono_wave=self.OUTPUT_MONO_WAVE,
                rconf=self.rconf,
                logger=self.logger
            )
            audio_file.read_samples_from_file()
            self.log([u"Duration of '%s': %f", output_file_path, audio_file.audio_length])
            self.log(u"Synthesizing using pure Python... done")
            return (True, (
                audio_file.audio_length,
                audio_file.audio_sample_rate,
                audio_file.audio_format,
                audio_file.audio_samples
            ))
        except (AudioFileUnsupportedFormatError, OSError) as exc:
            self.log_exc(u"An unexpected error occurred while trying to read the sythesized audio file", exc, True, None)
            return (False, None)
Example #56
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        The informational options (help, examples, list of parameters,
        list of values) are handled first, each returning immediately.
        Otherwise, the audio, text, config string and sync map arguments
        are taken either from the selected entry of ``self.DEMOS``
        or from the first four actual arguments; then
        the input/output paths are checked,
        the config string is optionally validated,
        the audio is optionally downloaded from YouTube,
        the task is created and executed,
        and the sync map (and optionally an HTML file) is written.

        ``self.ERROR_EXIT_CODE`` is returned as soon as
        one of these steps fails, otherwise
        ``self.NO_ERROR_EXIT_CODE`` is returned.

        :rtype: int
        """
        if len(self.actual_arguments) < 1:
            return self.print_help()

        # informational options: print and return immediately
        if self.has_option([u"-e", u"--examples"]):
            return self.print_examples(False)

        if self.has_option(u"--examples-all"):
            return self.print_examples(True)

        if self.has_option([u"--list-parameters"]):
            return self.print_parameters()

        # --list-values might be given with or without a value
        parameter = self.has_option_with_value(u"--list-values")
        if parameter is not None:
            return self.print_values(parameter)
        elif self.has_option(u"--list-values"):
            return self.print_values(u"?")

        # NOTE list() is needed for Python3, where keys() is not a list!
        demo = self.has_option(list(self.DEMOS.keys()))
        demo_parameters = u""
        download_from_youtube = self.has_option([u"-y", u"--youtube"])
        largest_audio = self.has_option(u"--largest-audio")
        keep_audio = self.has_option(u"--keep-audio")
        output_html = self.has_option(u"--output-html")
        validate = not self.has_option(u"--skip-validator")
        print_faster_rate = self.has_option(u"--faster-rate")
        print_rates = self.has_option(u"--rates")
        print_zero = self.has_option(u"--zero")

        if demo:
            # the config string of a demo is not validated
            validate = False
            # only the first key of DEMOS given as an option is used
            for key in self.DEMOS:
                if self.has_option(key):
                    demo_parameters = self.DEMOS[key]
                    audio_file_path = demo_parameters[u"audio"]
                    text_file_path = demo_parameters[u"text"]
                    config_string = demo_parameters[u"config"]
                    sync_map_file_path = demo_parameters[u"syncmap"]
                    # TODO allow injecting rconf options directly from DEMOS options field
                    if key == u"--example-cewsubprocess":
                        self.rconf[
                            RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
                    elif key == u"--example-ctw-espeak":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[
                            RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
                    elif key == u"--example-ctw-speect":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[
                            RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
                    elif key == u"--example-festival":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                    elif key == u"--example-mws":
                        self.rconf[
                            RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                        self.rconf[
                            RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
                    elif key == u"--example-multilevel-tts":
                        self.rconf[RuntimeConfiguration.TTS_L1] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_L2] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_L3] = "espeak"
                    elif key == u"--example-words-festival-cache":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_CACHE] = True
                    elif key == u"--example-faster-rate":
                        print_faster_rate = True
                    elif key == u"--example-no-zero":
                        print_zero = True
                    elif key == u"--example-py":
                        self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False
                    elif key == u"--example-rates":
                        print_rates = True
                    elif key == u"--example-youtube":
                        download_from_youtube = True
                    break
        else:
            # not a demo: the four positional arguments are required
            if len(self.actual_arguments) < 4:
                return self.print_help()
            audio_file_path = self.actual_arguments[0]
            text_file_path = self.actual_arguments[1]
            config_string = self.actual_arguments[2]
            sync_map_file_path = self.actual_arguments[3]

        html_file_path = None
        if output_html:
            # the HTML file is created from the audio file path,
            # hence a downloaded audio file is kept
            keep_audio = True
            html_file_path = sync_map_file_path + u".html"

        if download_from_youtube:
            # in this case the "audio file path" is actually a URL
            youtube_url = audio_file_path

        # check the input and output paths before doing any work;
        # the audio file is not checked if it must be downloaded
        if (not download_from_youtube) and (
                not self.check_input_file(audio_file_path)):
            return self.ERROR_EXIT_CODE
        if not self.check_input_file(text_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(sync_map_file_path):
            return self.ERROR_EXIT_CODE
        if (html_file_path
                is not None) and (not self.check_output_file(html_file_path)):
            return self.ERROR_EXIT_CODE

        self.check_c_extensions()

        if demo:
            # print a summary of the arguments of the demo
            msg = []
            msg.append(u"Running example task with arguments:")
            if download_from_youtube:
                msg.append(u"  YouTube URL:   %s" % youtube_url)
            else:
                msg.append(u"  Audio file:    %s" % audio_file_path)
            msg.append(u"  Text file:     %s" % text_file_path)
            msg.append(u"  Config string: %s" % config_string)
            msg.append(u"  Sync map file: %s" % sync_map_file_path)
            if len(demo_parameters[u"options"]) > 0:
                msg.append(u"  Options:       %s" %
                           demo_parameters[u"options"])
            self.print_info(u"\n".join(msg))

        if validate:
            self.print_info(
                u"Validating config string (specify --skip-validator to bypass)..."
            )
            validator = Validator(logger=self.logger)
            result = validator.check_configuration_string(config_string,
                                                          is_job=False,
                                                          external_name=True)
            if not result.passed:
                self.print_error(u"The given config string is not valid:")
                self.print_generic(result.pretty_print())
                return self.ERROR_EXIT_CODE
            self.print_info(u"Validating config string... done")

        if download_from_youtube:
            try:
                self.print_info(u"Downloading audio from '%s' ..." %
                                youtube_url)
                downloader = Downloader(logger=self.logger)
                # from now on, audio_file_path is the value returned
                # by the downloader, no longer the URL
                audio_file_path = downloader.audio_from_youtube(
                    youtube_url,
                    download=True,
                    output_file_path=None,
                    largest_audio=largest_audio)
                self.print_info(u"Downloading audio from '%s' ... done" %
                                youtube_url)
            except ImportError:
                self.print_no_pafy_error()
                return self.ERROR_EXIT_CODE
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while downloading audio from YouTube:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE
        else:
            # an unknown audio file extension only triggers warnings,
            # the task is executed anyway
            audio_extension = gf.file_extension(audio_file_path)
            if audio_extension.lower() not in AudioFile.FILE_EXTENSIONS:
                self.print_warning(
                    u"Your audio file path has extension '%s', which is uncommon for an audio file."
                    % audio_extension)
                self.print_warning(
                    u"Attempting at executing your Task anyway.")
                self.print_warning(
                    u"If it fails, you might have swapped the first two arguments."
                )
                self.print_warning(
                    u"The audio file path should be the first argument, the text file path the second."
                )

        try:
            self.print_info(u"Creating task...")
            task = Task(config_string, logger=self.logger)
            task.audio_file_path_absolute = audio_file_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_file_path
            self.print_info(u"Creating task... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while creating the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Executing task...")
            executor = ExecuteTask(task=task,
                                   rconf=self.rconf,
                                   logger=self.logger)
            executor.execute()
            self.print_info(u"Executing task... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while executing the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Creating output sync map file...")
            path = task.output_sync_map_file()
            self.print_info(u"Creating output sync map file... done")
            self.print_success(u"Created file '%s'" % path)
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while writing the sync map file:"
            )
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        if output_html:
            try:
                # copy the output-related values of the task configuration
                # into the parameters for the HTML writer
                parameters = {}
                parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration[
                    "o_format"]
                parameters[
                    gc.PPN_TASK_OS_FILE_EAF_AUDIO_REF] = task.configuration[
                        "o_eaf_audio_ref"]
                parameters[
                    gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration[
                        "o_smil_audio_ref"]
                parameters[
                    gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration[
                        "o_smil_page_ref"]
                self.print_info(u"Creating output HTML file...")
                task.sync_map.output_html_for_tuning(audio_file_path,
                                                     html_file_path,
                                                     parameters)
                self.print_info(u"Creating output HTML file... done")
                self.print_success(u"Created file '%s'" % html_file_path)
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while writing the HTML file:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE

        # NOTE(review): the downloaded audio file is deleted only here,
        # so it is left on disk by the error returns above
        if download_from_youtube:
            if keep_audio:
                self.print_info(
                    u"Option --keep-audio set: keeping downloaded file '%s'" %
                    audio_file_path)
            else:
                gf.delete_file(None, audio_file_path)

        # optional reports on the computed fragments
        if print_zero:
            zero_duration = [
                l for l in task.sync_map.fragments_tree.vleaves_not_empty
                if l.begin == l.end
            ]
            if len(zero_duration) > 0:
                self.print_warning(u"Fragments with zero duration:")
                for fragment in zero_duration:
                    self.print_generic(u"  %s" % fragment)

        if print_rates:
            self.print_info(u"Fragments with rates:")
            for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
                self.print_generic(u"  %s (rate: %.3f chars/s)" %
                                   (fragment, fragment.rate))

        if print_faster_rate:
            max_rate = task.configuration["aba_rate_value"]
            if max_rate is not None:
                # a fragment is reported only if its rate exceeds
                # the maximum rate by at least 0.001
                faster = [
                    l for l in task.sync_map.fragments_tree.vleaves_not_empty
                    if l.rate >= max_rate + Decimal("0.001")
                ]
                if len(faster) > 0:
                    self.print_warning(
                        u"Fragments with rate greater than %.3f:" % max_rate)
                    for fragment in faster:
                        self.print_generic(u"  %s (rate: %.3f chars/s)" %
                                           (fragment, fragment.rate))

        return self.NO_ERROR_EXIT_CODE
Example #57
0
    def synthesize_multiple(self, audio_file_path, c_quit_after, c_backwards,
                            u_text):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        The synthesis is performed by a separate process running
        the ``aeneas.cewsubprocess`` module: the fragments are passed to it
        through a temporary text file, and its results are read back
        from a temporary data file. Both temporary files are always deleted,
        even if calling the process or parsing its output fails.

        :param string audio_file_path: the path to the output audio file
        :param float c_quit_after: stop synthesizing as soon as
                                   reaching this many seconds
        :param bool c_backwards: synthesizing from the end of the text file
        :param object u_text: a list of ``(voice_code, text)`` tuples
        :rtype: tuple ``(sample_rate, synthesized, intervals)``
        """
        self.log([u"Audio file path: '%s'", audio_file_path])
        self.log([u"c_quit_after: '%.3f'", c_quit_after])
        self.log([u"c_backwards: '%d'", c_backwards])

        text_file_handler, text_file_path = gf.tmp_file()
        data_file_handler, data_file_path = gf.tmp_file()
        try:
            self.log([u"Temporary text file path: '%s'", text_file_path])
            self.log([u"Temporary data file path: '%s'", data_file_path])

            # one line per fragment: voice code, space, text
            self.log(u"Populating the text file...")
            with io.open(text_file_path, "w", encoding="utf-8") as tmp_text_file:
                for f_voice_code, f_text in u_text:
                    tmp_text_file.write(u"%s %s\n" % (f_voice_code, f_text))
            self.log(u"Populating the text file... done")

            arguments = [
                self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_PATH], "-m",
                "aeneas.cewsubprocess",
                "%.3f" % c_quit_after,
                "%d" % c_backwards, text_file_path, audio_file_path, data_file_path
            ]
            self.log([u"Calling with arguments '%s'", u" ".join(arguments)])
            proc = subprocess.Popen(arguments,
                                    stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            # wait for the process to terminate; its output is not used,
            # since the results are read from the data file
            proc.communicate()

            # the first two lines of the data file are integers,
            # each following line with exactly two space-separated values
            # is an interval
            self.log(u"Reading output data...")
            with io.open(data_file_path, "r", encoding="utf-8") as data_file:
                lines = data_file.read().splitlines()
                sr = int(lines[0])
                sf = int(lines[1])
                intervals = []
                for line in lines[2:]:
                    values = line.split(u" ")
                    if len(values) == 2:
                        intervals.append(
                            (TimeValue(values[0]), TimeValue(values[1])))
            self.log(u"Reading output data... done")
        finally:
            # do not leave the temporary files behind,
            # e.g. when the data file is empty or malformed
            self.log(u"Deleting text and data files...")
            gf.delete_file(text_file_handler, text_file_path)
            gf.delete_file(data_file_handler, data_file_path)
            self.log(u"Deleting text and data files... done")

        return (sr, sf, intervals)
Example #58
0
    def _detect(self, min_length, max_length, tail=False):
        """
        Detect the head or tail within ``min_length`` and ``max_length`` duration.

        If detecting the tail, the real wave MFCC and the query are reversed
        so that the tail detection problem reduces to a head detection problem.

        Return the duration of the head or tail, in seconds.

        A query wave is synthesized from ``self.text_file``
        into a temporary file, which is always deleted
        once its MFCCs have been extracted, or if extracting them fails.
        Each speech interval of the real wave beginning between
        ``min_length`` and ``max_length`` is a candidate;
        the candidate with the minimum accumulated cost is returned.
        Zero is returned if there are no speech intervals or no candidates.

        :param min_length: estimated minimum length
        :type  min_length: :class:`~aeneas.exacttiming.TimeValue`
        :param max_length: estimated maximum length
        :type  max_length: :class:`~aeneas.exacttiming.TimeValue`
        :param bool tail: if ``True``, detect the tail instead of the head
        :rtype: :class:`~aeneas.exacttiming.TimeValue`
        :raises: TypeError: if one of the parameters is not ``None`` or a number
        :raises: ValueError: if one of the parameters is negative
        """
        def _sanitize(value, default, name):
            """ Replace ``None`` with the default and convert to ``TimeValue``. """
            if value is None:
                value = default
            try:
                value = TimeValue(value)
            except (TypeError, ValueError, InvalidOperation) as exc:
                self.log_exc(u"The value of %s is not a number" % (name), exc, True, TypeError)
            if value < 0:
                self.log_exc(u"The value of %s is negative" % (name), None, True, ValueError)
            return value

        min_length = _sanitize(min_length, self.MIN_LENGTH, "min_length")
        max_length = _sanitize(max_length, self.MAX_LENGTH, "max_length")
        mws = self.rconf.mws
        min_length_frames = int(min_length / mws)
        max_length_frames = int(max_length / mws)
        self.log([u"MFCC window shift s:     %.3f", mws])
        self.log([u"Min start length s:      %.3f", min_length])
        self.log([u"Min start length frames: %d", min_length_frames])
        self.log([u"Max start length s:      %.3f", max_length])
        self.log([u"Max start length frames: %d", max_length_frames])
        self.log([u"Tail?:                   %s", str(tail)])

        self.log(u"Synthesizing query...")
        synt_duration = max_length * self.QUERY_FACTOR
        self.log([u"Synthesizing at least %.3f seconds", synt_duration])
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        try:
            synt = Synthesizer(rconf=self.rconf, logger=self.logger)
            # the returned value is not needed,
            # only the audio written to the temporary file is
            synt.synthesize(
                self.text_file,
                tmp_file_path,
                quit_after=synt_duration,
                backwards=tail
            )
            self.log(u"Synthesizing query... done")

            self.log(u"Extracting MFCCs for query...")
            query_mfcc = AudioFileMFCC(tmp_file_path, rconf=self.rconf, logger=self.logger)
            self.log(u"Extracting MFCCs for query... done")
        finally:
            # do not leave the temporary file behind
            # if synthesizing or extracting the MFCCs fails
            self.log(u"Cleaning up...")
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log(u"Cleaning up... done")

        search_window = max_length * self.AUDIO_FACTOR
        search_window_end = min(int(search_window / mws), self.real_wave_mfcc.all_length)
        self.log([u"Query MFCC length (frames): %d", query_mfcc.all_length])
        self.log([u"Real MFCC length (frames):  %d", self.real_wave_mfcc.all_length])
        self.log([u"Search window end (s):      %.3f", search_window])
        self.log([u"Search window end (frames): %d", search_window_end])

        if tail:
            self.log(u"Tail => reversing real_wave_mfcc and query_mfcc")
            self.real_wave_mfcc.reverse()
            query_mfcc.reverse()

        # NOTE: VAD will be run here, if not done before
        speech_intervals = self.real_wave_mfcc.intervals(speech=True, time=False)
        if len(speech_intervals) < 1:
            self.log(u"No speech intervals, hence no start found")
            if tail:
                # restore the original orientation of the real wave
                self.real_wave_mfcc.reverse()
            return TimeValue("0.000")

        # generate a list of begin indices
        # search_end is the end of the last interval examined,
        # that is, the first one ending at or after the search window end,
        # or the last one available
        search_end = None
        candidates_begin = []
        for interval in speech_intervals:
            if (interval[0] >= min_length_frames) and (interval[0] <= max_length_frames):
                candidates_begin.append(interval[0])
            search_end = interval[1]
            if search_end >= search_window_end:
                break

        # for each begin index, compute the acm cost
        # to match the query
        # note that we take the min over the last column of the acm
        # meaning that we allow to match the entire query wave
        # against a portion of the real wave
        candidates = []
        for candidate_begin in candidates_begin:
            self.log([u"Candidate interval starting at %d == %.3f", candidate_begin, candidate_begin * mws])
            try:
                rwm = AudioFileMFCC(
                    mfcc_matrix=self.real_wave_mfcc.all_mfcc[:, candidate_begin:search_end],
                    rconf=self.rconf,
                    logger=self.logger
                )
                dtw = DTWAligner(
                    real_wave_mfcc=rwm,
                    synt_wave_mfcc=query_mfcc,
                    rconf=self.rconf,
                    logger=self.logger
                )
                acm = dtw.compute_accumulated_cost_matrix()
                last_column = acm[:, -1]
                min_value = numpy.min(last_column)
                min_index = numpy.argmin(last_column)
                self.log([u"Candidate interval: %d %d == %.3f %.3f", candidate_begin, search_end, candidate_begin * mws, search_end * mws])
                self.log([u"  Min value: %.6f", min_value])
                self.log([u"  Min index: %d == %.3f", min_index, min_index * mws])
                candidates.append((min_value, candidate_begin, min_index))
            except Exception as exc:
                # a failing candidate is logged and skipped
                self.log_exc(u"An unexpected error occurred while running _detect", exc, False, None)

        # reverse again the real wave
        if tail:
            self.log(u"Tail => reversing real_wave_mfcc again")
            self.real_wave_mfcc.reverse()

        # return
        if len(candidates) < 1:
            self.log(u"No candidates found")
            return TimeValue("0.000")
        self.log(u"Candidates:")
        for candidate in candidates:
            self.log([u"  Value: %.6f Begin Time: %.3f Min Index: %d", candidate[0], candidate[1] * mws, candidate[2]])
        # candidates are (cost, begin, index) tuples,
        # hence the minimum tuple is the one with the minimum cost
        best = min(candidates)[1]
        self.log([u"Best candidate: %d == %.3f", best, best * mws])
        return best * mws