Example #1
0
 def inner(c_ext, cew_subprocess, cache):
     """
     Synthesize ``text_file`` once with the given runtime switches
     and check the total time reported by the TTS engine.

     If one of the caught exception types is raised, the output file is
     removed, the cache is cleared (when requested and an engine exists),
     and the exception is checked against ``expected_exc``.

     :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
     :param cew_subprocess: value stored under
                            ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
     :param cache: value stored under ``RuntimeConfiguration.TTS_CACHE``;
                   when true, the engine cache is cleared afterwards
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     # Bind the name before the ``try``: the handler below compares it with
     # ``None``, and the failure may occur before the engine is created.
     tts_engine = None
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = self.TTS
         rconf[RuntimeConfiguration.TTS_PATH] = self.TTS_PATH
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.
               CEW_SUBPROCESS_ENABLED] = cew_subprocess
         rconf[RuntimeConfiguration.TTS_CACHE] = cache
         tts_engine = self.TTS_CLASS(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file, output_file_path, quit_after, backwards)
         gf.delete_file(handler, output_file_path)
         if cache:
             tts_engine.clear_cache()
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         if (cache) and (tts_engine is not None):
             tts_engine.clear_cache()
         # Re-raise inside assertRaises so the test passes only when the
         # failure has the expected type.
         with self.assertRaises(expected_exc):
             raise exc
Example #2
0
 def synthesize_multiple(self,
                         text_file,
                         ofp=None,
                         quit_after=None,
                         backwards=False,
                         zero_length=False):
     """
     Synthesize ``text_file`` with a ``FESTIVALWrapper`` engine and
     check the total time it reports.

     :param text_file: text file object passed to the engine
     :param ofp: output file path; when ``None`` a temporary ``.wav``
                 file is created
     :param quit_after: forwarded to the engine's ``synthesize_multiple``
     :param backwards: forwarded to the engine's ``synthesize_multiple``
     :param zero_length: if true, the total time must equal ``0.0``,
                         otherwise it must be greater than ``0.0``
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         _, total_time, _ = tts_engine.synthesize_multiple(
             text_file, output_file_path, quit_after, backwards)
     finally:
         # Remove the output file whether synthesis succeeded or raised,
         # whatever the exception type; the exception still propagates.
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(total_time, 0.0)
     else:
         self.assertGreater(total_time, 0.0)
Example #3
0
def align(text_path, audio_path, align_out_path, word_align=True):
    """
    Run an aeneas task (``task_language=hi``, JSON output) that aligns
    ``text_path`` with ``audio_path`` and writes the sync map to
    ``align_out_path``.

    :param text_path: path assigned to the task's text file
    :param audio_path: path assigned to the task's audio file
    :param align_out_path: path where the sync map is written and then
                           re-serialized
    :param word_align: if true, use ``os_task_file_levels=3`` with
                       ``is_text_type=mplain`` and enable non-speech
                       masking; otherwise use ``is_text_type=plain`` with
                       no runtime configuration
    """
    # assemble the task configuration string piece by piece
    config_parts = [u"task_language=hi", "|os_task_file_format=json"]
    if word_align:
        config_parts.extend(["|os_task_file_levels=3", "|is_text_type=mplain"])
        runtime_conf = RuntimeConfiguration()
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True
    else:
        config_parts.append("|is_text_type=plain")
        runtime_conf = None

    # create the Task object and point it at its files
    task = Task(config_string="".join(config_parts))
    task.text_file_path_absolute = text_path
    task.audio_file_path_absolute = audio_path
    task.sync_map_file_path_absolute = align_out_path

    # process the Task, then write the sync map to align_out_path
    executor = ExecuteTask(task, rconf=runtime_conf)
    executor.execute()
    task.output_sync_map_file()

    # Re-serialize the JSON with ensure_ascii=False so non-ASCII text is
    # written as-is instead of as \uXXXX escapes.
    with open(align_out_path, 'r', encoding='utf8') as source:
        alignment = json.load(source)
    with open(align_out_path, 'w', encoding='utf8') as sink:
        json.dump(alignment, sink, ensure_ascii=False, indent=2)
Example #4
0
 def test_use_cache(self):
     """
     With ``TTS_CACHE`` enabled, the engine must report ``use_cache``
     and expose a non-``None`` ``cache``.

     Does nothing when ``self.TTS`` is the empty string.
     """
     if self.TTS != u"":
         cached_conf = RuntimeConfiguration()
         cached_conf[RuntimeConfiguration.TTS_CACHE] = True
         engine = self.TTS_CLASS(rconf=cached_conf)
         self.assertTrue(engine.use_cache)
         self.assertIsNotNone(engine.cache)
 def test_set_tts(self):
     """
     After ``set_tts`` at levels 1, 2 and 3 (applied in order to the same
     object), ``tts`` must be "espeak" and ``tts_path`` must be ``None``.
     """
     conf = RuntimeConfiguration()
     for level in (1, 2, 3):
         conf.set_tts(level=level)
         self.assertEqual(conf.tts, "espeak")
         self.assertEqual(conf.tts_path, None)
 def test_set_granularity(self):
     """
     After ``set_granularity`` at each level (applied in order to the same
     object), check ``mmn``, ``mwl`` and ``mws`` against the expected values.
     """
     # (level, expected mwl, expected mws); mmn is expected False throughout
     expectations = (
         (1, "0.100", "0.040"),
         (2, "0.050", "0.020"),
         (3, "0.020", "0.005"),
     )
     conf = RuntimeConfiguration()
     for level, mwl_text, mws_text in expectations:
         conf.set_granularity(level=level)
         self.assertEqual(conf.mmn, False)
         self.assertEqual(conf.mwl, TimeValue(mwl_text))
         self.assertEqual(conf.mws, TimeValue(mws_text))
Example #7
0
 def synthesize_single(self, text, language, ofp=None, zero_length=False):
     """
     Synthesize ``text`` with a ``FESTIVALWrapper`` engine and check
     the value it returns.

     :param text: text passed to the engine
     :param language: language passed to the engine
     :param ofp: output file path; when ``None`` a temporary ``.wav``
                 file is created
     :param zero_length: if true, the result must equal ``0``,
                         otherwise it must be greater than ``0``
     """
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language,
                                               output_file_path)
     finally:
         # Remove the output file whether synthesis succeeded or raised,
         # whatever the exception type; the exception still propagates.
         gf.delete_file(handler, output_file_path)
     if zero_length:
         self.assertEqual(result, 0)
     else:
         self.assertGreater(result, 0)
Example #8
0
    def __adjust_durations(self, subs: List[SubRipItem], audio_file_path: str, stretch_in_lang: str) -> List[SubRipItem]:
        """Re-time subtitle cues by aligning their text against the audio with aeneas.

        The audio between the first cue's start and the last cue's end is
        extracted to a segment file, the cue texts are written to a text file
        next to it, and an aeneas task produces an SRT sync map that is loaded
        back, re-indexed to match ``subs`` and shifted by the first cue's start.
        All intermediate files are removed before returning, on success or failure.

        Arguments:
            subs: Subtitle cues to adjust; must contain at least one cue
                (``subs[0]`` is read unconditionally).
            audio_file_path: Path of the audio file the segment is extracted from.
            stretch_in_lang: Language code inserted into ``task_language=``.

        Returns:
            The subtitles loaded from the generated sync map, after shifting.
        """
        from aeneas.executetask import ExecuteTask
        from aeneas.task import Task
        from aeneas.runtimeconfiguration import RuntimeConfiguration
        from aeneas.logger import Logger as AeneasLogger

        # Initialise a DTW alignment task
        task_config_string = (
            "task_language={}|os_task_file_format=srt|is_text_type=subtitles".format(stretch_in_lang)
        )
        runtime_config_string = "dtw_algorithm=stripe"  # stripe or exact
        task = Task(config_string=task_config_string)

        # Record each intermediate path as soon as it is known, so cleanup
        # also covers failures that happen before the task attributes are set.
        intermediate_paths = []
        try:
            segment_path, _ = MediaHelper.extract_audio_from_start_to_end(
                audio_file_path,
                str(subs[0].start),
                str(subs[-1].end),
            )
            intermediate_paths.append(segment_path)

            # Create a text file for DTW alignments
            root, _ = os.path.splitext(segment_path)
            text_file_path = "{}.txt".format(root)
            sync_map_path = "{}.srt".format(root)
            intermediate_paths.extend([text_file_path, sync_map_path])

            with open(text_file_path, "w", encoding="utf8") as text_file:
                for sub_new in subs:
                    text_file.write(sub_new.text)
                    text_file.write(os.linesep * 2)

            # Assigned only after the text file exists, as in the original order.
            task.audio_file_path_absolute = segment_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_path

            # Tee aeneas logging only in verbose mode; quiet mode wins.
            tee = bool(Logger.VERBOSE) and not Logger.QUIET
            with self.__lock:
                # Execute the task
                ExecuteTask(
                    task=task,
                    rconf=RuntimeConfiguration(config_string=runtime_config_string),
                    logger=AeneasLogger(tee=tee),
                ).execute()

                # Output new subtitle segment to a file
                task.output_sync_map_file()

            # Load the above subtitle segment
            adjusted_subs = Subtitle.load(
                task.sync_map_file_path_absolute
            ).subs
            for index, sub_new_loaded in enumerate(adjusted_subs):
                sub_new_loaded.index = subs[index].index

            # Move the cues from segment-relative time back to the original timeline.
            adjusted_subs.shift(
                seconds=MediaHelper.get_duration_in_seconds(
                    start=None, end=str(subs[0].start)
                )
            )
            return adjusted_subs
        finally:
            # Housekeep intermediate files
            for path in intermediate_paths:
                if path is not None and os.path.exists(path):
                    os.remove(path)
 def test_config_string(self):
     """Access ``config_string`` on a default ``RuntimeConfiguration``."""
     default_conf = RuntimeConfiguration()
     # NOTE(review): this only reads the attribute and asserts nothing;
     # if ``config_string`` is a method rather than a property, it is
     # never actually called here — confirm the intent.
     default_conf.config_string
 def test_tts(self):
     """A default ``RuntimeConfiguration`` must report ``tts`` as "espeak"."""
     default_conf = RuntimeConfiguration()
     actual = default_conf.tts
     self.assertEqual(actual, "espeak")
 def test_tts_path(self):
     """A default ``RuntimeConfiguration`` must report ``tts_path`` as ``None``."""
     default_conf = RuntimeConfiguration()
     actual = default_conf.tts_path
     self.assertEqual(actual, None)
 def test_mmn(self):
     """A default ``RuntimeConfiguration`` must report ``mmn`` as ``False``."""
     default_conf = RuntimeConfiguration()
     actual = default_conf.mmn
     self.assertEqual(actual, False)
 def test_mwl(self):
     """A default ``RuntimeConfiguration`` must report ``mwl`` as ``TimeValue("0.100")``."""
     default_conf = RuntimeConfiguration()
     actual = default_conf.mwl
     self.assertEqual(actual, TimeValue("0.100"))
    def run(self, arguments, show_help=True):
        """
        Program entry point.

        Please note that the first item in ``arguments`` is discarded,
        as it is assumed to be the script/invocation name;
        pass a "dumb" placeholder if you call this method with
        a list other than ``sys.argv``.

        The method handles the help/version switches, strips the
        verbosity, runtime configuration and log switches from the
        arguments, creates the logger, runs ``perform_command()``
        and optionally writes the log to file.

        :param arguments: the list of arguments
        :type  arguments: list
        :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
                          (this also disables ``--version`` and the
                          help shown when no actual arguments are left)
        :type  show_help: bool
        :returns: the value returned by ``print_help()``,
                  ``print_name_version()`` or ``exit(exit_code)``
        :rtype: int
        """
        # convert arguments into Unicode strings
        if self.use_sys:
            # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
            if not gf.FROZEN:
                if sys.stdin.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(
                        u"The default input encoding is not UTF-8.")
                    self.print_warning(
                        u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                    )
                if sys.stdout.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(
                        u"The default output encoding is not UTF-8.")
                    self.print_warning(
                        u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                    )
            # decode using sys.stdin.encoding
            args = [gf.safe_unicode_stdin(arg) for arg in arguments]
        else:
            # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
            args = [gf.safe_unicode(arg) for arg in arguments]

        # help/version switches short-circuit everything else;
        # they are looked up in the full list, invocation name included
        if show_help:
            if u"-h" in args:
                return self.print_help(short=True)

            if u"--help" in args:
                return self.print_help(short=False)

            if u"--version" in args:
                return self.print_name_version()

        # store formal arguments
        self.formal_arguments_raw = arguments
        self.formal_arguments = args

        # to obtain the actual arguments,
        # remove the first one and "special" switches
        args = args[1:]
        # snapshot used for membership tests only: ``args`` is mutated below,
        # ``set_args`` is not
        set_args = set(args)

        # set verbosity, if requested
        # (list.remove drops only the first occurrence of each flag)
        for flag in set([u"-v", u"--verbose"]) & set_args:
            self.verbose = True
            args.remove(flag)
        for flag in set([u"-vv", u"--very-verbose"]) & set_args:
            self.verbose = True
            self.very_verbose = True
            args.remove(flag)

        # set RuntimeConfiguration string, if specified
        # NOTE(review): ``actual_arguments=False`` presumably makes the lookup
        # use the formal arguments stored above — confirm in has_option_with_value
        for flag in [u"-r", u"--runtime-configuration"]:
            rconf_string = self.has_option_with_value(flag,
                                                      actual_arguments=False)
            if rconf_string is not None:
                self.rconf = RuntimeConfiguration(rconf_string)
                args.remove("%s=%s" % (flag, rconf_string))

        # set log file path, if requested
        # ``flag=path`` uses the given path; a bare flag creates a temporary
        # ``.log`` file under the configured TMP_PATH.
        # ``log_path`` is reassigned on every iteration, but ``self.log_file_path``
        # is only updated when a path was found, so an earlier match is kept.
        log_path = None
        for flag in [u"-l", u"--log"]:
            log_path = self.has_option_with_value(flag, actual_arguments=False)
            if log_path is not None:
                args.remove("%s=%s" % (flag, log_path))
            elif flag in set_args:
                # NOTE(review): ``handler`` is not used again in this method
                handler, log_path = gf.tmp_file(
                    suffix=u".log",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH])
                args.remove(flag)
            if log_path is not None:
                self.log_file_path = log_path

        # if no actual arguments left, print help
        if (len(args) < 1) and (show_help):
            return self.print_help(short=True)

        # store actual arguments
        self.actual_arguments = args

        # create logger
        self.logger = Logger(tee=self.verbose,
                             tee_show_datetime=self.very_verbose)
        self.log([u"Formal arguments: %s", self.formal_arguments])
        self.log([u"Actual arguments: %s", self.actual_arguments])
        self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

        # perform command
        exit_code = self.perform_command()
        self.log([u"Execution completed with code %d", exit_code])

        # output log if requested
        if self.log_file_path is not None:
            self.log([
                u"User requested saving log to file '%s'", self.log_file_path
            ])
            self.logger.write(self.log_file_path)
            if self.use_sys:
                self.print_info(u"Log written to file '%s'" %
                                self.log_file_path)

        return self.exit(exit_code)
 def test_dtw_margin(self):
     """A default ``RuntimeConfiguration`` must report ``dtw_margin`` as ``TimeValue("60.000")``."""
     default_conf = RuntimeConfiguration()
     actual = default_conf.dtw_margin
     self.assertEqual(actual, TimeValue("60.000"))
Example #16
0
 def __init__(self, logger=None, rconf=None):
     """
     Store the given logger and runtime configuration.

     :param logger: logger to use; a new ``Logger`` is created when ``None``
     :param rconf: runtime configuration to use; a new
                   ``RuntimeConfiguration`` is created when ``None``
     """
     if logger is None:
         logger = Logger()
     self.logger = logger
     if rconf is None:
         rconf = RuntimeConfiguration()
     self.rconf = rconf
 def test_clone(self):
     """``clone()`` must return a distinct object with an equal ``config_string``."""
     original = RuntimeConfiguration()
     duplicate = original.clone()
     # different object identity ...
     self.assertNotEqual(id(original), id(duplicate))
     # ... but the same configuration string
     self.assertEqual(original.config_string, duplicate.config_string)
Example #18
0
# Script fragment: align a subtitles text file with an audio file using aeneas.
# ``lang``, ``args`` and ``tempfile`` are defined outside this fragment.

# Reject unsupported language codes.
# NOTE(review): the message mentions only "hi" and "eng", but "hin" is accepted too.
if lang not in ["eng", "hi", "hin"]:
    print("only hi and eng allowed for language")
    exit(1)

from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from aeneas.runtimeconfiguration import RuntimeConfiguration

# Task configuration: subtitles-type input text, SRT sync map output
config_string = u"task_language=" + lang + u"|is_text_type=subtitles|os_task_file_format=srt"

# The sync map is written to a temporary file created by mkstemp.
# NOTE(review): ``tempout`` (the descriptor returned by mkstemp) is not used
# or closed in the visible part of the script.
tempout, tempfilename = tempfile.mkstemp()
task = Task(config_string=config_string)
task.audio_file_path_absolute = args.audio
task.text_file_path_absolute = args.txt
task.sync_map_file_path_absolute = tempfilename
rconf = RuntimeConfiguration()
# This option ignores the non-word sounds in the audio
rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD] = 2.5

# To use a different Text-to-Speech engine
#rconf[RuntimeConfiguration.TTS] = "festival"

# process Task
ExecuteTask(task, rconf=rconf).execute()

# output sync map to file
task.output_sync_map_file()

# Start the output file with the "WEBVTT" header line.
# ``writelines`` on a str writes it character by character, so the result
# is the same as ``f.write("WEBVTT\n")``.
# NOTE(review): ``f`` is not closed in the visible part of the script.
f = open(args.out, "w")
f.writelines("WEBVTT\n")
Example #19
0
 def test_loggable_rconf(self):
     """A ``Loggable`` built with only ``rconf`` must keep it and still have a logger."""
     given_rconf = RuntimeConfiguration()
     subject = Loggable(rconf=given_rconf)
     self.assertEqual(given_rconf, subject.rconf)
     self.assertIsNotNone(subject.logger)
Example #20
0
 def test_loggable_rconf_logger(self):
     """A ``Loggable`` built with both ``rconf`` and ``logger`` must keep both."""
     given_logger = Logger()
     given_rconf = RuntimeConfiguration()
     subject = Loggable(rconf=given_rconf, logger=given_logger)
     self.assertEqual(given_rconf, subject.rconf)
     self.assertEqual(given_logger, subject.logger)
 def test_set_rconf_string(self):
     """
     For each ``key=value`` configuration string, build a
     ``RuntimeConfiguration`` from it and check that looking up the key
     yields the expected, already-converted value.
     """
     # one case per line: (configuration string, key to look up, expected value)
     cases = [
         (u"aba_nonspeech_tolerance=0.040", "aba_nonspeech_tolerance", TimeValue("0.040")),
         (u"aba_no_zero_duration=0.040", "aba_no_zero_duration", TimeValue("0.040")),
         (u"allow_unlisted_languages=True", "allow_unlisted_languages", True),
         (u"c_extensions=False", "c_extensions", False),
         (u"cdtw=False", "cdtw", False),
         (u"cew=False", "cew", False),
         (u"cmfcc=False", "cmfcc", False),
         (u"cew_subprocess_enabled=True", "cew_subprocess_enabled", True),
         (u"cew_subprocess_path=/foo/bar/python", "cew_subprocess_path", "/foo/bar/python"),
         (u"downloader_sleep=5.000", "downloader_sleep", TimeValue("5.000")),
         (u"downloader_retry_attempts=5", "downloader_retry_attempts", 5),
         (u"dtw_algorithm=exact", "dtw_algorithm", "exact"),
         (u"dtw_margin=100", "dtw_margin", TimeValue("100")),
         (u"ffmpeg_path=/foo/bar/ffmpeg", "ffmpeg_path", "/foo/bar/ffmpeg"),
         (u"ffmpeg_sample_rate=8000", "ffmpeg_sample_rate", 8000),
         (u"ffprobe_path=/foo/bar/ffprobe", "ffprobe_path", "/foo/bar/ffprobe"),
         (u"job_max_tasks=10", "job_max_tasks", 10),
         (u"mfcc_filters=100", "mfcc_filters", 100),
         (u"mfcc_size=20", "mfcc_size", 20),
         (u"mfcc_fft_order=256", "mfcc_fft_order", 256),
         (u"mfcc_lower_frequency=120.0", "mfcc_lower_frequency", 120.0),
         (u"mfcc_upper_frequency=5000.0", "mfcc_upper_frequency", 5000.0),
         (u"mfcc_emphasis_factor=1.0", "mfcc_emphasis_factor", 1.0),
         (u"mfcc_mask_nonspeech=True", "mfcc_mask_nonspeech", True),
         (u"mfcc_window_length=0.360", "mfcc_window_length", TimeValue("0.360")),
         (u"mfcc_window_shift=0.160", "mfcc_window_shift", TimeValue("0.160")),
         (u"dtw_margin_l1=100", "dtw_margin_l1", TimeValue("100")),
         (u"mfcc_mask_nonspeech_l1=True", "mfcc_mask_nonspeech_l1", True),
         (u"mfcc_window_length_l1=0.360", "mfcc_window_length_l1", TimeValue("0.360")),
         (u"mfcc_window_shift_l1=0.160", "mfcc_window_shift_l1", TimeValue("0.160")),
         (u"dtw_margin_l2=30", "dtw_margin_l2", TimeValue("30")),
         (u"mfcc_mask_nonspeech_l2=True", "mfcc_mask_nonspeech_l2", True),
         (u"mfcc_window_length_l2=0.360", "mfcc_window_length_l2", TimeValue("0.360")),
         (u"mfcc_window_shift_l2=0.160", "mfcc_window_shift_l2", TimeValue("0.160")),
         (u"dtw_margin_l3=10", "dtw_margin_l3", TimeValue("10")),
         (u"mfcc_mask_nonspeech_l3=True", "mfcc_mask_nonspeech_l3", True),
         (u"mfcc_window_length_l3=0.360", "mfcc_window_length_l3", TimeValue("0.360")),
         (u"mfcc_window_shift_l3=0.160", "mfcc_window_shift_l3", TimeValue("0.160")),
         (u"mfcc_mask_extend_speech_after=1", "mfcc_mask_extend_speech_after", 1),
         (u"mfcc_mask_extend_speech_before=1", "mfcc_mask_extend_speech_before", 1),
         (u"mfcc_mask_log_energy_threshold=0.750", "mfcc_mask_log_energy_threshold", 0.750),
         (u"mfcc_mask_min_nonspeech_length=5", "mfcc_mask_min_nonspeech_length", 5),
         (u"nuance_tts_api_id=foo", "nuance_tts_api_id", "foo"),
         (u"nuance_tts_api_key=bar", "nuance_tts_api_key", "bar"),
         (u"safety_checks=False", "safety_checks", False),
         (u"task_max_audio_length=1000", "task_max_audio_length", TimeValue("1000")),
         (u"task_max_text_length=1000", "task_max_text_length", 1000),
         (u"tmp_path=/foo/bar", "tmp_path", "/foo/bar"),
         (u"tts=festival", "tts", "festival"),
         (u"tts_path=/foo/bar/festival", "tts_path", "/foo/bar/festival"),
         (u"tts_api_sleep=5.000", "tts_api_sleep", TimeValue("5.000")),
         (u"tts_api_retry_attempts=3", "tts_api_retry_attempts", 3),
         (u"tts_voice_code=ru", "tts_voice_code", "ru"),
         (u"tts_cache=True", "tts_cache", True),
         (u"tts_l1=festival", "tts_l1", "festival"),
         (u"tts_path_l1=/foo/bar/festival", "tts_path_l1", "/foo/bar/festival"),
         (u"tts_l2=festival", "tts_l2", "festival"),
         (u"tts_path_l2=/foo/bar/festival", "tts_path_l2", "/foo/bar/festival"),
         (u"tts_l3=festival", "tts_l3", "festival"),
         (u"tts_path_l3=/foo/bar/festival", "tts_path_l3", "/foo/bar/festival"),
         (u"vad_extend_speech_after=1.000", "vad_extend_speech_after", TimeValue("1.000")),
         (u"vad_extend_speech_before=1.000", "vad_extend_speech_before", TimeValue("1.000")),
         (u"vad_log_energy_threshold=0.750", "vad_log_energy_threshold", 0.750),
         (u"vad_min_nonspeech_length=0.500", "vad_min_nonspeech_length", TimeValue("0.500")),
     ]
     for config_text, lookup_key, expected in cases:
         parsed = RuntimeConfiguration(config_text)
         self.assertEqual(parsed[lookup_key], expected)
 def test_safety_checks(self):
     """A default ``RuntimeConfiguration`` must report ``safety_checks`` as ``True``."""
     default_conf = RuntimeConfiguration()
     actual = default_conf.safety_checks
     self.assertEqual(actual, True)
Example #23
0
 def test_convert_rc(self):
     """Convert every entry of ``self.FILES`` using ``ffmpeg_sample_rate=44100``."""
     runtime_conf = RuntimeConfiguration(u"ffmpeg_sample_rate=44100")
     for entry in self.FILES:
         source_path = entry["path"]
         self.convert(source_path, runtime_configuration=runtime_conf)
 def test_sample_rate(self):
     rconf = RuntimeConfiguration()
     self.assertEqual(rconf.sample_rate, 16000)