コード例 #1
0
ファイル: run_sd.py プロジェクト: ptrwtts/aeneas
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX : l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX : l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX : l3_id_regex,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX : id_regex,
            gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX : class_regex,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT : sort
        }
        if (text_format == TextFileFormat.MUNPARSED) and ((l1_id_regex is None) or (l2_id_regex is None) or (l3_id_regex is None)):
            self.print_error(u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format")
            return self.ERROR_EXIT_CODE
        if (text_format == TextFileFormat.UNPARSED) and (id_regex is None) and (class_regex is None):
            self.print_error(u"You must specify --id-regex and/or --class-regex for unparsed format")
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(self.actual_arguments[2])

        audio_file_path = self.actual_arguments[3]
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" % (len(text_file)))

        self.print_info(u"Reading audio...")
        try:
            audio_file_mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
        except AudioFileConverterError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        self.print_info(u"Reading audio... done")

        self.print_info(u"Running VAD...")
        audio_file_mfcc.run_vad()
        self.print_info(u"Running VAD... done")

        min_head = gf.safe_float(self.has_option_with_value(u"--min-head"), None)
        max_head = gf.safe_float(self.has_option_with_value(u"--max-head"), None)
        min_tail = gf.safe_float(self.has_option_with_value(u"--min-tail"), None)
        max_tail = gf.safe_float(self.has_option_with_value(u"--max-tail"), None)

        self.print_info(u"Detecting audio interval...")
        start_detector = SD(audio_file_mfcc, text_file, rconf=self.rconf, logger=self.logger)
        start, end = start_detector.detect_interval(min_head, max_head, min_tail, max_tail)
        self.print_info(u"Detecting audio interval... done")

        self.print_result(audio_file_mfcc.audio_length, start, end)
        return self.NO_ERROR_EXIT_CODE
コード例 #2
0
def main():
    """ Entry point """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]
    verbose = False
    parameters = {}

    for i in range(4, len(sys.argv) - 1):
        args = sys.argv[i].split("=")
        if len(args) == 1:
            verbose = (args[0] in ["v", "-v", "verbose", "--verbose"])
        if len(args) == 2:
            key, value = args
            if key == "id_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX] = value
            if key == "class_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX] = value
            if key == "sort":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT] = value
            if key == "min_head_length":
                parameters["min_head_length"] = float(value)
            if key == "max_head_length":
                parameters["max_head_length"] = float(value)
            if key == "min_tail_length":
                parameters["min_head_length"] = float(value)
            if key == "max_tail_length":
                parameters["max_tail_length"] = float(value)

    if not gf.can_run_c_extension():
        print "[WARN] Unable to load Python C Extensions"
        print "[WARN] Running the slower pure Python code"
        print "[WARN] See the README file for directions to compile the Python C Extensions"

    logger = Logger(tee=verbose)

    print "[INFO] Reading audio..."
    tmp_handler, tmp_file_path = tempfile.mkstemp(suffix=".wav",
                                                  dir=gf.custom_tmp_dir())
    converter = FFMPEGWrapper(logger=logger)
    converter.convert(audio_file_path, tmp_file_path)
    audio_file = AudioFile(tmp_file_path)
    print "[INFO] Reading audio... done"

    print "[INFO] Reading text..."
    if text_format == "list":
        text_file = TextFile()
        text_file.read_from_list(text_file_path.split("|"))
    else:
        text_file = TextFile(text_file_path, text_format, parameters)
    text_file.set_language(language)
    print "[INFO] Reading text... done"

    print "[INFO] Detecting audio interval..."
    sd = SD(audio_file, text_file, logger=logger)
    min_head_length = gc.SD_MIN_HEAD_LENGTH
    if "min_head_length" in parameters:
        min_head_length = parameters["min_head_length"]
    max_head_length = gc.SD_MAX_HEAD_LENGTH
    if "max_head_length" in parameters:
        max_head_length = parameters["max_head_length"]
    min_tail_length = gc.SD_MIN_TAIL_LENGTH
    if "min_tail_length" in parameters:
        min_tail_length = parameters["min_tail_length"]
    max_tail_length = gc.SD_MAX_TAIL_LENGTH
    if "max_tail_length" in parameters:
        max_tail_length = parameters["max_tail_length"]
    start, end = sd.detect_interval(min_head_length, max_head_length,
                                    min_tail_length, max_tail_length)
    zero = 0
    audio_len = audio_file.audio_length
    head_len = start
    text_len = end - start
    tail_len = audio_len - end
    print "[INFO] Detecting audio interval... done"
    print "[INFO] "
    print "[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len)
    print "[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len)
    print "[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len)
    print "[INFO] "
    zero_h = gf.time_to_hhmmssmmm(0)
    start_h = gf.time_to_hhmmssmmm(start)
    end_h = gf.time_to_hhmmssmmm(end)
    audio_len_h = gf.time_to_hhmmssmmm(audio_len)
    head_len_h = gf.time_to_hhmmssmmm(head_len)
    text_len_h = gf.time_to_hhmmssmmm(text_len)
    tail_len_h = gf.time_to_hhmmssmmm(tail_len)
    print "[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h)
    print "[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h)
    print "[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h)

    #print "[INFO]   Cleaning up..."
    cleanup(tmp_handler, tmp_file_path)
コード例 #3
0
ファイル: run_sd.py プロジェクト: fduch2k/aeneas
def main():
    """ Entry point """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]
    verbose = False
    parameters = {}

    for i in range(4, len(sys.argv)-1):
        args = sys.argv[i].split("=")
        if len(args) == 1:
            verbose = (args[0] in ["v", "-v", "verbose", "--verbose"])
        if len(args) == 2:
            key, value = args
            if key == "id_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX] = value
            if key == "class_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX] = value
            if key == "sort":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT] = value
            if key == "min_head_length":
                parameters["min_head_length"] = float(value)
            if key == "max_head_length":
                parameters["max_head_length"] = float(value)
            if key == "min_tail_length":
                parameters["min_head_length"] = float(value)
            if key == "max_tail_length":
                parameters["max_tail_length"] = float(value)

    if not gf.can_run_c_extension():
        print "[WARN] Unable to load Python C Extensions"
        print "[WARN] Running the slower pure Python code"
        print "[WARN] See the README file for directions to compile the Python C Extensions"

    logger = Logger(tee=verbose)

    print "[INFO] Reading audio..."
    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    converter = FFMPEGWrapper(logger=logger)
    converter.convert(audio_file_path, tmp_file_path)
    audio_file = AudioFile(tmp_file_path)
    print "[INFO] Reading audio... done"

    print "[INFO] Reading text..."
    if text_format == "list":
        text_file = TextFile()
        text_file.read_from_list(text_file_path.split("|"))
    else:
        text_file = TextFile(text_file_path, text_format, parameters)
    text_file.set_language(language)
    print "[INFO] Reading text... done"

    print "[INFO] Detecting audio interval..."
    sd = SD(audio_file, text_file, logger=logger)
    min_head_length = gc.SD_MIN_HEAD_LENGTH
    if "min_head_length" in parameters:
        min_head_length = parameters["min_head_length"]
    max_head_length = gc.SD_MAX_HEAD_LENGTH
    if "max_head_length" in parameters:
        max_head_length = parameters["max_head_length"]
    min_tail_length = gc.SD_MIN_TAIL_LENGTH
    if "min_tail_length" in parameters:
        min_tail_length = parameters["min_tail_length"]
    max_tail_length = gc.SD_MAX_TAIL_LENGTH
    if "max_tail_length" in parameters:
        max_tail_length = parameters["max_tail_length"]
    start, end = sd.detect_interval(
        min_head_length,
        max_head_length,
        min_tail_length,
        max_tail_length
    )
    zero = 0
    audio_len = audio_file.audio_length
    head_len = start
    text_len = end - start
    tail_len = audio_len - end
    print "[INFO] Detecting audio interval... done"
    print "[INFO] "
    print "[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len)
    print "[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len)
    print "[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len)
    print "[INFO] "
    zero_h = gf.time_to_hhmmssmmm(0)
    start_h = gf.time_to_hhmmssmmm(start)
    end_h = gf.time_to_hhmmssmmm(end)
    audio_len_h = gf.time_to_hhmmssmmm(audio_len)
    head_len_h = gf.time_to_hhmmssmmm(head_len)
    text_len_h = gf.time_to_hhmmssmmm(text_len)
    tail_len_h = gf.time_to_hhmmssmmm(tail_len)
    print "[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h)
    print "[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h)
    print "[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h)

    #print "[INFO]   Cleaning up..."
    cleanup(tmp_handler, tmp_file_path)
コード例 #4
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
        }
        if (text_format == TextFileFormat.MUNPARSED) and (
            (l1_id_regex is None) or (l2_id_regex is None) or
            (l3_id_regex is None)):
            self.print_error(
                u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format"
            )
            return self.ERROR_EXIT_CODE
        if (text_format == TextFileFormat.UNPARSED) and (
                id_regex is None) and (class_regex is None):
            self.print_error(
                u"You must specify --id-regex and/or --class-regex for unparsed format"
            )
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(self.actual_arguments[2])

        audio_file_path = self.actual_arguments[3]
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(
                u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" %
                        (len(text_file)))

        self.print_info(u"Reading audio...")
        try:
            audio_file_mfcc = AudioFileMFCC(audio_file_path,
                                            rconf=self.rconf,
                                            logger=self.logger)
        except AudioFileConverterError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        self.print_info(u"Reading audio... done")

        self.print_info(u"Running VAD...")
        audio_file_mfcc.run_vad()
        self.print_info(u"Running VAD... done")

        min_head = gf.safe_float(self.has_option_with_value(u"--min-head"),
                                 None)
        max_head = gf.safe_float(self.has_option_with_value(u"--max-head"),
                                 None)
        min_tail = gf.safe_float(self.has_option_with_value(u"--min-tail"),
                                 None)
        max_tail = gf.safe_float(self.has_option_with_value(u"--max-tail"),
                                 None)

        self.print_info(u"Detecting audio interval...")
        start_detector = SD(audio_file_mfcc,
                            text_file,
                            rconf=self.rconf,
                            logger=self.logger)
        start, end = start_detector.detect_interval(min_head, max_head,
                                                    min_tail, max_tail)
        self.print_info(u"Detecting audio interval... done")

        self.print_result(audio_file_mfcc.audio_length, start, end)
        return self.NO_ERROR_EXIT_CODE