def _output_smil(self, output_file, parameters=None):
    """
    Write the fragments to ``output_file`` as a SMIL document.

    Every fragment becomes one ``<par>`` element holding a ``<text>``
    reference (page reference plus the fragment identifier) and an
    ``<audio>`` clip delimited by the fragment begin and end times.

    Despite its ``None`` default, ``parameters`` is subscripted
    unconditionally, so callers must pass a mapping that contains both
    ``gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF`` and
    ``gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF``.

    :param output_file: object whose ``write`` method receives the text
    :param parameters: mapping holding the SMIL page and audio references
    """
    # Both references are looked up before anything is written.
    page_ref = parameters[gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF]
    clip_ref = parameters[gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF]
    emit = output_file.write

    emit("<smil xmlns=\"http://www.w3.org/ns/SMIL\" xmlns:epub=\"http://www.idpf.org/2007/ops\" version=\"3.0\">\n")
    emit(" <body>\n")
    emit(" <seq id=\"s%s\" epub:textref=\"%s\">\n" % (str(1).zfill(6), page_ref))

    # <par> identifiers are 1-based and zero-padded to six digits.
    for index, fragment in enumerate(self.fragments, 1):
        text_fragment = fragment.text_fragment
        emit(" <par id=\"p%s\">\n" % (str(index).zfill(6)))
        emit(" <text src=\"%s#%s\"/>\n" % (page_ref, text_fragment.identifier))
        clip_begin = gf.time_to_hhmmssmmm(fragment.begin)
        clip_end = gf.time_to_hhmmssmmm(fragment.end)
        emit(" <audio clipBegin=\"%s\" clipEnd=\"%s\" src=\"%s\"/>\n" % (clip_begin, clip_end, clip_ref))
        emit(" </par>\n")

    emit(" </seq>\n")
    emit(" </body>\n")
    emit("</smil>\n")
def _output_vtt(self, output_file):
    """
    Write the fragments to ``output_file`` in WebVTT format.

    After the ``WEBVTT`` header, each fragment is emitted as a cue made of
    its 1-based number, a ``begin --> end`` timing line, the fragment text
    and a terminating blank line.

    :param output_file: object whose ``write`` method receives the text
    """
    output_file.write("WEBVTT\n\n")
    for cue_number, fragment in enumerate(self.fragments, 1):
        cue = fragment.text_fragment
        output_file.write("%d\n" % cue_number)
        begin_stamp = gf.time_to_hhmmssmmm(fragment.begin)
        end_stamp = gf.time_to_hhmmssmmm(fragment.end)
        output_file.write("%s --> %s\n" % (begin_stamp, end_stamp))
        output_file.write("%s\n" % cue.text)
        output_file.write("\n")
def createSyncedLyricsFile(lyrics, file):
    """
    Align ``lyrics`` with the audio in ``file`` and write an ``.lrc`` file.

    The lyrics are first written to the scratch file ``tempSync.txt`` in the
    current working directory, then an aeneas task (French, plain text) is
    executed against the audio file. Each leaf of the resulting sync map
    becomes one ``[mm:ss.xx]text`` line in the output file.

    On success the module-level counter ``lyricsSynced`` is incremented.
    Any exception raised while aligning or writing is caught: the
    module-level counter ``errors`` is incremented instead and nothing is
    propagated to the caller.

    :param lyrics: the plain lyrics text to align
    :param file: path of the audio file; the output path is this path with
                 its last four characters replaced by ``.lrc``
    """
    global lyricsSynced, errors

    # aeneas expects UTF-8 text input, so do not rely on the locale encoding.
    # The context manager closes the scratch file even if the write fails.
    with open("tempSync.txt", "w+", encoding="utf-8") as lyrics_file:
        lyrics_file.write(lyrics)

    config = TaskConfiguration()
    config[gc.PPN_TASK_LANGUAGE] = Language.FRA
    config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
    config[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.AUDH
    task = Task()
    task.configuration = config

    try:
        task.audio_file_path_absolute = file
        task.text_file_path_absolute = "tempSync.txt"
        ExecuteTask(task).execute()

        # Closing via ``with`` prevents a leaked handle when a write raises.
        with open(file[:-4] + ".lrc", "w+") as synced_file:
            for fragment in task.sync_map_leaves():
                # Drop the leading "hh:" and the last millisecond digit.
                stamp = gf.time_to_hhmmssmmm(fragment.interval.begin, '.')[3:-1]
                synced_file.write('[' + stamp + ']' + fragment.text + '\n')

        print(" Sync Added", sep=' ', end='', flush=True)
        lyricsSynced += 1
    except Exception:
        # Best-effort by design: count the failure and carry on.
        errors += 1
        print(" Sync error", sep=' ', end='', flush=True)
def _write_vtt(self, output_file):
    """
    Write the fragments to ``output_file`` as a WebVTT file.

    After the ``WEBVTT`` header, each fragment is emitted as a cue made of
    its 1-based number, a ``begin --> end`` timing line, one output line
    per entry of the fragment's ``lines`` and a terminating blank line.

    :param output_file: object whose ``write`` method receives the text
    """
    write = output_file.write
    write("WEBVTT\n\n")
    for position, fragment in enumerate(self.fragments, 1):
        text_fragment = fragment.text_fragment
        write("%d\n" % position)
        timing = (
            gf.time_to_hhmmssmmm(fragment.begin),
            gf.time_to_hhmmssmmm(fragment.end),
        )
        write("%s --> %s\n" % timing)
        for text_line in text_fragment.lines:
            write("%s\n" % text_line)
        write("\n")
def print_result(self, audio_len, start, end):
    """
    Print result of SD.

    The head, text and tail intervals are reported twice: first as
    seconds with three decimals, then formatted by
    ``gf.time_to_hhmmssmmm``. The whole report is passed to
    ``self.print_info`` as a single newline-joined string.

    :param audio_len: the length of the entire audio file, in seconds
    :type  audio_len: float
    :param start: the start position of the spoken text
    :type  start: float
    :param end: the end position of the spoken text
    :type  end: float
    """
    head_len = start
    text_len = end - start
    tail_len = audio_len - end

    # First section: plain seconds.
    report = [
        u"",
        u"Head: %.3f %.3f (%.3f)" % (0, start, head_len),
        u"Text: %.3f %.3f (%.3f)" % (start, end, text_len),
        u"Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len),
        u"",
    ]

    # Second section: the same values as clock strings.
    to_clock = gf.time_to_hhmmssmmm
    zero_h = to_clock(0)
    start_h = to_clock(start)
    end_h = to_clock(end)
    audio_len_h = to_clock(audio_len)
    head_len_h = to_clock(head_len)
    text_len_h = to_clock(text_len)
    tail_len_h = to_clock(tail_len)
    report.extend([
        "Head: %s %s (%s)" % (zero_h, start_h, head_len_h),
        "Text: %s %s (%s)" % (start_h, end_h, text_len_h),
        "Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h),
        u"",
    ])

    self.print_info(u"\n".join(report))
def test_time_to_hhmmssmmm(self):
    """Check ``gf.time_to_hhmmssmmm`` against known (input, output) pairs."""
    expectations = [
        (None, "00:00:00.000"),
        (0.000, "00:00:00.000"),
        (12.000, "00:00:12.000"),
        (12.345, "00:00:12.345"),
        (60, "00:01:00.000"),
        (83.000, "00:01:23.000"),
        (83.456, "00:01:23.456"),
        (3600.000, "01:00:00.000"),
        (3612.000, "01:00:12.000"),
        (3612.340, "01:00:12.340"),  # numerical issues
        (4980.000, "01:23:00.000"),
        (5025.000, "01:23:45.000"),
        (5025.670, "01:23:45.670"),  # numerical issues
    ]
    for seconds, expected in expectations:
        self.assertEqual(gf.time_to_hhmmssmmm(seconds), expected)
def test_time_to_hhmmssmmm(self):
    """Verify the strings returned by ``gf.time_to_hhmmssmmm`` for sample inputs."""
    cases = (
        (None, "00:00:00.000"),
        (0.000, "00:00:00.000"),
        (12.000, "00:00:12.000"),
        (12.345, "00:00:12.345"),
        (60, "00:01:00.000"),
        (83.000, "00:01:23.000"),
        (83.456, "00:01:23.456"),
        (3600.000, "01:00:00.000"),
        (3612.000, "01:00:12.000"),
        (3612.340, "01:00:12.340"),  # numerical issues
        (4980.000, "01:23:00.000"),
        (5025.000, "01:23:45.000"),
        (5025.670, "01:23:45.670"),  # numerical issues
    )
    for value, formatted in cases:
        actual = gf.time_to_hhmmssmmm(value)
        self.assertEqual(actual, formatted)
def main():
    """
    Entry point.

    Expected command line::

        LANGUAGE TEXT_FILE_PATH TEXT_FORMAT [OPTION ...] AUDIO_FILE_PATH

    Every argument between the text format and the audio path is an option:
    either a verbose flag (``v``, ``-v``, ``verbose``, ``--verbose``) or a
    ``key=value`` pair with key ``id_regex``, ``class_regex``, ``sort``,
    ``min_head_length``, ``max_head_length``, ``min_tail_length`` or
    ``max_tail_length`` (the four lengths are parsed as floats).
    Unrecognized options are ignored.

    The audio file is converted to a temporary wav file, the head, text and
    tail intervals are detected and printed, and the temporary file is
    cleaned up even when one of these steps raises.
    """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]

    verbose = False
    parameters = {}
    text_keys = {
        "id_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
        "class_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
        "sort": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,
    }
    length_keys = (
        "min_head_length",
        "max_head_length",
        "min_tail_length",
        "max_tail_length",
    )
    for argument in sys.argv[4:-1]:
        # Split on the first "=" only, so values (e.g. regexes) may contain "=".
        key, separator, value = argument.partition("=")
        if not separator:
            # Sticky flag: a later bare argument must not reset verbosity.
            if argument in ("v", "-v", "verbose", "--verbose"):
                verbose = True
        elif key in text_keys:
            parameters[text_keys[key]] = value
        elif key in length_keys:
            # Each length is stored under its own key; min_tail_length used
            # to be stored under "min_head_length" by mistake.
            parameters[key] = float(value)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    print("[INFO] Reading audio...")
    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        audio_file = AudioFile(tmp_file_path)
        print("[INFO] Reading audio... done")

        print("[INFO] Reading text...")
        if text_format == "list":
            text_file = TextFile()
            text_file.read_from_list(text_file_path.split("|"))
        else:
            text_file = TextFile(text_file_path, text_format, parameters)
        text_file.set_language(language)
        print("[INFO] Reading text... done")

        print("[INFO] Detecting audio interval...")
        sd = SD(audio_file, text_file, logger=logger)
        min_head_length = parameters.get("min_head_length", gc.SD_MIN_HEAD_LENGTH)
        max_head_length = parameters.get("max_head_length", gc.SD_MAX_HEAD_LENGTH)
        min_tail_length = parameters.get("min_tail_length", gc.SD_MIN_TAIL_LENGTH)
        max_tail_length = parameters.get("max_tail_length", gc.SD_MAX_TAIL_LENGTH)
        start, end = sd.detect_interval(
            min_head_length,
            max_head_length,
            min_tail_length,
            max_tail_length
        )
        zero = 0
        audio_len = audio_file.audio_length
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        print("[INFO] Detecting audio interval... done")

        # Report the intervals in seconds ...
        print("[INFO] ")
        print("[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        print("[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        print("[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        print("[INFO] ")

        # ... and again as formatted by gf.time_to_hhmmssmmm.
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        print("[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        print("[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        print("[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
    finally:
        # Release the temporary wav file even if a step above failed.
        cleanup(tmp_handler, tmp_file_path)
def main():
    """
    Entry point.

    Expected command line::

        LANGUAGE TEXT_FILE_PATH TEXT_FORMAT [OPTION ...] AUDIO_FILE_PATH

    Every argument between the text format and the audio path is an option:
    either a verbose flag (``v``, ``-v``, ``verbose``, ``--verbose``) or a
    ``key=value`` pair with key ``id_regex``, ``class_regex``, ``sort``,
    ``min_head_length``, ``max_head_length``, ``min_tail_length`` or
    ``max_tail_length`` (the four lengths are parsed as floats).
    Unrecognized options are ignored.

    The audio file is converted to a temporary wav file, the head, text and
    tail intervals are detected and printed, and the temporary file is
    cleaned up even when one of these steps raises.
    """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]

    verbose = False
    parameters = {}
    text_keys = {
        "id_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
        "class_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
        "sort": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,
    }
    length_keys = (
        "min_head_length",
        "max_head_length",
        "min_tail_length",
        "max_tail_length",
    )
    for argument in sys.argv[4:-1]:
        # Split on the first "=" only, so values (e.g. regexes) may contain "=".
        key, separator, value = argument.partition("=")
        if not separator:
            # Sticky flag: a later bare argument must not reset verbosity.
            if argument in ("v", "-v", "verbose", "--verbose"):
                verbose = True
        elif key in text_keys:
            parameters[text_keys[key]] = value
        elif key in length_keys:
            # Each length is stored under its own key; min_tail_length used
            # to be stored under "min_head_length" by mistake.
            parameters[key] = float(value)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    print("[INFO] Reading audio...")
    tmp_handler, tmp_file_path = tempfile.mkstemp(suffix=".wav", dir=gf.custom_tmp_dir())
    try:
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        audio_file = AudioFile(tmp_file_path)
        print("[INFO] Reading audio... done")

        print("[INFO] Reading text...")
        if text_format == "list":
            text_file = TextFile()
            text_file.read_from_list(text_file_path.split("|"))
        else:
            text_file = TextFile(text_file_path, text_format, parameters)
        text_file.set_language(language)
        print("[INFO] Reading text... done")

        print("[INFO] Detecting audio interval...")
        sd = SD(audio_file, text_file, logger=logger)
        min_head_length = parameters.get("min_head_length", gc.SD_MIN_HEAD_LENGTH)
        max_head_length = parameters.get("max_head_length", gc.SD_MAX_HEAD_LENGTH)
        min_tail_length = parameters.get("min_tail_length", gc.SD_MIN_TAIL_LENGTH)
        max_tail_length = parameters.get("max_tail_length", gc.SD_MAX_TAIL_LENGTH)
        start, end = sd.detect_interval(min_head_length, max_head_length, min_tail_length, max_tail_length)
        zero = 0
        audio_len = audio_file.audio_length
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        print("[INFO] Detecting audio interval... done")

        # Report the intervals in seconds ...
        print("[INFO] ")
        print("[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        print("[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        print("[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        print("[INFO] ")

        # ... and again as formatted by gf.time_to_hhmmssmmm.
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        print("[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        print("[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        print("[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
    finally:
        # Release the temporary wav file even if a step above failed.
        cleanup(tmp_handler, tmp_file_path)