def rip(self, post_process: Callable[[str], str]):
    """Rip the PGS items into a ``SubRipFile`` over repeated passes.

    ``self.process`` is called repeatedly on the items it hands back
    (presumably the ones it could not rip -- confirm against ``process``).
    Between passes the settings are relaxed:

    * fewer than 20 items left: one final pass with confidence 0 and a
      width sized to the remaining items; anything still left is logged
      as a warning and the loop stops;
    * more than 80% of the previous pass's items left: the width is
      halved and the confidence lowered by 5 (never below 0).

    :param post_process: callable forwarded unchanged to ``self.process``
    :return: the ``SubRipFile`` handed to ``self.process``, after
        ``clean_indexes()`` has been called on it
    """

    def capped_width(pending):
        # Sum of the pending widths, each padded with gap[1], capped at
        # the configured maximum. Evaluated lazily, only where needed.
        total = sum(entry.width + self.gap[1] for entry in pending)
        return min(total, self.max_tess_width)

    srt_path = str(self.pgs.media_path.translate(extension='srt'))
    subs = SubRipFile(path=srt_path)

    pending = self.pgs.items
    confidence = self.confidence
    max_width = self.max_tess_width
    last_count = len(pending)

    while last_count > 0:
        pending = self.process(subs, pending, post_process,
                               confidence, max_width)
        if not pending:
            break

        count = len(pending)
        if count < 20:
            # Last attempt: no confidence threshold, width fitted to
            # what is left.
            leftovers = self.process(subs, pending, post_process,
                                     0, capped_width(pending))
            if leftovers:
                logger.warning(
                    f'Subtitles were not ripped: {leftovers!r}')
            break

        if count > last_count * 0.8:
            # Little progress in this pass: relax the settings.
            max_width = capped_width(pending) // 2
            confidence = max(0, confidence - 5)
        last_count = count

    subs.clean_indexes()
    return subs
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """Merge two SRT subtitle tracks in different languages into one.

    The two tracks do not share a timeline, so the merged track starts a
    new cue whenever either input track changes. As a consequence the two
    languages do not always switch at the same moment; this is inherent
    to the approach and cannot be avoided.

    See https://github.com/byroot/pysrt/issues/17 and
    https://github.com/byroot/pysrt/issues/15.

    :param sub_a: first track, e.g. obtained with
        ``SubRipFile.open(sub_a_path, encoding=encoding)``
    :param sub_b: second track, same kind of object as ``sub_a``
    :param delta: a time slice is only used when its duration is strictly
        greater than this value
    :param encoding: not used by this function; kept so existing callers
        keep working
    :return: a new ``SubRipFile`` holding the merged cues, after
        ``clean_indexes()`` has been called on it
    """
    out = SubRipFile()

    # Every start/end ordinal of either track is a potential cue boundary.
    intervals = []
    for track in (sub_a, sub_b):
        for item in track:
            intervals.append(item.start.ordinal)
            intervals.append(item.end.ordinal)
    intervals.sort()

    # j / k are handed to find_subtitle and replaced by the index it
    # returns (presumably a search cursor -- confirm in find_subtitle).
    j = k = 0
    # Pair consecutive boundaries with zip() rather than xrange(), which
    # does not exist on Python 3; zip() behaves the same on 2 and 3.
    for lower, upper in zip(intervals, intervals[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)
        if not (end - start) > delta:
            continue

        text_a, j = find_subtitle(sub_a, start, end, j)
        text_b, k = find_subtitle(sub_b, start, end, k)
        text = join_lines(text_a, text_b)
        if text:
            # Index 0 is a placeholder; clean_indexes() is called below.
            out.append(SubRipItem(0, start, end, text))

    out.clean_indexes()
    return out
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle tracks into a single ``SubRipFile``.

    All start and end ordinals of both tracks are sorted; each pair of
    consecutive boundaries forms a candidate time slice. For every slice
    longer than ``delta`` the texts returned by ``find_subtitle`` for the
    two tracks are combined with ``join_lines`` and, when the result is
    non-empty, stored as one cue.

    :param sub_a: first track (iterable of items with ``start``/``end``)
    :param sub_b: second track, same kind of object as ``sub_a``
    :param delta: a slice is only used when its duration is strictly
        greater than this value
    :return: a new ``SubRipFile`` with the merged cues, after
        ``clean_indexes()`` has been called on it
    """
    out = SubRipFile()

    intervals = []
    for track in (sub_a, sub_b):
        for item in track:
            intervals.extend((item.start.ordinal, item.end.ordinal))
    intervals.sort()

    # j / k are passed to find_subtitle and overwritten with the index it
    # returns (presumably where the next search resumes -- TODO confirm).
    j = k = 0
    # xrange() is Python 2 only and raises NameError on Python 3; pairing
    # neighbours with zip() works on both.
    for previous, current in zip(intervals, intervals[1:]):
        start = SubRipTime.from_ordinal(previous)
        end = SubRipTime.from_ordinal(current)
        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)
            text = join_lines(text_a, text_b)
            if text:
                # Index 0 is a placeholder; clean_indexes() runs below.
                out.append(SubRipItem(0, start, end, text))

    out.clean_indexes()
    return out
def merge_subtitle(sub_a, sub_b, delta):
    """Combine two subtitle tracks into one ``SubRipFile``.

    The start and end ordinals of every item in both tracks are sorted
    and walked pairwise. Each slice between two neighbouring boundaries
    that is longer than ``delta`` gets the text ``find_subtitle`` returns
    for each track, merged by ``join_lines``; slices with empty text are
    dropped.

    :param sub_a: first track
    :param sub_b: second track
    :param delta: a slice is only used when its duration is strictly
        greater than this value
    :return: the merged ``SubRipFile``, after ``clean_indexes()``
    """
    merged = SubRipFile()

    boundaries = sorted(
        ordinal
        for track in (sub_a, sub_b)
        for cue in track
        for ordinal in (cue.start.ordinal, cue.end.ordinal)
    )

    # Indexes returned by find_subtitle are fed back into the next call.
    pos_a = pos_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        slice_start = SubRipTime.from_ordinal(lower)
        slice_end = SubRipTime.from_ordinal(upper)
        if not (slice_end - slice_start) > delta:
            continue

        text_a, pos_a = find_subtitle(sub_a, slice_start, slice_end, pos_a)
        text_b, pos_b = find_subtitle(sub_b, slice_start, slice_end, pos_b)
        combined = join_lines(text_a, text_b)
        if len(combined) == 0:
            continue

        # Index 0 is a placeholder; clean_indexes() is called below.
        merged.append(SubRipItem(0, slice_start, slice_end, combined))

    merged.clean_indexes()
    return merged
# NOTE(review): this fragment starts mid-statement -- its first tokens are the
# trailing arguments of a call whose head is not visible here. The code is
# flattened onto one line, so nesting is inferred; confirm against the
# original file before relying on the description below.
#
# After that call, chat messages are collected into SubRip cues:
#   * `outsrt` starts as an empty SubRipFile; `text` holds the cue being built.
#   * `while 1:` has no visible `break`; each pass polls `bot.get_message()`.
#   * On a non-empty batch, accumulated `text` (if any) is closed as
#     SubRipItem(0, start, end, text) with `end` = now and appended to
#     `outsrt`; then `start` is reset to now and `text` to ''.
#     `start` is only read when `text` is non-empty, and `text` is only
#     filled after `start` has been assigned.
#   * Each raw message is written to the raw log when `record_raw` is set,
#     then split by `irc_bot.parse_user`; messages with a non-empty username
#     are printed, logged and appended to `text` as "username: message\n".
#   * `outsrt.clean_indexes()` and `outsrt.save(srt_log_path, encoding='utf-8')`
#     come last; since the loop never exits they presumably sit inside it --
#     TODO confirm at which nesting level.
# NOTE(review): the name `item` is used both for the SubRipItem and as the
# loop variable over `raw_msg_list`.
chat_channel, chat_server[0], chat_server[1], twitchclient_version=twitchclient_version) outsrt = SubRipFile() text = '' while 1: raw_msg_list = bot.get_message() if len(raw_msg_list) > 0: if len(text) > 0: end = SubRipTime.from_time(datetime.now()) item = SubRipItem(0, start, end, text) outsrt.append(item) start = SubRipTime.from_time(datetime.now()) text = '' timestamp = get_timestamp(timestamp_format) for item in raw_msg_list: if record_raw: log_add(raw_log_path, timestamp + ' ' + item + '\n') username, message = irc_bot.parse_user(item) if username != '': safe_print(chat_channel + " " + username + ": " + message) log_add(log_path, timestamp + ' ' + username + ': ' + message + '\n') text += username + ": " + message + '\n' outsrt.clean_indexes() outsrt.save(srt_log_path, encoding='utf-8')
# Record chat messages from an IRC bot into log files and an .srt file.
# NOTE(review): the code is flattened onto one line, so nesting is inferred;
# confirm against the original file.
#
#   * `srt_log_path` is `<current_directory>/comment_log/<chat_channel>.srt`.
#   * `bot` is built by `irc_bot.irc_bot(...)` from the username, oauth value,
#     channel, the two `chat_server` entries and `twitchclient_version`.
#   * `outsrt` starts as an empty SubRipFile; `text` holds the cue being built.
#   * `while 1:` has no visible `break`; each pass polls `bot.get_message()`.
#   * On a non-empty batch, accumulated `text` (if any) is closed as
#     SubRipItem(0, start, end, text) with `end` = now and appended to
#     `outsrt`; then `start` is reset to now and `text` to ''.
#     `start` is only read when `text` is non-empty, and `text` is only
#     filled after `start` has been assigned.
#   * Each raw message is written to the raw log when `record_raw` is set,
#     then split by `irc_bot.parse_user`; messages with a non-empty username
#     are printed, logged and appended to `text` as "username: message\n".
#   * `outsrt.clean_indexes()` and `outsrt.save(srt_log_path, encoding='utf-8')`
#     come last; since the loop never exits they presumably sit inside it --
#     TODO confirm at which nesting level.
# NOTE(review): `username` is passed to `irc_bot.irc_bot(...)` and later
# rebound by `irc_bot.parse_user(item)`; `item` is used both for the
# SubRipItem and as the loop variable over `raw_msg_list`.
srt_log_path = current_directory + '/comment_log/' + chat_channel + '.srt' bot = irc_bot.irc_bot(username, oauth, chat_channel, chat_server[0], chat_server[1], twitchclient_version = twitchclient_version) outsrt = SubRipFile() text = '' while 1: raw_msg_list = bot.get_message() if len(raw_msg_list) > 0: if len(text) > 0: end = SubRipTime.from_time(datetime.now()) item = SubRipItem(0, start, end, text) outsrt.append(item) start = SubRipTime.from_time(datetime.now()) text = '' timestamp = get_timestamp(timestamp_format) for item in raw_msg_list: if record_raw: log_add(raw_log_path, timestamp + ' ' + item + '\n') username, message = irc_bot.parse_user(item) if username != '': safe_print(chat_channel + " " + username + ": " + message) log_add(log_path, timestamp + ' ' + username + ': ' + message + '\n') text += username + ": " + message + '\n' outsrt.clean_indexes() outsrt.save(srt_log_path, encoding='utf-8')
# syncSrts: re-time the texts of `subs_L1` onto the cue timings of `subs_L2`.
# NOTE(review): the function is flattened onto one line, so nesting is
# inferred; confirm against the original file.
#
# Returns the 4-tuple (out, dupes, fixed, subs_L2_out). The docstring only
# mentions "a SubRipFile", but the visible `return` yields all four values:
#   out         -- SubRipFile with L1 texts placed on L2 timings
#   dupes       -- number of times `j == last_j` was observed
#   fixed       -- number of successful `trySplitLine` calls in the final pass
#   subs_L2_out -- SubRipFile built alongside `out` (L2 texts, plus " " cues
#                  added for L1 subs that matched nothing)
#
# Main loop, once per L2 sub:
#   * `matchSubtitle(subs_L1, start, end, max(last_j, 0))` gives index `j`;
#     any `j` not greater than -1 is treated as "no match". In that case the
#     L2 text is used and a "---- Missing" line is printed.
#   * When a match skipped exactly-or-more than one L1 sub
#     (`j - 1 > last_j and last_j > -1`), the skipped sub `subs_L1[j - 1]` is
#     (a) appended to the previous output cue if `isSubMatch` ties it to the
#     previous L2 sub, else (b) prepended to the current text if it matches
#     the current L2 timing, else (c) emitted as its own cue in `out`, with a
#     " " placeholder cue added to `subs_L2_out`.
#   * A cue with the L2 timing is appended to both `out` and `subs_L2_out`.
#
# After `clean_indexes()` on both files, a second pass looks at neighbouring
# cues in `out` with equal text whose `subs_L2_out` texts differ, and calls
# `trySplitLine(out, i, sub1)`; failures print a "---- Oy." line.
# NOTE(review): `i = i + 1` inside `for i in range(...)` does not skip the
# next iteration -- the `for` statement rebinds `i` on every pass.
def syncSrts(subs_L1, subs_L2): """Sync subs_L1 by subs_L2 timings and return a SubRipFile. """ out = SubRipFile() subs_L2_out = SubRipFile() j = 0 last_j = -1 dupes = 0 L2_ind = -1 for L2_sub in subs_L2: L2_ind = L2_ind + 1 start = L2_sub.start end = L2_sub.end j = matchSubtitle(subs_L1, start, end, max(last_j, 0)) L1_sub = subs_L1[j] if (j > -1) else None if L1_sub is None: text = L2_sub.text print("---- Missing: {}: {}".format( L2_sub.index, L2_sub.text.replace("\n", "[[NL]]"))) else: text = L1_sub.text if j - 1 > last_j and last_j > -1: # we skipped a sub in L1_subs if isSubMatch(subs_L1[j - 1], subs_L2[L2_ind - 1].start, subs_L2[L2_ind - 1].end): out[len(out) - 1].text = out[len(out) - 1].text + "\n" + subs_L1[j - 1].text elif isSubMatch(subs_L1[j - 1], start, end): text = subs_L1[j - 1].text + "\n" + text else: # A sub line in L1 does not match any in L2 # We add it to synced L1, and add an empty one to subs L2 item = SubRipItem(0, subs_L1[j - 1].start, subs_L1[j - 1].end, subs_L1[j - 1].text) out.append(item) item2 = SubRipItem(0, subs_L1[j - 1].start, subs_L1[j - 1].end, " ") subs_L2_out.append(item2) if j == last_j: dupes = dupes + 1 #print("---- OOPS. {}: {} - {}".format(L2_sub.index, L2_sub.text.replace("\n",""), L1_sub.text.replace("\n",""))) last_j = j item = SubRipItem(0, start, end, text) out.append(item) item2 = SubRipItem(0, start, end, L2_sub.text) subs_L2_out.append(item2) out.clean_indexes() subs_L2_out.clean_indexes() fixed = 0 for i in range(1, len(out)): sub1 = out[i - 1].text sub2 = out[i].text if ((sub1 == sub2) and (subs_L2_out[i - 1].text != subs_L2_out[i].text)): if (trySplitLine(out, i, sub1)): fixed = fixed + 1 i = i + 1 else: print("---- Oy. {}: {} not fixed".format( i, sub1.replace("\n", "[[NL]]"))) return out, dupes, fixed, subs_L2_out
# Convert the timestamped lines of `sublog` into an SRT document on stdout.
# NOTE(review): the code is flattened onto one line, so nesting is inferred
# (in particular where `continue` sits, and the inline `#srt.save(...)`
# comment and `\` continuations no longer end their lines); confirm against
# the original file.
#
#   * `srt` is a SubRipFile using '\n' line endings and UTF-8; `i` numbers
#     the cues starting at 1.
#   * Each log line is split once on the first ','. A first field starting
#     with '-' is a control line: a '- start ' line sets START_TIME when it
#     is still unset; control lines are then presumably skipped by the
#     `continue` -- TODO confirm.
#   * For other lines the first field is parsed with TIMEFORMAT and
#     START_TIME is subtracted. An offset of more than one day (in either
#     direction) prints an error and exits with status 1.
#   * The cue start is built from the offset's `seconds` and `microseconds`
#     fields only (`days` is not used); the cue end is start + 30*1000
#     ordinal units (presumably milliseconds, i.e. 30 s -- confirm).
#   * The cue text is the second field decoded as UTF-8 with `unicode`,
#     a Python 2 builtin.
#   * After `clean_indexes()`, each item is written to stdout encoded as
#     UTF-8.
# NOTE(review): `START_TIME == None` compares by equality; `is None` is the
# usual identity test.
srt = SubRipFile(eol='\n', encoding='utf-8') i = 1 for line in sublog: line = line.split(",", 1) if (line[0] and line[0][0] == '-'): if (START_TIME == None and line[0][:8] == '- start '): START_TIME = datetime.strptime(line[0], '- start ' + TIMEFORMAT + '\n') continue no = datetime.strptime(line[0], TIMEFORMAT) - START_TIME if (abs(no) > timedelta(1)): print("\nCan't go over a day in a subtitle! Delete non-used lines in" + \ " log.\nLet there only be one '- start' line at the top of" + \ " the log-file.") sys.exit(1) time = SubRipTime.from_ordinal(no.seconds*1000 + no.microseconds*0.001) item = SubRipItem(i, start=time, end=time + 30*1000, text=unicode(line[1], 'utf-8')) srt.append(item) i += 1 srt.clean_indexes() #srt.save(path=sys.stdout) for line in srt: sys.stdout.write(unicode(line).encode('utf-8'))