def list_to_ass_str(  # pylint: disable=too-many-arguments
        text_list,
        styles_list,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT):
    """
    Give an input timed text list, format it to an ass string.

    Args:
        text_list: Either a flat list ``[((start, end), text), ...]`` or a
            two-item list ``[[src_list], [dst_list]]`` in which the first
            list provides the regions.
        styles_list: Flat sequence of alternating style names and style
            objects: ``[name_0, style_0, name_1, style_1, ...]``.
        subtitles_file_format: 'ass', 'ssa' or 'ass.json'. Any other value
            triggers the fallback to ``constants.DEFAULT_SUBTITLES_FORMAT``.

    Returns:
        A tuple ``(formatted_subtitles, subtitles_file_format)`` where the
        second item is the format that was actually used to build the string.
    """
    if subtitles_file_format in ('ass', 'ssa', 'ass.json'):
        pysubs2_obj = pysubs2.SSAFile()
        pysubs2_obj.styles = \
            {styles_list[i]: styles_list[i + 1]
             for i in range(0, len(styles_list), 2)}
        if not isinstance(text_list[0], list):
            # text_list is [((start, end), text), ...]
            # text_list provides regions
            sub_utils.pysubs2_ssa_event_add(
                src_ssafile=None,
                dst_ssafile=pysubs2_obj,
                text_list=text_list,
                style_name=styles_list[0])
        else:
            # text_list is [[src_list], [dst_list]]
            # src_list provides regions
            sub_utils.pysubs2_ssa_event_add(
                src_ssafile=None,
                dst_ssafile=pysubs2_obj,
                text_list=text_list[0],
                style_name=styles_list[0])
            # The second style name lives at index 2 (index 1 is the first
            # style object). Fall back to the first style when only one
            # name/style pair was supplied instead of raising IndexError.
            if len(styles_list) > 2:
                dst_style_name = styles_list[2]
            else:
                dst_style_name = styles_list[0]
            sub_utils.pysubs2_ssa_event_add(
                src_ssafile=None,
                dst_ssafile=pysubs2_obj,
                text_list=text_list[1],
                style_name=dst_style_name)

        if subtitles_file_format != 'ass.json':
            formatted_subtitles = pysubs2_obj.to_string(
                format_=subtitles_file_format)
        else:
            # 'ass.json' is this project's name for the pysubs2 'json' format
            formatted_subtitles = pysubs2_obj.to_string(format_='json')
    else:
        # fallback process
        print(
            _("Format \"{fmt}\" not supported. "
              "Using \"{default_fmt}\" instead.").format(
                  fmt=subtitles_file_format,
                  default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
        pysubs2_obj = pysubs2.SSAFile()
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=None,
            dst_ssafile=pysubs2_obj,
            text_list=text_list,
            style_name=None)
        formatted_subtitles = pysubs2_obj.to_string(
            format_=constants.DEFAULT_SUBTITLES_FORMAT)
        # Report the format that was really used, not the rejected one, so
        # the caller does not label the output with the wrong format.
        subtitles_file_format = constants.DEFAULT_SUBTITLES_FORMAT

    return formatted_subtitles, subtitles_file_format
def list_to_ass_str(
        text_list,
        styles_list,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT,
        same_event_type=0):
    """
    Give an input timed text list, format it to an ass string.

    Args:
        text_list: Either a flat list ``[((start, end), text), ...]`` or a
            two-item list ``[[src_list], [dst_list]]`` in which the first
            list provides the regions.
        styles_list: Flat sequence of alternating style names and style
            objects: ``[name_0, style_0, name_1, style_1, ...]``.
        subtitles_file_format: Format name handed to ``SSAFile.to_string``;
            'ass.json' is mapped to the pysubs2 'json' format.
        same_event_type: Forwarded unchanged to
            ``sub_utils.pysubs2_ssa_event_add`` for the second text list.

    Returns:
        The serialized subtitles string.
    """
    pysubs2_obj = pysubs2.SSAFile()
    pysubs2_obj.styles = \
        {styles_list[i]: styles_list[i + 1]
         for i in range(0, len(styles_list), 2)}

    if not isinstance(text_list[0], list):
        # text_list is [((start, end), text), ...]
        # text_list provides regions
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=None,
            dst_ssafile=pysubs2_obj,
            text_list=text_list,
            style_name=styles_list[0])
    else:
        # text_list is [[src_list], [dst_list]]
        # src_list provides regions
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=None,
            dst_ssafile=pysubs2_obj,
            text_list=text_list[0],
            style_name=styles_list[0])
        src_obj = pysubs2_obj
        pysubs2_obj = pysubs2.SSAFile()
        # The fresh output file must carry the custom styles too; otherwise
        # its events reference style names the serialized file never defines.
        pysubs2_obj.styles = src_obj.styles
        # The second style name lives at index 2 (index 1 is the first style
        # object). Fall back to the first style when only one name/style
        # pair was supplied instead of raising IndexError.
        if len(styles_list) > 2:
            dst_style_name = styles_list[2]
        else:
            dst_style_name = styles_list[0]
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=src_obj,
            dst_ssafile=pysubs2_obj,
            text_list=text_list[1],
            style_name=dst_style_name,
            same_event_type=same_event_type)

    if subtitles_file_format != 'ass.json':
        formatted_subtitles = pysubs2_obj.to_string(
            format_=subtitles_file_format)
    else:
        formatted_subtitles = pysubs2_obj.to_string(format_='json')

    return formatted_subtitles
def write_file(self, fname: str) -> None:
    """Serialize the resolved subtitles and write them to a file or stdout.

    Args:
        fname: Destination path. Its extension selects the output format.
            A falsy value (``None`` is explicitly supported) writes to
            standard output using the subtitles' own format.

    Raises:
        NotImplementedError: If no conversion exists from the stored format
            to the requested output format.
    """
    # TODO: converter to go between self.subs_format and out_format
    if fname is None:
        out_format = self._sub_format
    else:
        out_format = os.path.splitext(fname)[-1][1:]
    subs = list(self.gen_raw_resolved_subs())
    if self._sub_format in ("ssa", "ass"):
        ssaf = pysubs2.SSAFile()
        ssaf.events = subs
        if self._styles is not None:
            ssaf.styles = self._styles
        if self._info is not None:
            ssaf.info = self._info
        if self._fonts_opaque is not None:
            ssaf.fonts_opaque = self._fonts_opaque
        to_write = ssaf.to_string(out_format)
    elif self._sub_format == "srt" and out_format in ("ssa", "ass"):
        to_write = pysubs2.SSAFile.from_string(
            srt.compose(subs)).to_string(out_format)
    elif out_format == "srt":
        to_write = srt.compose(subs)
    else:
        raise NotImplementedError("unsupported output format: %s" % out_format)

    to_write = to_write.encode(self._encoding)
    if fname:
        with open(fname, "wb") as f:
            f.write(to_write)
    else:
        # Write through the existing stdout object rather than opening its
        # file descriptor in a `with` block: that would close fd 1 on exit
        # and break every later write to stdout. `.buffer` is the binary
        # layer on Python 3; Python 2's stdout accepts bytes directly.
        stream = getattr(sys.stdout, "buffer", sys.stdout)
        stream.write(to_write)
        stream.flush()
def write_file(self, fname):
    """Serialize the resolved subtitles and write them to a file or stdout.

    Args:
        fname: Destination path. Its extension selects the output format.
            A falsy value (``None`` is explicitly supported) writes to
            standard output using the subtitles' own format.

    Raises:
        NotImplementedError: If no conversion exists from the stored format
            to the requested output format.
    """
    # TODO: converter to go between self.subs_format and out_format
    if fname is None:
        out_format = self._sub_format
    else:
        out_format = os.path.splitext(fname)[-1][1:]
    subs = list(self.gen_raw_resolved_subs())
    if self._sub_format in ('ssa', 'ass'):
        ssaf = pysubs2.SSAFile()
        ssaf.events = subs
        # Keep pysubs2's default styles when none were recorded, mirroring
        # the guard already applied to `info` below.
        if self.styles is not None:
            ssaf.styles = self.styles
        if self.info is not None:
            ssaf.info = self.info
        to_write = ssaf.to_string(out_format)
    elif self._sub_format == 'srt' and out_format in ('ssa', 'ass'):
        to_write = pysubs2.SSAFile.from_string(
            srt.compose(subs)).to_string(out_format)
    elif out_format == 'srt':
        to_write = srt.compose(subs)
    else:
        raise NotImplementedError('unsupported output format: %s' % out_format)

    to_write = to_write.encode(self.encoding)
    if fname:
        with open(fname, 'wb') as f:
            f.write(to_write)
    else:
        # Write through the existing stdout object rather than opening its
        # file descriptor in a `with` block: that would close fd 1 on exit
        # and break every later write to stdout. `.buffer` is the binary
        # layer on Python 3; Python 2's stdout accepts bytes directly.
        stream = getattr(sys.stdout, 'buffer', sys.stdout)
        stream.write(to_write)
        stream.flush()
def convert_yt_comments(jsonname, comment_duration, video_info, outputname):
    """Convert a YouTube chat/comment JSON dump into scrolling subtitles.

    Each message becomes one subtitle event carrying a ``\\move`` override
    tag that slides it horizontally across the frame on one of several rows
    ("channels").

    Args:
        jsonname: Path of the JSON file: a list of dicts with at least the
            keys ``"time_in_seconds"`` and ``"message"``.
        comment_duration: Display/travel time of one comment, in ms.
        video_info: Mapping with a ``"duration"`` entry in seconds; messages
            timestamped after the end of the video are dropped.
        outputname: Path of the subtitle file to write (saved through
            ``SSAFile.save``).

    Returns:
        None. Nothing is written when the JSON list is empty.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale.
    with open(jsonname, encoding="utf-8") as f:
        yt_comments = json.load(f)

    if not yt_comments:
        return

    subs = pysubs2.SSAFile()
    subs.info["PlayResX"] = 384
    subs.info["PlayResY"] = 288

    start_time_shift = yt_comments[0]["time_in_seconds"] * 1000

    # One slot per text row; a slot remembers the last message put there.
    comment_size = 20
    comment_channel = [None] * len(
        range(0, subs.info["PlayResY"], comment_size))

    for msg in yt_comments:
        now = msg["time_in_seconds"] * 1000
        if now > video_info["duration"] * 1000:
            continue
        if not msg["message"]:
            continue

        # Take the first row that is empty or whose previous message is old
        # enough. If every row is busy the message falls back to row 1 and
        # the bookkeeping is left untouched.
        # NOTE(review): the wait is scaled by the length of the *new*
        # message, not the one occupying the row -- confirm this is intended.
        selected_channel = 1
        for index, chan in enumerate(comment_channel):
            if (not chan or
                    chan["time_in_seconds"] * 1000
                    + (200 * len(msg["message"])) < now):
                comment_channel[index] = msg
                selected_channel = index + 1
                break

        row_y = selected_channel * comment_size
        # "\\m" is spelled with an escaped backslash: a bare "\m" is an
        # invalid escape sequence that newer Python versions warn about.
        movement = "{{\\move(414,{y},-30,{y},0,{duration})}}".format(
            y=row_y, duration=comment_duration)

        subs.append(
            pysubs2.SSAEvent(
                start=pysubs2.make_time(ms=now),
                end=pysubs2.make_time(ms=now + comment_duration),
                text=movement + msg["message"]))

    # Re-base the timeline so the first comment appears 100 ms in.
    subs.shift(ms=-start_time_shift + 100)
    subs.save(outputname)
def write_file(self, fname):
    """Serialize the resolved subtitles and write them to a file or stdout.

    The output always uses ``self.sub_format``; no format conversion is done.

    Args:
        fname: Destination path. A falsy value (e.g. ``None``) writes to
            standard output instead.

    Raises:
        NotImplementedError: If ``self.sub_format`` is not 'srt', 'ssa' or
            'ass'.
    """
    subs = list(self.gen_raw_resolved_subs())
    if self.sub_format == 'srt':
        to_write = srt.compose(subs)
    elif self.sub_format in ('ssa', 'ass'):
        ssaf = pysubs2.SSAFile()
        ssaf.events = subs
        to_write = ssaf.to_string(self.sub_format)
    else:
        raise NotImplementedError('unsupported format: %s' % self.sub_format)

    to_write = to_write.encode(self.encoding)
    if fname:
        with open(fname, 'wb') as f:
            f.write(to_write)
    else:
        # Write through the existing stdout object rather than opening its
        # file descriptor in a `with` block: that would close fd 1 on exit
        # and break every later write to stdout. `.buffer` is the binary
        # layer on Python 3; Python 2's stdout accepts bytes directly.
        stream = getattr(sys.stdout, 'buffer', sys.stdout)
        stream.write(to_write)
        stream.flush()
def list_to_vtt_str(subtitles):
    """
    Serialize a list of subtitles according to the VTT format.

    The subtitles are first rendered as SRT by pysubs2, then every timing
    line has its decimal comma replaced by a dot and a ``WEBVTT`` header is
    prepended.

    Args:
        subtitles: Timed text list, passed as ``text_list`` to
            ``pysubs2_ssa_event_add``.

    Returns:
        The VTT document as a string.
    """
    # Function-scope import keeps this block self-contained.
    import re

    pysubs2_obj = pysubs2.SSAFile()
    pysubs2_ssa_event_add(
        src_ssafile=None,
        dst_ssafile=pysubs2_obj,
        text_list=subtitles)
    formatted_subtitles = pysubs2_obj.to_string(format_='srt')

    # Recognize timing lines by their shape instead of by line position.
    # Counting lines modulo 4 is only right while every cue has exactly one
    # text line; a multi-line cue shifts the count, so commas in the text
    # were rewritten and later timestamps were left unconverted.
    timing_line = re.compile(
        r'^\d+:\d{2}:\d{2},\d{3} --> \d+:\d{2}:\d{2},\d{3}\s*$')
    new_lines = [
        line.replace(',', '.') if timing_line.match(line) else line
        for line in formatted_subtitles.split('\n')
    ]
    return 'WEBVTT\n\n' + '\n'.join(new_lines)
def make_ass(wav, segments, transcriptions, utt2spk, ass):
    """
    Build an .ASS subtitle file from transcriptions.

    Arguments:
        wav: name of the audio file
        segments: path to the segments description file
        transcriptions: path to the transcription file
        utt2spk: path to the file mapping segments to speakers
        ass: path to the output .ASS subtitle file
    """
    # Load the three tables; all of them are keyed by the utterance id.
    segments_df = pd.read_csv(
        segments, header=None, sep=' ',
        names=['utt_id', 'wav', 'start', 'end'])
    transcriptions_df = pd.read_csv(
        transcriptions, sep='\t', header=None, names=['utt_id', 'text'])
    utt2spk_df = pd.read_csv(
        utt2spk, sep='\t', header=None, names=['utt_id', 'speaker'])

    # Left joins keep every segment; missing text/speaker become ''.
    events = segments_df.merge(transcriptions_df, how='left', on='utt_id')
    events = events.merge(utt2spk_df, how='left', on='utt_id')
    events = events.fillna('')

    sub = pysubs2.SSAFile()
    sub.info['Title'] = 'Default Aegisub file'
    sub.info['YCbCr Matrix'] = 'None'
    sub.aegisub_project['Audio File'] = wav
    sub.aegisub_project['Scroll Position'] = 0
    sub.aegisub_project['Active Line'] = 0

    for record in events.itertuples(index=False):
        sub.events.append(
            pysubs2.SSAEvent(
                start=pysubs2.make_time(s=float(record.start)),
                end=pysubs2.make_time(s=float(record.end)),
                text=record.text,
                name=record.speaker))

    sub.sort()
    sub.save(ass, format_='ass')
def list_to_sub_str(
        timed_text,
        fps=30.0,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT):
    """
    Give an input timed text list, format it to a string.

    'vtt', 'json' and 'txt' are delegated to the matching ``sub_utils``
    serializer. Every other format goes through a ``pysubs2.SSAFile``; an
    unknown format prints a notice and falls back to
    ``constants.DEFAULT_SUBTITLES_FORMAT``.
    """
    # Formats with a dedicated serializer in sub_utils.
    if subtitles_file_format == 'vtt':
        return sub_utils.list_to_vtt_str(subtitles=timed_text)
    if subtitles_file_format == 'json':
        return sub_utils.list_to_json_str(subtitles=timed_text)
    if subtitles_file_format == 'txt':
        return sub_utils.list_to_txt_str(subtitles=timed_text)

    # Everything else is rendered by pysubs2; only the to_string() keyword
    # arguments differ from one format to the next.
    if subtitles_file_format in ('srt', 'tmp', 'ass', 'ssa'):
        to_string_kwargs = {'format_': subtitles_file_format}
    elif subtitles_file_format == 'ass.json':
        to_string_kwargs = {'format_': 'json'}
    elif subtitles_file_format == 'sub':
        # sub format need fps
        # ref https://pysubs2.readthedocs.io/en/latest
        # /api-reference.html#supported-input-output-formats
        to_string_kwargs = {'format_': 'microdvd', 'fps': fps}
    elif subtitles_file_format == 'mpl2.txt':
        to_string_kwargs = {'format_': 'mpl2', 'fps': fps}
    else:
        # fallback process
        print(_("Format \"{fmt}\" not supported. "
                "Use \"{default_fmt}\" instead.").format(
                    fmt=subtitles_file_format,
                    default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
        to_string_kwargs = {'format_': constants.DEFAULT_SUBTITLES_FORMAT}

    ssa_file = pysubs2.SSAFile()
    sub_utils.pysubs2_ssa_event_add(
        src_ssafile=None,
        dst_ssafile=ssa_file,
        text_list=timed_text)
    return ssa_file.to_string(**to_string_kwargs)
def subs_trans(  # pylint: disable=too-many-branches, too-many-statements, too-many-locals
        args,
        input_m=input,
        fps=30.0,
        styles_list=None):
    """
    Give args and translate a subtitles file.

    Loads ``args.input``, translates every event's text and writes the
    outputs named in ``args.output_files`` ("bilingual" and/or "dst").
    Handled entries are removed from ``args.output_files``.

    Args:
        args: Parsed command-line namespace. Reads ``input``, ``output``,
            ``output_files``, ``styles``, ``format``, the language and the
            translation-service options.
        input_m: Input function forwarded to ``core.str_to_file``.
        fps: Frame rate forwarded to ``SSAFile.to_string``.
        styles_list: Flat sequence of alternating style names and style
            objects, ``[name_0, style_0, name_1, style_1, ...]``. Only used
            when ``args.styles`` is set and the format is ASS-like.

    Raises:
        exceptions.AutosubException: When there is nothing to output, when
            the translation fails, and (as the project's way to stop early)
            once the bilingual file was the last requested output.
    """
    if not args.output_files:
        raise exceptions.AutosubException(
            _("\nNo works done."
              " Check your \"-of\"/\"--output-files\" option."))

    src_sub = pysubs2.SSAFile.load(args.input)
    text_list = []
    is_ass_like = args.format in ('ass', 'ssa', 'ass.json')

    if args.styles and is_ass_like:
        src_sub.styles = \
            {styles_list[i]: styles_list[i + 1]
             for i in range(0, len(styles_list), 2)}
        for event in src_sub.events:
            event.style = styles_list[0]
            text_list.append(event.text)
    else:
        styles_list = [src_sub.events[0].style, ]
        for event in src_sub.events:
            text_list.append(event.text)

    # text translation
    if args.gtransv2:
        # use gtransv2
        translated_text = core.list_to_gtv2(
            text_list=text_list,
            api_key=args.gtransv2,
            concurrency=args.trans_concurrency,
            src_language=args.src_language,
            dst_language=args.dst_language,
            lines_per_trans=args.lines_per_trans)
    else:
        # use googletrans
        translated_text = core.list_to_googletrans(
            text_list,
            src_language=args.src_language,
            dst_language=args.dst_language,
            sleep_seconds=args.sleep_seconds,
            user_agent=args.user_agent,
            service_urls=args.service_urls)

    if not translated_text or len(translated_text) != len(text_list):
        raise exceptions.AutosubException(_("Error: Translation failed."))

    # Style for the translated events. The second style name lives at
    # index 2 (index 1 is the first style object); with a single name/style
    # pair the first style is reused instead of raising IndexError.
    if len(styles_list) > 2:
        dst_style_name = styles_list[2]
    else:
        dst_style_name = styles_list[0]

    if args.format == 'mpl2':
        extension = 'mpl2.txt'
    else:
        extension = args.format

    # A membership test replaces the former `try: remove() ... except
    # KeyError: pass` wrapped around the whole section, which also swallowed
    # any unrelated KeyError raised while building or writing the file.
    if "bilingual" in args.output_files:
        args.output_files.remove("bilingual")
        bilingual_sub = pysubs2.SSAFile()
        bilingual_sub.styles = src_sub.styles
        bilingual_sub.events = src_sub.events[:]
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=bilingual_sub,
            dst_ssafile=bilingual_sub,
            text_list=translated_text,
            style_name=dst_style_name)

        if args.format != 'ass.json':
            bilingual_string = bilingual_sub.to_string(
                format_=args.format, fps=fps)
        else:
            bilingual_string = bilingual_sub.to_string(format_='json')

        bilingual_name = "{base}.{nt}.{extension}".format(
            base=args.output,
            nt=args.src_language + '&' + args.dst_language,
            extension=extension)

        subtitles_file_path = core.str_to_file(
            str_=bilingual_string,
            output=bilingual_name,
            input_m=input_m)
        # subtitles string to file
        print(
            _("Bilingual subtitles file "
              "created at \"{}\".").format(subtitles_file_path))

        if not args.output_files:
            raise exceptions.AutosubException(_("\nAll works done."))

    if "dst" in args.output_files:
        args.output_files.remove("dst")
        dst_sub = pysubs2.SSAFile()
        dst_sub.styles = src_sub.styles
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=src_sub,
            dst_ssafile=dst_sub,
            text_list=translated_text,
            style_name=dst_style_name)

        if args.format != 'ass.json':
            dst_string = dst_sub.to_string(format_=args.format, fps=fps)
        else:
            dst_string = dst_sub.to_string(format_='json')

        dst_name = "{base}.{nt}.{extension}".format(
            base=args.output,
            nt=args.dst_language,
            extension=extension)

        subtitles_file_path = core.str_to_file(
            str_=dst_string,
            output=dst_name,
            input_m=input_m)
        # subtitles string to file
        print(
            _("Destination language subtitles "
              "file created at \"{}\".").format(subtitles_file_path))
import xml.etree.ElementTree as ET import pysubs2 from auto_sub import find_type_file ttmlname = find_type_file('.ttml') tree = ET.parse(ttmlname) root = tree.getroot() styles = root[0][0] captions = root[1][0] sublist = [] styledict = dict() towritesubs = pysubs2.SSAFile() for styling in styles: color = styling.get(u'{http://www.w3.org/ns/ttml#styling}color') stylename = styling.get(u'{http://www.w3.org/XML/1998/namespace}id') if color: if color == "white": r = 255 g = 255 b = 255 a = 0 elif color == "black": r = 0 g = 0 b = 0 a = 0 else: r = int( styling.get(u'{http://www.w3.org/ns/ttml#styling}color')[1:3], 16)
def merge_bilingual_assfile(  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        subtitles,
        order=1):
    """
    Merge bilingual subtitles file's events automatically.

    The two styles owning the most events are taken as the two languages.
    Overlapping events are fused into one event whose text is
    ``dst + "\\N{\\r<src style>}" + src``; the non-overlapping remainders
    and the events of any other style are kept as separate events.
    """
    def _fragment(template, start, end):
        # Copy of `template` restricted to the [start, end] time span.
        piece = pysubs2.SSAEvent()
        piece.start = start
        piece.end = end
        piece.is_comment = template.is_comment
        piece.text = template.text
        piece.style = template.style
        return piece

    # Group events by style and remember where each style first appears.
    style_events = {}
    event_pos = {}
    for position, event in enumerate(subtitles.events):
        if event.style in style_events:
            style_events[event.style].append(event)
        else:
            style_events[event.style] = [event]
            event_pos[event.style] = position

    sorted_events_list = sorted(style_events.values(), key=len)
    events_1 = sorted_events_list.pop()
    events_2 = sorted_events_list.pop()

    dst_ssafile = pysubs2.SSAFile()
    src_ssafile = pysubs2.SSAFile()
    if event_pos[events_1[0].style] > event_pos[events_2[0].style] and order:
        # destination language events are behind source language events
        # in a bilingual subtitles
        dst_ssafile.events, src_ssafile.events = events_1, events_2
    else:
        dst_ssafile.events, src_ssafile.events = events_2, events_1
    dst_ssafile.sort()
    src_ssafile.sort()

    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info

    # default in dst-lf-src order
    dst_events = dst_ssafile.events
    src_events = src_ssafile.events
    dst_length = len(dst_events)
    src_length = len(src_events)
    i = 0
    j = 0
    start = 0
    end = 0
    events_0 = []  # events (or fragments) that were not fused

    while i < dst_length and j < src_length:
        dst_event = dst_events[i]
        src_event = src_events[j]

        if dst_event.is_comment != src_event.is_comment:
            # A comment never pairs with a dialogue line: set the comment
            # aside and advance only on that side.
            if dst_event.is_comment:
                events_0.append(dst_event)
                i += 1
            else:
                events_0.append(src_event)
                j += 1
            continue

        if dst_event.start == src_event.start \
                or dst_event.end == src_event.end:
            start = dst_event.start
            end = dst_event.end
        elif dst_event.start >= src_event.end:
            # No overlap: the source event lies entirely before.
            events_0.append(src_event)
            j += 1
            continue
        elif src_event.start >= dst_event.end:
            # No overlap: the destination event lies entirely before.
            events_0.append(dst_event)
            i += 1
            continue
        elif src_event.start < dst_event.start:
            # Source starts first: keep its leading part on its own.
            events_0.append(
                _fragment(src_event, src_event.start, dst_event.start))
            start = dst_event.start
            if src_event.end > dst_event.end:
                events_0.append(
                    _fragment(src_event, dst_event.end, src_event.end))
                end = dst_event.end
            else:
                end = src_event.end
        elif dst_event.start < src_event.start:
            # Destination starts first: keep its leading part on its own.
            events_0.append(
                _fragment(dst_event, dst_event.start, src_event.start))
            start = src_event.start
            if dst_event.end > src_event.end:
                events_0.append(
                    _fragment(dst_event, src_event.end, dst_event.end))
                end = src_event.end
            else:
                end = dst_event.end

        merged = pysubs2.SSAEvent()
        merged.start = start
        merged.end = end
        merged.is_comment = dst_event.is_comment
        merged.text = \
            dst_event.text + \
            "\\N{{\\r{style_name}}}".format(style_name=src_event.style) + \
            src_event.text
        merged.style = dst_event.style
        new_ssafile.events.append(merged)
        i += 1
        j += 1

    # Append whatever is left on the side that was not exhausted.
    if i < dst_length:
        remaining = dst_events[i:]
    else:
        remaining = src_events[j:]
    new_ssafile.events = new_ssafile.events + events_0 + remaining

    # Put the other style groups back, before or after the merged block
    # depending on where their style first appeared.
    for events in sorted_events_list:
        if event_pos[events[0].style] > event_pos[new_ssafile.events[0].style]:
            new_ssafile.events = new_ssafile.events + events
        else:
            new_ssafile.events = events + new_ssafile.events

    return new_ssafile
def split_dst_lf_src_assfile(  # pylint: disable=too-many-locals, too-many-branches
        subtitles,
        order=1,
        style_name=None):
    """
    Split bilingual subtitles file's events automatically.

    Only the events of the style owning the most events are split: each
    text of the form ``first\\Nsecond`` becomes two events. A leading
    ``{\\rStyle}`` tag in the second part names the style of that event and
    is removed from its text.

    Args:
        subtitles: The bilingual ``SSAFile`` to split.
        order: When truthy the first-part events come before the
            second-part events in the result, otherwise after.
        style_name: Sequence of one or two style names for the first and
            the second part. When empty or None, the style of the split
            events is used for both.

    Returns:
        A new ``SSAFile`` sharing ``styles`` and ``info`` with the input.
    """
    style_events = {}
    for event in subtitles.events:
        style_events.setdefault(event.style, []).append(event)

    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info

    sorted_events_list = sorted(style_events.values(), key=len)
    if not sorted_events_list:
        # Nothing to split: avoid popping from an empty list.
        return new_ssafile
    events_1 = sorted_events_list.pop()

    # Test for "no style given" first: calling len() on the default None
    # would raise TypeError before the fallback could apply.
    if not style_name:
        style_name = [events_1[0].style, events_1[0].style]
    elif len(style_name) == 1:
        style_name = [style_name[0], style_name[0]]

    reset_tag = re.compile(r"{\\r(.*?)}")  # compiled once, not per event
    new_events_1 = []
    new_events_2 = []
    for event in events_1:
        new_text_list = event.text.split(r'\N')
        first_part = copy.deepcopy(event)
        new_events_1.append(first_part)
        if len(new_text_list) != 2:
            # Not exactly one line break: keep the event untouched.
            continue

        first_part.text = new_text_list[0]
        first_part.style = style_name[0]

        second_part = copy.deepcopy(event)
        new_events_2.append(second_part)
        styles = reset_tag.findall(new_text_list[1])
        if styles:
            styles = styles[0].split("\\")
            # Drop the leading "{\r" + style name plus one more character:
            # the closing "}" or the backslash that starts the next tag.
            remainder = new_text_list[1][4 + len(styles[0]):]
            if len(styles) > 1:
                # Other override tags followed the style name inside the
                # same braces; re-open the block for them.
                second_part.text = "{\\" + remainder
            else:
                second_part.text = remainder
            second_part.style = styles[0]
        else:
            second_part.text = new_text_list[1]
            second_part.style = style_name[1]

    if order:
        new_events = new_events_1 + new_events_2
    else:
        new_events = new_events_2 + new_events_1
    sorted_events_list.append(new_events)

    for events in sorted_events_list:
        new_ssafile.events = new_ssafile.events + events

    return new_ssafile
def merge_src_assfile(  # pylint: disable=too-many-locals, too-many-nested-blocks,
        # pylint: disable=too-many-statements, too-many-branches, too-many-arguments
        # pylint: disable=too-many-boolean-expressions
        subtitles,
        stop_words_set_1,
        stop_words_set_2,
        max_join_size=constants.DEFAULT_MAX_SIZE_PER_EVENT,
        max_delta_time=int(constants.DEFAULT_CONTINUOUS_SILENCE * 1000),
        delimiters=constants.DEFAULT_EVENT_DELIMITERS,
        avoid_split=False):
    """
    Merge a source subtitles file's events automatically.

    Only the events of the style owning the most events are processed;
    events of the other styles are put back in front of the result. Adjacent
    events are joined while the joined text stays shorter than
    ``max_join_size``; otherwise, unless ``avoid_split`` is set, the text is
    re-split at delimiters or stop words.

    Side effects: the ``text`` of every event in ``subtitles`` is modified
    in place ("\\N" replaced by a space), merged events are modified in
    place as well, and the merge/split statistics are printed.

    Args:
        subtitles: The ``SSAFile`` to process.
        stop_words_set_1: Stop words tried first when no delimiter gives a
            split position.
        stop_words_set_2: Stop words tried when the first set gives no
            split position.
        max_join_size: Upper bound (exclusive) on the text length of a
            joined event.
        max_delta_time: Events are only joined when the gap between them is
            below this value (compared against ``start - end`` of the
            events).
        delimiters: Characters that end an event; two events are not joined
            across them.
        avoid_split: When truthy, events that cannot simply be joined are
            left as they are instead of being re-split.

    Returns:
        A new ``SSAFile`` sharing ``styles`` and ``info`` with the input.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed listing -- confirm against the canonical file.
    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info
    style_events = {}

    # Flatten line breaks and group the events by style.
    for event in subtitles.events:
        event.text = event.text.replace("\\N", " ")
        if event.style not in style_events:
            style_events[event.style] = [event]
        else:
            style_events[event.style].append(event)

    # The style with the most events is the one that gets merged.
    sorted_events_list = sorted(style_events.values(), key=len)
    events_1 = sorted_events_list.pop()

    temp_ssafile = pysubs2.SSAFile()
    temp_ssafile.events = events_1
    temp_ssafile.sort()

    sub_length = len(temp_ssafile.events)
    event_count = 1
    merge_count = 0
    split_count = 0

    # new_ssafile.events[-1] is always the event currently being grown;
    # temp_ssafile.events[event_count] is the candidate to add to it.
    new_ssafile.events.append(temp_ssafile.events[0])

    while event_count < sub_length:
        # Candidates for joining: both are dialogue lines of the same style,
        # close enough in time, and no delimiter sits at the junction.
        if not new_ssafile.events[-1].is_comment \
                and not temp_ssafile.events[event_count].is_comment \
                and new_ssafile.events[-1].style == temp_ssafile.events[event_count].style \
                and temp_ssafile.events[event_count].start \
                - new_ssafile.events[-1].end < max_delta_time \
                and new_ssafile.events[-1].text.rstrip(" ")[-1] not in delimiters \
                and temp_ssafile.events[event_count].text.lstrip(" ")[0] not in delimiters:
            if len(new_ssafile.events[-1].text) + \
                    len(temp_ssafile.events[event_count].text) < max_join_size:
                # Short enough: extend the current event in place.
                new_ssafile.events[-1].end = temp_ssafile.events[
                    event_count].end
                if new_ssafile.events[-1].text[-1] != " ":
                    new_ssafile.events[-1].text = new_ssafile.events[-1].text + " " + \
                        temp_ssafile.events[event_count].text
                else:
                    new_ssafile.events[-1].text = \
                        new_ssafile.events[-1].text + temp_ssafile.events[event_count].text
                merge_count = merge_count + 1
                event_count = event_count + 1
                continue

            if not avoid_split:
                # Too long to join directly. Re-split either the current
                # event alone (when it is already long and clearly longer
                # than the candidate) or the join of both.
                if len(new_ssafile.events[-1].text) \
                        > len(temp_ssafile.events[event_count].text) * 1.4 and \
                        len(new_ssafile.events[-1].text) > max_join_size * 0.8:
                    joint_event = new_ssafile.events[-1]
                else:
                    joint_event = join_event(new_ssafile.events[-1],
                                             temp_ssafile.events[event_count])

                event_list = []
                while True:
                    word_dict = get_slice_pos_dict(joint_event.text,
                                                   delimiters=delimiters)
                    total_length = len(joint_event.text)
                    # use punctuations to split the sentence first
                    stop_word_set = set(word_dict.keys())
                    last_index = find_split_index(total_length=total_length,
                                                  stop_word_set=stop_word_set,
                                                  word_dict=word_dict,
                                                  min_range_ratio=0.1)

                    if len(word_dict) < 2 or not last_index:
                        # then use stop words
                        word_dict = get_slice_pos_dict(joint_event.text)
                        stop_word_set = stop_words_set_1 & \
                            set(word_dict.keys())
                        last_index = find_split_index(
                            total_length=total_length,
                            stop_word_set=stop_word_set,
                            word_dict=word_dict,
                            min_range_ratio=0.1)
                        if not last_index:
                            stop_word_set = stop_words_set_2 & \
                                set(word_dict.keys())
                            last_index = find_split_index(
                                total_length=total_length,
                                stop_word_set=stop_word_set,
                                word_dict=word_dict,
                                min_range_ratio=0.1)

                    if 0 < last_index < max_join_size:
                        if total_length - last_index < max_join_size:
                            # Both halves fit: this is the final split.
                            event_list.extend(
                                split_event(joint_event, last_index))
                            # From here on last_index is reused as a flag:
                            # -2 when the text that was split is contained
                            # in the current event's text, -1 otherwise.
                            if joint_event.text in new_ssafile.events[-1].text:
                                last_index = -2
                            else:
                                last_index = -1
                            # The current event is replaced by its pieces.
                            new_ssafile.events.pop()
                            if len(event_list) > 2:
                                # Re-join neighbouring pieces that are
                                # still short enough together.
                                count = 0
                                while count < len(event_list) - 1:
                                    joint_event = join_event(
                                        event_list[count],
                                        event_list[count + 1])
                                    if len(joint_event.text) < max_join_size:
                                        del event_list[count + 1]
                                        event_list[count] = joint_event
                                        merge_count = merge_count + 1
                                    count = count + 1
                            new_ssafile.events.extend(event_list)
                            split_count = split_count + len(event_list)
                            break
                        # The tail is still too long: keep the head and go
                        # on splitting the tail.
                        split_events = split_event(joint_event, last_index)
                        event_list.append(split_events[0])
                        joint_event = split_events[1]
                    else:
                        # No usable split position was found.
                        break

                if last_index < 0:
                    # A split took place. With flag -1 the candidate is
                    # skipped; with flag -2 the same candidate is examined
                    # again on the next iteration.
                    if last_index > -2:
                        event_count = event_count + 1
                    continue

        # Neither joined nor split: the candidate starts a new event.
        new_ssafile.events.append(temp_ssafile.events[event_count])
        event_count = event_count + 1

    # Put the events of the other styles back in front.
    for events in sorted_events_list:
        new_ssafile.events = events + new_ssafile.events

    print(_("Merge {count} times.").format(count=merge_count))
    print(_("Split {count} times.").format(count=split_count))
    delta = len(subtitles.events) - len(new_ssafile.events)
    if delta > 0:
        print(_("Reduce {count} lines of events.").format(count=delta))
    else:
        print(_("Add {count} lines of events.").format(count=-delta))

    return new_ssafile
# Check if there any ass files found if len(subtitles_full_path) > 0: logger.debug('Found {ASS_FILES_COUNT} ass files, They are {ASS_FILES_LIST}'.format_map({ 'ASS_FILES_COUNT': len(subtitles_full_path), 'ASS_FILES_LIST': str(subtitles_full_path), })) else: logger.warning('Cannot find any ass files.') exit() # create an ass file for unparsed Dialogues # The idea of this file is: if this script is not able to parse # the file for any reason and throw an exception, in this case # the file should just store the original Dialogue with it's style # This way we can keep all these broken rows to be investigated later. unparsed_ass = pysubs2.SSAFile() # For each subtitle file for full_file_path in subtitles_full_path: # Logging the current file logger.debug('Working on file {FILE_NAME}'.format_map({'FILE_NAME': full_file_path})) try: # Load the subtitle file and parse it fl = pysubs2.load(full_file_path) logger.debug('Loaded the file successfully.') logger.debug('File "{FILE_NAME}" has "{STYLES_NUMBER}" styles, and there names are \n"{STYLES_LIST}"'.format_map({ 'FILE_NAME': full_file_path, 'STYLES_NUMBER': len(fl.styles),
if record_raw: ensure_dir(current_directory + '/comment_log_raw') raw_log_path = current_directory + '/comment_log_raw/' + chat_channel + '.txt' log_path = current_directory + '/comment_log/' + chat_channel + '.txt' subs_log_path = current_directory + '/comment_log/' + chat_channel + '.ass' bot = irc_bot.irc_bot(username, oauth, chat_channel, chat_server[0], chat_server[1], twitchclient_version=twitchclient_version) subs = pysubs2.SSAFile() i = 0 text = '' while 1: raw_msg_list = bot.get_message() if len(raw_msg_list) > 0: if len(text) > 0: end = pysubs2.time.make_time(ms=datetime.now().microsecond) subs.insert( i, pysubs2.SSAEvent(start=start, end=end, text=text.replace('\\', '\\\\'))) i = i + 1