def pysubs2_ssa_event_add(
        src_ssafile,
        dst_ssafile,
        text_list,
        style_name,
):
    """
    Append new pysubs2 events to ``dst_ssafile.events``.

    Every appended event gets ``style_name`` as its style ('Default' when
    ``style_name`` is falsy). Where the timings and texts come from depends
    on the arguments:

    * ``text_list`` falsy: one text-less event per event of ``src_ssafile``,
      copying its start and end.
    * ``text_list`` given, ``src_ssafile`` truthy: ``text_list`` is
      ``[text, ...]``; the i-th text is paired with the timing of the i-th
      event of ``src_ssafile``.
    * ``text_list`` given, ``src_ssafile`` falsy: ``text_list`` is either
      ``[((start, end), text), ...]`` or ``[(start, end), ...]`` with int
      starts; the shape is detected from ``text_list[0][0]`` and any other
      shape appends nothing.
    """
    style_name = style_name or 'Default'

    def _emit(**attrs):
        # Set the given attributes in call order, then the style, and append.
        new_event = pysubs2.SSAEvent()
        for attr_name, attr_value in attrs.items():
            setattr(new_event, attr_name, attr_value)
        new_event.style = style_name
        dst_ssafile.events.append(new_event)

    if not text_list:
        # src_ssafile provides regions only
        for idx in range(len(src_ssafile.events)):
            timing = src_ssafile.events[idx]
            _emit(start=timing.start, end=timing.end)
        return

    if src_ssafile:
        # src_ssafile provides regions, text_list is [text, text, ...]
        for idx in range(len(text_list)):
            timing = src_ssafile.events[idx]
            _emit(start=timing.start, end=timing.end, text=text_list[idx])
        return

    first_key = text_list[0][0]
    if isinstance(first_key, tuple):
        # text_list is [((start, end), text), ...]
        for (start, end), text in text_list:
            _emit(start=start, end=end, text=text)
    elif isinstance(first_key, int):
        # text_list is [(start, end), ...] and provides regions only
        for start, end in text_list:
            _emit(start=start, end=end)
def _create_bullets(self, content, animation):
    """Add vocabulary information into subtitle object

    Registers a "Bullet" style derived from a copy of the "Default" style,
    replaces the ASS hard line breaks of every existing event with spaces,
    and appends one "Bullet" event per entry of ``content``.

    Args:
        content (list): vocabulary information with time stamp. Each entry
            is indexed with "start", "end" and "words"; each word is indexed
            with "word", "meaning" and "dict_pos".
        animation (bool): whether using animation in ass. A zero-length
            bullet is emitted without the scroll effect because the scroll
            delay is derived by dividing by the duration.
    """
    style = self._subs.styles["Default"].copy()
    style.alignment = 7
    style.fontsize = 13
    style.borderstyle = 1
    style.shadow = 0.5  # shadow: 0.5 px
    # shadow color: black with (255-100)/255 transparent
    style.backcolor = pysubs2.Color(0, 0, 0, 100)
    style.outline = 0.5  # font outline: 0.5 px
    # outline color: black with (255-20)/255 transparent
    style.outlinecolor = pysubs2.Color(0, 0, 0, 20)
    style.marginl = 70
    style.marginv = 30
    # font color: white, no transparent
    style.primarycolor = pysubs2.Color(255, 255, 255, 0)
    self._subs.styles["Bullet"] = style

    for s in self._subs:
        s.text = s.text.replace("\\N", " ")

    def _to_ms(stamp):
        # Parse a timestamp string with pysubs2's own regex.
        return pysubs2.time.timestamp_to_ms(
            pysubs2.time.TIMESTAMP.match(stamp).groups())

    for bullet in content:
        # The first colour tag previously used the invalid escape "\c";
        # "\\c" yields the same runtime text without the warning.
        ws = "\\N".join([
            "\\h\\h\\h\\h".join([
                "{\\c&H58E08F&}" + w["word"],
                "{\\c&HFFFFFF&}" + w["meaning"],
                "{\\c&H2AD6C4&}" + "[" + w["dict_pos"] + "]"
            ]) for w in bullet["words"]
        ])
        start = _to_ms(bullet["start"])
        end = _to_ms(bullet["end"])
        if animation and end != start:
            event = pysubs2.SSAEvent(
                start=start,
                end=end,
                text=ws,
                style="Bullet",
                effect="Scroll up;10;110;" + str(100000 / (0.90 * (end - start))))
        else:
            event = pysubs2.SSAEvent(start=start,
                                     end=end,
                                     text=ws,
                                     style="Bullet")
        self._subs.append(event)
def man_get_vtt_words_index(self):
    """
    Build SSAEvent events from a manually edited text dump of the words.

    The text returned by ``self.to_text_str()`` is written next to
    ``self.path`` (last three characters replaced by "txt") through
    ``str_to_file``, and the user is asked to edit it and press Enter.
    Each line of the edited file then becomes one event: its words
    overwrite ``self.vtt_words[...].word`` in order, the running word count
    is appended to ``self.vtt_words_index``, and the event spans from the
    start of the line's first word to the end of the last word consumed so
    far. The text file is removed afterwards and the events are returned.
    """
    txt_path = self.path[:-3] + "txt"
    txt_path = str_to_file(
        str_=self.to_text_str(),
        output=txt_path,
        input_m=input)
    input(_("Wait for the events manual adjustment. "
            "Press Enter to continue."))

    line_events = []
    word_pos = 0
    with open(txt_path, encoding=constants.DEFAULT_ENCODING) as edited_file:
        for edited_line in edited_file:
            line_event = pysubs2.SSAEvent(
                start=self.vtt_words[word_pos].start)
            for edited_word in edited_line.split():
                self.vtt_words[word_pos].word = edited_word
                word_pos += 1
            self.vtt_words_index.append(word_pos)
            if word_pos:
                line_event.end = self.vtt_words[word_pos - 1].end
            line_events.append(line_event)

    constants.delete_path(txt_path)
    return line_events
def add(self, begin, end, text):
    """
    Insert a 'Dialogue' event built from ``text`` into this sequence.

    ``text`` is parsed by ``parseLine`` into keyword arguments for the
    event; ``begin`` and ``end`` are multiplied by 1000.0 to obtain the
    event's start and end. The event is inserted at the index reported by
    ``bisect.bisect_left``.
    """
    fields = parseLine(text)
    start_ms = begin * 1000.0
    end_ms = end * 1000.0
    dialogue = pysubs2.SSAEvent(type='Dialogue',
                                start=start_ms,
                                end=end_ms,
                                **fields)
    position = bisect.bisect_left(self, dialogue)
    self.insert(position, dialogue)
def add(self, begin, end, text):
    """
    Add one chunk of text to this sequence.

    A chunk starting with '[Script Info]' is handed to ``self.setHeader``
    and nothing is inserted. Any other chunk is parsed by ``parseLine``
    into keyword arguments for a 'Dialogue' event whose start and end are
    ``begin`` and ``end`` multiplied by 1000.0; the event is inserted at
    the index reported by ``bisect.bisect_left``.
    """
    if text.startswith('[Script Info]'):
        self.setHeader(text)
        return

    fields = parseLine(text)
    start_ms = begin * 1000.0
    end_ms = end * 1000.0
    dialogue = pysubs2.SSAEvent(type='Dialogue',
                                start=start_ms,
                                end=end_ms,
                                **fields)
    position = bisect.bisect_left(self, dialogue)
    self.insert(position, dialogue)
def convert_yt_comments(jsonname, comment_duration, video_info, outputname):
    """
    Convert a JSON chat dump into a subtitle file of moving comments.

    Args:
        jsonname: path of the JSON file; it must hold a list of messages,
            each indexed with "time_in_seconds" and "message".
        comment_duration: value used both as the end offset of each event
            (added to its start in ms) and as the last argument of the
            move tag.
        video_info: mapping indexed with "duration" (compared in seconds
            against "time_in_seconds"); later messages are skipped.
        outputname: path handed to ``SSAFile.save``.

    Returns:
        None. Nothing is written when the JSON list is empty.
    """
    # JSON is read as UTF-8 explicitly instead of the locale's default
    # encoding, so non-ASCII chat text loads the same on every platform.
    with open(jsonname, encoding="utf-8") as f:
        yt_comments = json.load(f)
    if not yt_comments:
        return

    subs = pysubs2.SSAFile()
    subs.info["PlayResX"] = 384
    subs.info["PlayResY"] = 288
    start_time_shift = yt_comments[0]["time_in_seconds"] * 1000

    # One slot per comment row; a slot remembers the last message put there.
    comment_size = 20
    comment_channel = [None] * len(
        range(0, subs.info["PlayResY"], comment_size))

    for msg in yt_comments:
        now = msg["time_in_seconds"] * 1000
        if now > video_info["duration"] * 1000:
            continue
        if not msg["message"]:
            continue

        # Take the first row that is empty or whose occupant started long
        # enough ago; fall back to row 1 when every row is busy.
        selected_channel = 1
        for index, chan in enumerate(comment_channel):
            if (not chan or chan["time_in_seconds"] * 1000 +
                    (200 * len(msg["message"])) < now):
                comment_channel[index] = msg
                selected_channel = index + 1
                break

        row_y = str(selected_channel * comment_size)
        # "\\move" spells the same text the invalid escape "\m" produced.
        movement = ("{\\move(414," + row_y + ",-30," + row_y + ",0," +
                    str(comment_duration) + ")}")
        subs.append(
            pysubs2.SSAEvent(
                start=pysubs2.make_time(ms=now),
                end=pysubs2.make_time(ms=now + comment_duration),
                text=movement + msg["message"]))

    subs.shift(ms=-start_time_shift + 100)
    subs.save(outputname)
def add_credits(subs):
    """Add credits to the software at the end of the subtitle SRT file.

    The credit event starts 2000 ms after the end of the last event in
    ``subs`` and lasts 2000 ms. When ``subs`` holds no event the credit
    starts at 2000 ms instead of failing on the missing last event.

    Parameters
    ----------
    subs : `pysubs2.ssafile.SSAFile`, required
        subtitle object with all the subtitle events

    Returns
    -------
    `pysubs2.ssafile.SSAFile` : subtitle object with credits appended at the end
    """
    # An empty file has no last event to anchor on; treat its end as 0.
    last_end = int(subs[-1].end) if len(subs) else 0
    start = last_end + 2000
    end = start + 2000
    text = "Processed with <i>ReSuber</i>.\nCheck the github page <font color=\"blue\"> https://github.com/polak0v/ReSuber </font> !"
    event = pysubs2.SSAEvent(start=start, end=end, text=text)
    subs += [event]

    return subs
def make_ass(wav, segments, transcriptions, utt2spk, ass):
    """
    Build an .ASS subtitle file from transcriptions

    Arguments:
      wav: name of the audio file
      segments: path to the segments description file
      transcriptions: path to the transcription file
      utt2spk: path to the file mapping segments to speakers
      ass: path to the .ASS subtitle file
    """
    sub = pysubs2.SSAFile()
    sub.info['Title'] = 'Default Aegisub file'
    sub.info['YCbCr Matrix'] = 'None'
    sub.aegisub_project['Audio File'] = wav
    sub.aegisub_project['Scroll Position'] = 0
    sub.aegisub_project['Active Line'] = 0

    segments_df = pd.read_csv(segments,
                              header=None,
                              sep=' ',
                              names=['utt_id', 'wav', 'start', 'end'])
    transcriptions_df = pd.read_csv(transcriptions,
                                    sep='\t',
                                    header=None,
                                    names=['utt_id', 'text'])
    utt2spk_df = pd.read_csv(utt2spk,
                             sep='\t',
                             header=None,
                             names=['utt_id', 'speaker'])

    # Left-join text and speaker onto the segments; missing ones become ''.
    with_text = segments_df.merge(transcriptions_df, how='left', on='utt_id')
    with_speaker = with_text.merge(utt2spk_df, how='left', on='utt_id')
    events = with_speaker.fillna('')

    # Row layout after the joins: utt_id, wav, start, end, text, speaker.
    for _utt_id, _wav_name, seg_start, seg_end, seg_text, speaker in events.values:
        sub.events.append(
            pysubs2.SSAEvent(start=pysubs2.make_time(s=float(seg_start)),
                             end=pysubs2.make_time(s=float(seg_end)),
                             text=seg_text,
                             name=speaker))

    sub.sort()
    sub.save(ass, format_='ass')
def auditok_gen_speech_regions(  # pylint: disable=too-many-arguments
        audio_wav,
        energy_threshold=constants.DEFAULT_ENERGY_THRESHOLD,
        min_region_size=constants.DEFAULT_MIN_REGION_SIZE,
        max_region_size=constants.DEFAULT_MAX_REGION_SIZE,
        max_continuous_silence=constants.DEFAULT_CONTINUOUS_SILENCE,
        mode=auditok.StreamTokenizer.STRICT_MIN_LENGTH,
        is_ssa_event=False):
    """
    Give an input audio/video file, generate proper speech regions.

    ``audio_wav`` is tokenized with an energy validator. The three size
    arguments are multiplied by 100 before being handed to the tokenizer,
    and token boundaries are multiplied by 10 on the way out.
    NOTE(review): this presumably converts seconds to 10 ms analysis
    windows and windows to milliseconds; confirm against the auditok
    version in use.

    Returns a list of ``(start, end)`` tuples, or a list of
    ``pysubs2.SSAEvent`` objects when ``is_ssa_event`` is true.
    """
    asource = auditok.ADSFactory.ads(
        filename=audio_wav, record=True)
    validator = auditok.AudioEnergyValidator(
        sample_width=asource.get_sample_width(),
        energy_threshold=energy_threshold)
    tokenizer = auditok.StreamTokenizer(
        validator=validator,
        min_length=int(min_region_size * 100),
        max_length=int(max_region_size * 100),
        max_continuous_silence=int(max_continuous_silence * 100),
        mode=mode)
    # auditok.StreamTokenizer.DROP_TRAILING_SILENCE

    asource.open()
    try:
        tokens = tokenizer.tokenize(asource)
    finally:
        # Release the audio source even when tokenizing fails.
        asource.close()

    # get start and end times
    if not is_ssa_event:
        regions = [(token[1] * 10, token[2] * 10) for token in tokens]
    else:
        regions = [
            pysubs2.SSAEvent(start=token[1] * 10, end=token[2] * 10)
            for token in tokens
        ]

    # reference
    # auditok.readthedocs.io/en/latest/apitutorial.html#examples-using-real-audio-data
    return regions
def auditok_gen_stats_regions(
        auditok_stats,
        asource
):
    """
    Tokenize ``asource`` with the settings stored on ``auditok_stats`` and
    record the resulting regions and size statistics on ``auditok_stats``.

    For every token an SSAEvent is appended to ``auditok_stats.events``.
    Regions no longer than 1/8 of the maximum region size bump
    ``small_region_count``; regions at least 3/4 of it bump
    ``big_region_count``. ``delta_region_size`` is the distance between the
    average region size and half the maximum region size. The same
    ``auditok_stats`` object is returned.

    NOTE: ``asource`` is opened here and not closed by this function, and
    the average raises ZeroDivisionError when ``auditok_stats.events`` is
    empty after tokenizing.
    """
    energy_validator = auditok.AudioEnergyValidator(
        sample_width=asource.get_sample_width(),
        energy_threshold=auditok_stats.energy_t)
    asource.open()
    stream_tokenizer = auditok.StreamTokenizer(
        validator=energy_validator,
        min_length=int(auditok_stats.mnrs * 100),
        max_length=int(auditok_stats.mxrs * 100),
        max_continuous_silence=int(auditok_stats.mxcs * 100),
        mode=auditok_stats.mode)
    # auditok.StreamTokenizer.DROP_TRAILING_SILENCE
    found_tokens = stream_tokenizer.tokenize(asource)

    max_region_size = int(auditok_stats.mxrs * 1000)
    small_limit = max_region_size // 8
    big_limit = max_region_size - max_region_size // 4
    size_sum = 0

    for token in found_tokens:
        # get start and end times
        first_frame, last_frame = token[1], token[2]
        auditok_stats.events.append(pysubs2.SSAEvent(
            start=first_frame * 10,
            end=last_frame * 10))
        region_size = (last_frame - first_frame) * 10
        size_sum += region_size
        if region_size <= small_limit:
            auditok_stats.small_region_count += 1
        elif region_size >= big_limit:
            auditok_stats.big_region_count += 1

    mean_size = size_sum / len(auditok_stats.events)
    auditok_stats.delta_region_size = abs(mean_size - max_region_size // 2)
    # reference
    # auditok.readthedocs.io/en/latest/apitutorial.html#examples-using-real-audio-data
    return auditok_stats
} }) # Log the collection of styles info logger.debug(collection) # Prepare the output ass file output_ass = pysubs2.SSAFile() # This part is not working, so I will comment it till I find the reason. # output_ass.clear() # Clear the ass file from all pre-defined styles. # Insert all styles and their proper text to one ass file object for details in collection: style = pysubs2.SSAStyle() style.fontname = collection[details]['fontname'] style.bold = collection[details]['bold'] style.italic = collection[details]['italic'] event = pysubs2.SSAEvent() event.text = collection[details]['characters'] event.style = details output_ass.styles[details] = style output_ass.append(event) # Finally save the data to one ass file output_ass.save('output.ass', encoding='utf-8-sig') unparsed_ass.save('unparsed_tags.ass', encoding='utf-8-sig')
def pysubs2_ssa_event_add(  # pylint: disable=too-many-branches, too-many-statements
        src_ssafile,
        dst_ssafile,
        text_list,
        style_name,
        same_event_type=0,
):
    """
    Append new pysubs2 events to ``dst_ssafile.events``.

    Every appended event gets ``style_name`` as its style ('Default' when
    ``style_name`` is falsy).

    * ``text_list`` falsy: one text-less event per event of ``src_ssafile``,
      copying its start and end.
    * ``text_list`` given, ``src_ssafile`` falsy: ``text_list`` is either
      ``[((start, end), text), ...]`` or ``[(start, end), ...]`` with int
      starts, detected from ``text_list[0][0]``; any other shape appends
      nothing.
    * ``text_list`` given, ``src_ssafile`` truthy: ``text_list`` is
      ``[text, ...]`` and timings come from the matching source events.
      ``same_event_type`` selects the text of each new event:
      0 uses the new text alone, 1 puts the new text before the source
      text, 2 puts it after. In modes 1 and 2 the two parts are joined by
      a hard line break; when the first source event's style differs from
      ``style_name`` a style-reset tag follows the break (naming the source
      event's style in mode 1 and ``style_name`` in mode 2). Any other
      value appends nothing.
    """
    style_name = style_name or 'Default'

    def _emit(**attrs):
        # Set the given attributes in call order, then the style, and append.
        new_event = pysubs2.SSAEvent()
        for attr_name, attr_value in attrs.items():
            setattr(new_event, attr_name, attr_value)
        new_event.style = style_name
        dst_ssafile.events.append(new_event)

    if not text_list:
        # src_ssafile provides regions only
        for idx in range(len(src_ssafile.events)):
            timing = src_ssafile.events[idx]
            _emit(start=timing.start, end=timing.end)
        return

    if not src_ssafile:
        first_key = text_list[0][0]
        if isinstance(first_key, tuple):
            # text_list is [((start, end), text), ...]
            for (start, end), text in text_list:
                _emit(start=start, end=end, text=text)
        elif isinstance(first_key, int):
            # text_list is [(start, end), ...] and provides regions only
            for start, end in text_list:
                _emit(start=start, end=end)
        return

    # src_ssafile provides regions, text_list is [text, text, ...]
    if same_event_type == 0:
        same_style = None  # not consulted when the new text stands alone
    elif same_event_type == 1 or same_event_type == 2:
        same_style = src_ssafile.events[0].style == style_name
    else:
        return

    for idx in range(len(text_list)):
        src_event = src_ssafile.events[idx]
        new_text = text_list[idx]
        if same_event_type == 0:
            # append text_list to new events
            merged = new_text
        elif same_event_type == 1:
            # new text goes before the existing text in the event
            if same_style:
                separator = "\\N"
            else:
                separator = "\\N{{\\r{style_name}}}".format(
                    style_name=src_event.style)
            merged = new_text + separator + src_event.text
        else:
            # new text goes after the existing text in the event
            if same_style:
                separator = "\\N"
            else:
                separator = "\\N{{\\r{style_name}}}".format(
                    style_name=style_name)
            merged = src_event.text + separator + new_text
        _emit(start=src_event.start, end=src_event.end, text=merged)
styling.get(u'{http://www.w3.org/ns/ttml#styling}color')[3:5], 16) b = int( styling.get(u'{http://www.w3.org/ns/ttml#styling}color')[5:7], 16) a = 0 styledict[stylename] = (pysubs2.SSAStyle( primarycolor=pysubs2.Color(r=r, g=g, b=b, a=a))) for line in captions: start = pysubs2.time.timestamp_to_ms( pysubs2.time.TIMESTAMP.match(line.get('begin')).groups()) end = pysubs2.time.timestamp_to_ms( pysubs2.time.TIMESTAMP.match(line.get('end')).groups()) if len(list(line)) == 0: print('------') text = line.text style = line.get('style') sublist.append( pysubs2.SSAEvent(start=start, end=end, text=text, style=style)) else: for sentence in line: text = sentence.text style = sentence.get('style') sublist.append( pysubs2.SSAEvent(start=start, end=end, text=text, style=style)) towritesubs.events = sublist towritesubs.styles = styledict towritesubs.save(ttmlname + '.ass')
twitchclient_version=twitchclient_version) subs = pysubs2.SSAFile() i = 0 text = '' while 1: raw_msg_list = bot.get_message() if len(raw_msg_list) > 0: if len(text) > 0: end = pysubs2.time.make_time(ms=datetime.now().microsecond) subs.insert( i, pysubs2.SSAEvent(start=start, end=end, text=text.replace('\\', '\\\\'))) i = i + 1 start = pysubs2.time.make_time(ms=datetime.now().microsecond) text = '' timestamp = get_timestamp(timestamp_format) for item in raw_msg_list: if record_raw: log_add(raw_log_path, timestamp + ' ' + item + '\n') username, message = irc_bot.parse_user(item) if username != '': safe_print(chat_channel + " " + username + ": " + message) log_add(log_path, timestamp + ' ' + username + ': ' + message + '\n') text += username + ": " + message + '\n' subs.save(path=subs_log_path, encoding='utf-8')
def _create_bullets(self, content, animation):
    """Add phrase information into subtitle object

    Restyles "Default" in place, registers the "Phrase", "Verb" and "CN"
    styles as modified copies of it, discards the existing events, and
    appends three events per entry of ``content``: the marker line in the
    "Default" style, the noun phrases in "Phrase" and the verbs in "Verb".
    When ``self._cn_subs`` is truthy its events are restyled to "CN" and
    appended last.

    Args:
        content (list): phrase information with time stamp. Each entry is
            indexed with "start", "end", "noun_phrases", "verbs" and
            "markers".
        animation (bool): whether using animation in ass. A zero-length
            bullet is emitted without the scroll effect because the scroll
            delay is derived by dividing by the duration.
    """
    default_style = self._subs.styles["Default"]
    default_style.fontsize = 20
    default_style.shadow = 0.3  # shadow: 0.3 px
    default_style.outline = 0.3  # font outline: 0.3 px
    default_style.italic = -1
    default_style.bold = -1
    default_style.marginl = 10
    default_style.marginr = 10
    default_style.marginv = 30 if self._cn_subs else 10

    phrase_style = self._subs.styles["Default"].copy()
    phrase_style.italic = 0
    phrase_style.bold = 0
    phrase_style.alignment = 4
    phrase_style.fontsize = 24
    # Was "borderphrase_style", which only created an unused attribute.
    phrase_style.borderstyle = 1
    phrase_style.shadow = 0.3  # shadow: 0.3 px
    # shadow color: black with (255-100)/255 transparent
    phrase_style.backcolor = pysubs2.Color(0, 0, 0, 100)
    phrase_style.outline = 0.3  # font outline: 0.3 px
    # outline color: black with (255-20)/255 transparent
    phrase_style.outlinecolor = pysubs2.Color(0, 0, 0, 20)
    phrase_style.marginl = 24
    phrase_style.marginr = 10
    phrase_style.marginv = 10
    # font color: white, no transparent
    phrase_style.primarycolor = pysubs2.Color(255, 255, 255, 0)
    self._subs.styles["Phrase"] = phrase_style

    verb_style = self._subs.styles["Default"].copy()
    verb_style.italic = 0
    verb_style.bold = 0
    verb_style.alignment = 7
    verb_style.fontsize = 24
    # Was "borderverb_style", which only created an unused attribute.
    verb_style.borderstyle = 1
    verb_style.shadow = 0.3  # shadow: 0.3 px
    # shadow color: black with (255-100)/255 transparent
    verb_style.backcolor = pysubs2.Color(0, 0, 0, 100)
    verb_style.outline = 0.3  # font outline: 0.3 px
    # outline color: black with (255-20)/255 transparent
    verb_style.outlinecolor = pysubs2.Color(0, 0, 0, 20)
    verb_style.marginl = 24
    verb_style.marginr = 10
    verb_style.marginv = 44
    # font color: white, no transparent
    verb_style.primarycolor = pysubs2.Color(255, 255, 255, 0)
    self._subs.styles["Verb"] = verb_style

    cn_default_style = self._subs.styles["Default"].copy()
    cn_default_style.fontsize = 20
    cn_default_style.shadow = 0.1  # shadow: 0.1 px
    cn_default_style.outline = 0.1  # font outline: 0.1 px
    cn_default_style.italic = -1
    cn_default_style.bold = -1
    cn_default_style.marginl = 10
    cn_default_style.marginr = 10
    cn_default_style.marginv = 3
    self._subs.styles["CN"] = cn_default_style

    marker_colors = {
        "plain": "{\\c&HFFFFFF&}",
        "verbs": "{\\c&H7C94FF&}",
        "noun_phrases": "{\\c&H93F8E9&}",
    }

    def _to_ms(stamp):
        # Parse a timestamp string with pysubs2's own regex.
        return pysubs2.time.timestamp_to_ms(
            pysubs2.time.TIMESTAMP.match(stamp).groups())

    self._subs.events = []
    for bullet in content:
        phrases = bullet["noun_phrases"]
        _phrases = "\\N".join([
            "\\h\\h\\h\\h".join([
                marker_colors["noun_phrases"] + w["original"],
                marker_colors["plain"] + w["translated"]
            ]) for w in phrases
        ])
        _verbs = "\\N".join([
            "\\h\\h\\h\\h".join([
                marker_colors["verbs"] + w["text"],
                marker_colors["plain"] + "(" + w["lemma"] + ")",
                marker_colors["plain"] + w["meaning"]
            ]) for w in bullet["verbs"]
        ])
        start = _to_ms(bullet["start"])
        end = _to_ms(bullet["end"])

        # The scroll effect is shared by the phrase and verb events.
        extra = {}
        if animation and end != start:
            extra["effect"] = ("Scroll up;10;110;" +
                               str(100000 / (0.90 * (end - start))))
        phrase_event = pysubs2.SSAEvent(start=start,
                                        end=end,
                                        text=_phrases,
                                        style="Phrase",
                                        **extra)
        verb_event = pysubs2.SSAEvent(start=start,
                                      end=end,
                                      text=_verbs,
                                      style="Verb",
                                      **extra)

        # Each marker's second item selects its colour from marker_colors.
        _markers = " ".join([
            marker_colors[w[1]] + _underline(w) for w in bullet["markers"]
        ])
        event = pysubs2.SSAEvent(start=start,
                                 end=end,
                                 text=_markers,
                                 style="Default")
        self._subs.append(event)
        self._subs.append(phrase_event)
        self._subs.append(verb_event)

    if self._cn_subs:
        for e in self._cn_subs:
            e.style = "CN"
            self._subs.append(e)
def merge_bilingual_assfile(  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        subtitles,
        order=1):
    """
    Merge bilingual subtitles file's events automatically.

    The events of ``subtitles`` are grouped by style. The two styles with
    the most events are treated as the two languages ("dst" and "src");
    their events are walked in time order and overlapping pairs are merged
    into one event whose text is the dst text, a hard line break with a
    style-reset tag naming the src style, and the src text.

    ``order`` (truthy by default) lets the position of each style's first
    event decide which of the two styles is "dst": the one appearing later
    in the file when it is also the larger group.

    Returns a new ``pysubs2.SSAFile`` sharing the ``styles`` and ``info``
    of ``subtitles``. Raises IndexError when fewer than two styles are
    present (the second ``pop`` fails).

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; confirm against the original file.
    """
    # style name -> events with that style, and style name -> index of the
    # first event carrying it.
    style_events = {}
    event_pos = {}
    i = 0
    for event in subtitles.events:
        if event.style not in style_events:
            style_events[event.style] = [event]
            event_pos[event.style] = i
        else:
            style_events[event.style].append(event)
        i = i + 1

    # Sorted by group size, so the two pops take the two largest groups and
    # leave any remaining styles in sorted_events_list.
    sorted_events_list = sorted(style_events.values(), key=len)
    events_1 = sorted_events_list.pop()
    events_2 = sorted_events_list.pop()

    dst_ssafile = pysubs2.SSAFile()
    src_ssafile = pysubs2.SSAFile()
    if event_pos[events_1[0].style] > event_pos[events_2[0].style] and order:
        # destination language events are behind source language events in a bilingual subtitles
        dst_ssafile.events = events_1
        src_ssafile.events = events_2
    else:
        dst_ssafile.events = events_2
        src_ssafile.events = events_1

    dst_ssafile.sort()
    src_ssafile.sort()

    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info

    # default in dst-lf-src order
    dst_length = len(dst_ssafile.events)
    src_length = len(src_ssafile.events)
    i = 0
    j = 0
    start = 0
    end = 0
    # Events (or leftover slices of events) that found no partner; they are
    # added after the merged events at the end.
    events_0 = []
    while i < dst_length and j < src_length:
        # A comment never merges with a dialogue line: set the comment
        # aside and advance only that side.
        if dst_ssafile.events[i].is_comment != src_ssafile.events[j].is_comment:
            if dst_ssafile.events[i].is_comment:
                events_0.append(dst_ssafile.events[i])
                i = i + 1
                continue
            events_0.append(src_ssafile.events[j])
            j = j + 1
            continue
        if dst_ssafile.events[i].start == src_ssafile.events[j].start or \
                dst_ssafile.events[i].end == src_ssafile.events[j].end:
            # Sharing a start or an end: merge using the dst timing.
            start = dst_ssafile.events[i].start
            end = dst_ssafile.events[i].end
        elif dst_ssafile.events[i].start >= src_ssafile.events[j].end:
            # src lies entirely before dst: no overlap, set src aside.
            events_0.append(src_ssafile.events[j])
            j = j + 1
            continue
        elif src_ssafile.events[j].start >= dst_ssafile.events[i].end:
            # dst lies entirely before src: no overlap, set dst aside.
            events_0.append(dst_ssafile.events[i])
            i = i + 1
            continue
        elif src_ssafile.events[j].start < dst_ssafile.events[i].start:
            # src starts first: its leading part becomes a src-only event.
            event = pysubs2.SSAEvent()
            event.start = src_ssafile.events[j].start
            event.end = dst_ssafile.events[i].start
            event.is_comment = src_ssafile.events[j].is_comment
            event.text = src_ssafile.events[j].text
            event.style = src_ssafile.events[j].style
            events_0.append(event)
            start = dst_ssafile.events[i].start
            if src_ssafile.events[j].end > dst_ssafile.events[i].end:
                # src also ends last: its trailing part is src-only too.
                event = pysubs2.SSAEvent()
                event.start = dst_ssafile.events[i].end
                event.end = src_ssafile.events[j].end
                event.is_comment = src_ssafile.events[j].is_comment
                event.text = src_ssafile.events[j].text
                event.style = src_ssafile.events[j].style
                events_0.append(event)
                end = dst_ssafile.events[i].end
            else:
                end = src_ssafile.events[j].end
        elif dst_ssafile.events[i].start < src_ssafile.events[j].start:
            # dst starts first: mirror image of the branch above.
            event = pysubs2.SSAEvent()
            event.start = dst_ssafile.events[i].start
            event.end = src_ssafile.events[j].start
            event.is_comment = dst_ssafile.events[i].is_comment
            event.text = dst_ssafile.events[i].text
            event.style = dst_ssafile.events[i].style
            events_0.append(event)
            start = src_ssafile.events[j].start
            if dst_ssafile.events[i].end > src_ssafile.events[j].end:
                event = pysubs2.SSAEvent()
                event.start = src_ssafile.events[j].end
                event.end = dst_ssafile.events[i].end
                event.is_comment = dst_ssafile.events[i].is_comment
                event.text = dst_ssafile.events[i].text
                event.style = dst_ssafile.events[i].style
                events_0.append(event)
                end = src_ssafile.events[j].end
            else:
                end = dst_ssafile.events[i].end
        # Reached only by the branches that did not `continue`: emit the
        # merged event for the [start, end] range chosen above.
        event = pysubs2.SSAEvent()
        event.start = start
        event.end = end
        event.is_comment = dst_ssafile.events[i].is_comment
        event.text = \
            dst_ssafile.events[i].text + \
            "\\N{{\\r{style_name}}}".format(
                style_name=src_ssafile.events[j].style) + \
            src_ssafile.events[j].text
        event.style = dst_ssafile.events[i].style
        new_ssafile.events.append(event)
        i = i + 1
        j = j + 1

    # Merged events first, then the set-aside events, then whatever is left
    # of the side that was not exhausted.
    if i < dst_length:
        new_ssafile.events = new_ssafile.events + events_0 + dst_ssafile.events[
            i:]
    else:
        new_ssafile.events = new_ssafile.events + events_0 + src_ssafile.events[
            j:]

    # Remaining styles go after or before the result depending on where
    # their first event sat relative to the style of the first result event.
    # NOTE(review): new_ssafile.events[0] raises IndexError if the result
    # is empty at this point.
    for events in sorted_events_list:
        if event_pos[events[0].style] > event_pos[new_ssafile.events[0].style]:
            new_ssafile.events = new_ssafile.events + events
        else:
            new_ssafile.events = events + new_ssafile.events

    return new_ssafile
def auto_get_vtt_words_index(
        self,
        events,
        stop_words_set_1,
        stop_words_set_2,
        text_limit=constants.DEFAULT_MAX_SIZE_PER_EVENT,
        avoid_split=False):
    """
    Adjust end timestamps and get SSAEvent events and self.vtt_words_index
    automatically by external regions.

    Walks ``self.vtt_words`` (index ``j``) and ``events`` (index ``i``)
    together. Words starting before the current event's end are counted
    into that event; when the accumulated text length exceeds
    ``text_limit`` (and ``avoid_split`` is false) the event is split at a
    word picked by ``find_split_vtt_word`` from ``stop_words_set_1``, or
    from ``stop_words_set_2`` when the first set gives no split.

    ``events`` is modified in place (ends adjusted, events inserted and
    deleted) and word start/end times in ``self.vtt_words`` are adjusted.

    Returns the events truncated to the number of recorded word indexes,
    after storing those indexes in ``self.vtt_words_index``, when every
    word was consumed; returns None otherwise.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; confirm against the original file.
    """
    i = 0
    j = 0
    vtt_words_len = len(self.vtt_words)
    # Leading 0 is a sentinel so that [-1] is always valid; it is dropped
    # before the result is stored.
    vtt_words_index = [0]
    is_started = False
    # last_len = 0
    text_len = 0
    events_len = len(events)
    while j < vtt_words_len and i < events_len:
        if self.vtt_words[j].start < events[i].end:
            if not is_started:
                # start_delta = events[i].start - self.vtt_words[j].start
                # if start_delta < 1000:
                # inside the event
                # start_delta < 0
                # or a little ahead of time
                # 0 <= start_delta < 300
                # First word of this event: snap its start to the event's.
                self.vtt_words[j].start = events[i].start
                if self.vtt_words[j].end <= self.vtt_words[j].start:
                    # Snapping left the word with no positive duration;
                    # give it one from its neighbour, or 200 if it is the
                    # last word.
                    if j < vtt_words_len - 1:
                        if self.vtt_words[j].start < self.vtt_words[
                                j + 1].start:
                            self.vtt_words[j].end = self.vtt_words[j + 1].start
                        else:
                            delta = \
                                (self.vtt_words[j + 1].end - self.vtt_words[j].start) >> 1
                            self.vtt_words[
                                j].end = delta + self.vtt_words[j].start
                            self.vtt_words[
                                j + 1].start = delta + self.vtt_words[j].end
                    else:
                        self.vtt_words[
                            j].end = self.vtt_words[j].start + 200
                is_started = True
                # else:
                #     # check if it's necessary to insert new events
                #     if i < len(events) - 1:
                #         events.insert(
                #             i,
                #             pysubs2.SSAEvent(start=self.vtt_words[j].start,
                #                              end=events[i].start))
                #     else:
                #         events.insert(
                #             i,
                #             pysubs2.SSAEvent(start=self.vtt_words[j].start,
                #                              end=self.vtt_words[j].start + 5000))
                #     events[i].is_comment = True
                #     # the end time is estimated so it needs a trim
                #     continue
            # Length of the word plus one for the separating space.
            text_len = text_len + len(self.vtt_words[j].word) + 1
            if text_len > text_limit and not avoid_split:
                vtt_word_dict = get_vtt_slice_pos_dict(
                    self.vtt_words[vtt_words_index[-1]:j])
                stop_word_set = stop_words_set_1 & set(
                    vtt_word_dict.keys())
                last_index = find_split_vtt_word(
                    total_length=text_len,
                    stop_word_set=stop_word_set,
                    vtt_word_dict=vtt_word_dict,
                    min_range_ratio=0.1)
                if not last_index[1]:
                    # Nothing usable from the first set; retry with the
                    # second one.
                    stop_word_set = stop_words_set_2 & set(
                        vtt_word_dict.keys())
                    last_index = find_split_vtt_word(
                        total_length=text_len,
                        stop_word_set=stop_word_set,
                        vtt_word_dict=vtt_word_dict,
                        min_range_ratio=0.1)
                if 0 < last_index[1] < text_limit:
                    # Split the event at the chosen word: the current event
                    # ends where that word starts and a new event covers
                    # the remainder.
                    vtt_words_index.append(vtt_words_index[-1] + last_index[0])
                    last_end = events[i].end
                    events[i].end = self.vtt_words[
                        vtt_words_index[-1]].start
                    events.insert(
                        i + 1,
                        pysubs2.SSAEvent(start=events[i].end, end=last_end))
                    i = i + 1
                    events_len = events_len + 1
                    text_len = text_len - last_index[1]
            j = j + 1
        else:
            # The current word starts at or after the event's end.
            if text_len:
                # if events[i].is_comment:
                #     # trim the empty region
                #     cur_speed = text_len * 1000 // events[i].duration
                #     if last_len:
                #         last_speed = last_len * 1000 // events[i - 1].duration
                #     else:
                #         last_speed = 10
                #     if cur_speed < (last_speed >> 2):
                #         events[i].duration = last_speed * events[i].duration // 1000
                #         events[i].is_comment = False
                # last_len = text_len
                text_len = 0
                if j - vtt_words_index[-1] > 1:
                    if self.vtt_words[j - 1].speed < 10:
                        # if the duration is too big
                        # it means the start time is not accurate
                        j = j - 1
                # Close the event: its last word ends with it.
                self.vtt_words[j - 1].end = events[i].end
                vtt_words_index.append(j)
                is_started = False
                i = i + 1
            else:
                # No word fell into this event; drop it.
                # NOTE(review): events_len is not decremented here, so the
                # loop bound can exceed len(events) — confirm intended.
                del events[i]

    vtt_words_index = vtt_words_index[1:]
    if j == vtt_words_len:
        vtt_words_index.append(j)
        events = events[:len(vtt_words_index)]
        self.vtt_words_index = vtt_words_index
        return events
    return None
def man_get_vtt_words_index(self):
    """
    Build SSAEvent events from a manually edited text dump of the words,
    checking each edited word against ``self.vtt_words``.

    The text returned by ``self.to_text_str()`` is written next to
    ``self.path`` (last three characters replaced by "txt") through
    ``str_to_file`` and the user is asked to edit it. Every line of the
    edited file becomes one event. A word that differs from the stored
    word and does not fuzzy-match it (after lowercasing and removing
    punctuation and spaces) is shown to the user, who either inputs "1" to
    overwrite the stored word or presses Enter to fix the file by hand; in
    the latter case the file is read again and processing resumes from the
    same line and word.

    Appends the running word count to ``self.vtt_words_index`` per line,
    deletes the text file, and returns the list of events.

    NOTE(review): the nesting below, in particular which ``if`` the
    ``else`` belongs to, was reconstructed from a source whose indentation
    was lost; confirm against the original file.
    """
    events = []
    path = self.path[:-3] + "txt"
    path = str_to_file(str_=self.to_text_str(), output=path, input_m=input)
    input(
        _("Wait for the events manual adjustment. "
          "Press Enter to continue."))
    line_count = 0  # line to resume from after a manual fix
    i = 0  # index into self.vtt_words
    j = 0  # index into the current line's words
    vtt_len = len(self.vtt_words)
    is_paused = False
    # Maps every punctuation character to a space for the fuzzy comparison.
    trans = str.maketrans(string.punctuation, " " * len(string.punctuation))
    while True:
        # Re-read the whole file on every pass so manual fixes are seen.
        file_p = open(path, encoding=constants.DEFAULT_ENCODING)
        line_list = file_p.readlines()
        line_list_len = len(line_list)
        file_p.close()
        k = line_count
        while k < line_list_len:
            word_list = line_list[k].split()
            event = pysubs2.SSAEvent(start=self.vtt_words[i].start)
            word_list_len = len(word_list)
            while j < word_list_len:
                if self.vtt_words[i].word != word_list[j]:
                    if fuzz.partial_ratio(
                            self.vtt_words[i].word.lower().translate(
                                trans).replace(" ", ""),
                            word_list[j].lower().translate(trans).replace(
                                " ", "")) != 100:
                        # Real mismatch: show the stored words around the
                        # current position next to the edited line.
                        if self.vtt_words_index:
                            start_delta = self.vtt_words_index[-1]
                        else:
                            start_delta = 0
                        if i < vtt_len - 5:
                            end_delta = i + 6
                        else:
                            end_delta = vtt_len
                        print(
                            _("\nLine {num}, word {num2}").format(
                                num=len(events), num2=j))
                        cur_line = ""
                        for vtt_word in self.vtt_words[
                                start_delta:end_delta]:
                            cur_line = "{cur_line} {word}".format(
                                cur_line=cur_line, word=vtt_word.word)
                        print(cur_line)
                        print(" ".join(word_list))
                        print("{word} | {word2}".format(
                            word=self.vtt_words[i].word,
                            word2=word_list[j]))
                        result = input(
                            _("Press Enter to manual adjust. "
                              "Input 1 to overwrite."))
                        if result != "1":
                            # Remember the line and pause; i and j keep
                            # their values for the next pass.
                            line_count = k
                            is_paused = True
                            break
                        self.vtt_words[i].word = word_list[j]
                        is_paused = False
                    else:
                        # Fuzzy match: accept the edited spelling.
                        if is_paused:
                            is_paused = False
                        self.vtt_words[i].word = word_list[j]
                i = i + 1
                j = j + 1
                # NOTE(review): self.vtt_words[i] is indexed above, so
                # i == vtt_len already overruns; `>` may be meant as `>=`.
                if i > vtt_len:
                    break
            if is_paused:
                break
            j = 0
            self.vtt_words_index.append(i)
            if i:
                event.end = self.vtt_words[i - 1].end
            events.append(event)
            k = k + 1
        if not is_paused:
            break
    constants.DELETE_PATH(path)
    return events