def makeFile(filename):
    """Transliterate a subtitle file in batches and save a "_ramaji_" copy.

    Event texts are joined n at a time with a sentinel separator, pushed
    through ``transliterate`` in a single call, then split back onto the
    individual events of a second copy of the file.

    Args:
        filename: path to the subtitle file; the extension is assumed to be
            exactly 4 characters (e.g. ".ass") — see the save call below.
    """
    orig = pysubs2.load(filename, encoding="utf-8-sig")
    # Second copy of the same file; its event texts are overwritten below.
    romanji = pysubs2.load(filename, encoding="utf-8-sig")
    n = 25  # batch size: events transliterated per call
    symb = "- | -"  # separator assumed never to occur in subtitle text
    for i in range(0, len(orig) - n, n):
        if i % 100 == 0:
            print(i)  # crude progress indicator
        # Join the next n event texts into one string for a single
        # transliterate() round-trip, then split them apart again.
        lines = orig[i].text
        for j in range(1, n):
            lines += symb + orig[i + j].text
        lines = transliterate(lines)
        lines = lines.split(symb)
        for j in range(n):
            romanji[i + j].text = lines[j]
    # Tail handling: process the events the strided loop did not cover.
    # NOTE(review): `i += 1 - n` moves i *backwards*; `i += n` (start of the
    # unprocessed tail) looks intended — as written, already-processed events
    # are redone from a too-early index. Confirm against the original project.
    i += 1 - n
    lines = orig[i].text
    for j in range(1, len(orig) - i):
        lines += symb + orig[i + j].text
    lines = transliterate(lines)
    lines = lines.split(symb)
    for j in range(len(orig) - i):
        romanji[i + j].text = lines[j]
    # "_ramaji_" (sic) is a runtime filename component, kept as-is.
    romanji.save(filename[:-4] + '_ramaji_' + filename[-4:])
def retrive_info(folder, tagPattern=re.compile('【(.*?)】')):
    """Harvest bilingual subtitle lines from *folder* and print them to ``result``.

    Reads the folder's meta-info.txt for the Chinese/English titles, then scans
    each candidate subtitle file for Chinese-over-English event pairs and writes
    them, '/////'-separated, to the module-level ``result`` file handle.
    The folder is deleted afterwards.

    Args:
        folder: directory whose name carries a 【tag】 and which contains
            meta-info.txt plus subtitle files.
        tagPattern: compiled regex pulling the tag out of the folder name.
    """
    print(folder)
    tag = tagPattern.search(folder).group(1)
    # NOTE(review): '\m' is an invalid escape (currently a literal backslash),
    # and the backslash makes this Windows-only — os.path.join would be safer.
    metainfo = open(folder + '\meta-info.txt', encoding='utf-8').read().split('\n')
    ch_title = metainfo[0].replace('【中文】', '')  # Chinese title of the film
    en_title = metainfo[1].replace('【英名】', '')  # English title of the film
    # Files whose names contain "简体&英文" (Simplified Chinese & English).
    # NOTE(review): this calls a name that shadows the builtin filter() —
    # presumably a project helper defined elsewhere; verify.
    assets = filter(folder, '简体&英文')
    for i in assets:
        try:
            subfile = pysubs2.load(i)
        except:
            try:
                subfile = pysubs2.load(i, encoding='gbk')
            except:
                continue  # unreadable file: skip it
        for lines in subfile:
            # Split the bilingual event on \n; if that fails the event is
            # monolingual or uses the ASS hard break \N — try \N, else skip.
            try:
                upperline, lowerline = lines.text.split('\n')
            except ValueError:
                try:
                    upperline, lowerline = lines.text.split(r'\N')
                except:
                    continue
            # In bilingual subs the English line is normally the lower one; a
            # Chinese character there means this event is malformed — skip it.
            if has_chinese(lowerline):
                continue
            upperline = re.sub('{.*?}', '', upperline)  # strip {...} style overrides
            lowerline = re.sub('{.*?}', '', lowerline)
            start = pysubs2.time.ms_to_str(lines.start)  # event start time
            print(lowerline, upperline, en_title, ch_title, start, tag, sep='/////', file=result)
    # If this folder had usable files, remove it so irregularly named
    # leftovers are easier to find later.
    # NOTE(review): if `assets` is a filter object this comparison is always
    # True, so the folder is removed unconditionally — confirm intent.
    if assets != []:
        shutil.rmtree(folder)
def evaluate(subtitles1, subtitles2):
    """Estimate how well two subtitle files are time-aligned, as a percentage.

    Both files are loaded; the one whose first event starts later becomes the
    reference. Each reference event earns up to ``maxReward`` when some event
    of the other file starts *and* ends within ``threshold`` ms of it, and
    loses 1 point otherwise. The summed score is normalised into 0-100.

    :param subtitles1: path of the first (temporary) subtitle file
    :param subtitles2: path of the second (temporary) subtitle file
    :return: accuracy percentage, capped at 100.0
    """
    # Load the temporary subtitle files
    subs1 = pysubs2.load(subtitles1)
    subs2 = pysubs2.load(subtitles2)

    # File subs2 must have the first timestamp
    if subs2[0].start > subs1[0].start:
        subs1, subs2 = subs2, subs1

    # Initialize parameters & indices
    score = 0
    threshold = 1000  # msec
    maxReward = 1.5
    j = 0  # subs2 cursor for start-time matching
    k = 0  # subs2 cursor for end-time matching

    # Evaluation process
    for i in range(len(subs1) - 1):
        d = 0  # accumulated start/end distance for this event
        startFlag = True
        endFlag = True
        try:
            while subs2[j].start < subs1[i].start - threshold:
                j = j + 1
            if subs2[j].start > subs1[i].start + threshold:
                startFlag = False
                j = j - 1
            else:
                d = d + abs(subs2[j].start - subs1[i].start) / 2

            while subs2[k].end < subs1[i].end - threshold:
                k = k + 1
            if subs2[k].end > subs1[i].end + threshold:
                # Bug fix: this branch concerns the *end* timestamp, but the
                # original cleared startFlag here, leaving endFlag unused.
                # (The combined `and` check below behaves the same either way.)
                endFlag = False
                k = k - 1
            else:
                d = d + abs(subs2[k].end - subs1[i].end) / 2

            if startFlag and endFlag:
                score = score + maxReward * (1 - d / threshold)
            else:
                score = score - 1
        except IndexError:
            # Ran off the end of subs2; stop scoring.
            break

    # Map score from [-(len-1), maxReward*(len-1)] into roughly [0, 1].
    NormalizedScore = (score + (len(subs1) - 1)) / (2 * (len(subs1) - 1))
    # The reward factor can push the ratio above 1 — clamp it.
    if NormalizedScore > 1:
        NormalizedScore = 1.00
    acc = NormalizedScore * 100
    return acc
def combine_subtitle_files(first_file, second_file):
    """Merge two subtitle files into a single master ASS file.

    Overlapping subtitles are kept on separate lines by ``combine_subs``.
    Returns the path of the combined file (first file's name + '_combined.ass').
    """
    base_name, _ = os.path.splitext(first_file)
    master_path = base_name + '_combined.ass'
    merged = combine_subs(pysubs2.load(first_file), pysubs2.load(second_file))
    merged.save(master_path)
    logger.info(f'Combined {first_file} and {second_file}')
    return master_path
def merge(file1, file2, outfile):
    """Append every event of *file1* to *file2*, restyle "Default", save to *outfile*."""
    extra = pysubs2.load(file1, encoding=charset_detect(file1))
    combined = pysubs2.load(file2, encoding=charset_detect(file2))
    for event in extra:
        combined.append(event)
    # Normalise the default style for the merged output.
    default_style = combined.styles["Default"]
    default_style.fontsize = 14.0
    default_style.shadow = 0.5
    default_style.outline = 1.0
    combined.save(outfile)
def __init__(self, cn_srtfile=None):
    """Initialize subtitle object from the class-level empty srtfile.

    Args:
        cn_srtfile (str): optional Chinese subtitle filename
    """
    self._subs = pysubs2.load(self.__class__._srtfile)
    # Pin the rendering resolution the styles were designed for.
    self._subs.info['PlayResX'] = 640
    self._subs.info['PlayResY'] = 360
    self._cn_subs = pysubs2.load(cn_srtfile) if cn_srtfile else None
def trans(file):
    """Convert a subtitle file's text to zh-cn and normalise its default style."""
    # Try encodings in order: auto-detected, gbk, utf-16 (last one may raise).
    try:
        subs = pysubs2.load(file)
    except UnicodeDecodeError:
        try:
            subs = pysubs2.load(file, 'gbk')
        except UnicodeDecodeError:
            subs = pysubs2.load(file, 'utf-16')
    subs.styles['Default'] = DEFAULT_STYLE
    for key, value in (('PlayResX', '384'),
                       ('PlayResY', '288'),
                       ('ScaledBorderAndShadow', 'no')):
        subs.info[key] = value
    for event in subs:
        event.text = convert(event.text, 'zh-cn')
    subs.save(output_file(file))
def check_integrity(subtitle: FFprobeSubtitleStream, path: str, sec_offset_threshold=900):
    """Relative sanity check for an extracted subtitle file.

    Detects failed ffmpeg extractions whose output is truncated or corrupted
    by comparing the last event's timestamp with the stream duration.
    Only ASS and SubRip are supported.

    :param subtitle: FFprobeSubtitle instance
    :param path: path of the subtitle file (ass or srt)
    :param sec_offset_threshold: maximum seconds of offset tolerated
    :raises: InvalidFile
    """
    if subtitle.extension not in (ASS, SRT):
        raise InvalidFile(f"Extension not supported: {subtitle.extension}")

    try:
        sub = pysubs2.load(path)
    except (pysubs2.Pysubs2Error, UnicodeError, OSError, FileNotFoundError) as error:
        raise InvalidFile(error) from error

    # Distance between the last event's end and the stream's total duration.
    off = abs(int(sub[-1].end) - subtitle.duration_ts)
    if off > abs(sec_offset_threshold) * 1000:
        raise InvalidFile(
            f"The last subtitle timestamp ({sub[-1].end/1000} sec) is {off/1000} sec ahead"
            f" from the subtitle stream total duration ({subtitle.duration} sec)"
        )
    logger.debug("Integrity check passed (%d sec offset)", off / 1000)
def LoadSubs():
    """Interactively pick a subtitle file, translate every event via GCloud,
    and save the result to a user-chosen destination.

    Uses module-level ``GUI``, ``GCloud``, ``reg`` and ``outlang``.
    """
    GUI.Clean()
    print("Select a subtitle file you wish to translate")
    file = GUI.LoadSubs()
    if file != "":
        subs = pysubs2.load(file, encoding="utf-8")  # load subs file
        # get the lang code
        language = GCloud.GetLanguageCode(subs)
        # list events for % GUI updater
        eventcount = len(subs.events)
        currentevent = 0
        GUI.Clean()
        print(f"Loaded file with {eventcount} Events and a predicted language of {language}")
        input('Press Any key to select destination and start the translation...')
        GUI.Clean()
        print("Select Destination")
        dest = GUI.SaveSubs()
        if dest == "":
            return
        starttime = time.time()
        for a in subs.events:
            currentevent += 1
            editedlist = list(filter(None, re.split(reg, a.text)))  # get rid of None entries
            tt = []
            for i in range(0, len(editedlist)):
                if not re.match(reg, str(editedlist[i])):  # decide whether it's worth translating
                    tt.append(i)
            a.text = (''.join(GCloud.TranslateSet(editedlist, tt, language, outlang)))  # recompile the translated segments
            # Bug fix: round() takes ndigits *inside* the call — the original
            # str(round(elapsed), 0) raised TypeError on every event.
            GUI.UpdateTranslation(str(round(currentevent / eventcount * 100, 1)) + "% | "
                                  + str(round(time.time() - starttime, 0)) + " Seconds Elapsed")  # update UI
        subs.save(dest)  # save file
def main(args):
    """Build an Anki .apkg deck of notes from a video and its subtitle file."""
    video_path = Path(args.video)
    subs_path = Path(args.sub or video_path.with_suffix(".ass"))
    apkg_path = Path(args.apkg or video_path.with_suffix(".apkg"))
    deck_name = args.name or str(video_path.with_suffix("").name)

    # Working directory for extracted media, named after the video.
    tmp_path = video_path.with_suffix("")
    if not tmp_path.is_dir():
        tmp_path.mkdir()

    subs = pysubs2.load(subs_path)
    notes, media_files = create_notes(subs, video_path, tmp_path,
                                      args.styles, args.offset, args.crop)

    deck = genanki.Deck(deck_id=random.randrange(1 << 30, 1 << 31), name=deck_name)
    for note in notes:
        deck.add_note(note)

    package = genanki.Package(deck)
    package.media_files = media_files
    package.write_to_file(apkg_path)
def __init__(self, srtfile):
    """Initialize subtitle object from srtfile

    Args:
        srtfile (str): subtitle filename
    """
    # Parse the subtitle file (format auto-detected by pysubs2).
    self._subs = pysubs2.load(srtfile)
def load(self, fn=None, content=None, language=None, encoding="utf-8"):
    """Parse a subtitle from a file or from an in-memory string.

    :param fn: filename to read (takes precedence over *content*)
    :param content: unicode subtitle body
    :param language: babelfish.Language of the subtitle
    :param encoding: used only when decoding *fn*; ignored for *content*
    :return: True when a subtitle object was produced
    """
    self.language = language
    self.initialized_mods = {}
    parse_errors = (IOError, UnicodeDecodeError,
                    pysubs2.exceptions.UnknownFPSError,
                    pysubs2.exceptions.UnknownFormatIdentifierError,
                    pysubs2.exceptions.FormatAutodetectionError)
    try:
        if fn:
            self.f = pysubs2.load(fn, encoding=encoding)
        elif content:
            self.f = pysubs2.SSAFile.from_string(content)
    except parse_errors:
        if fn:
            logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
        elif content:
            logger.exception("Couldn't load subtitle: %s", traceback.format_exc())
    return bool(self.f)
def half_ass(filename, delete=True):
    """Re-save *filename* as an .ass file, optionally deleting the original.

    Returns the new filename (the original name with ".ass" appended).
    """
    converted_name = filename + ".ass"
    pysubs2.load(filename).save(converted_name)
    if delete and os.path.isfile(filename):
        os.remove(filename)
    return converted_name
def file_to_lines_complex(i, file):
    """Extract cleaned quote lines from subtitle *file*, prefixed with a show index.

    :param i: zero-based show index; each output line is prefixed with ``i + 1``.
    :param file: path to the subtitle file (read as UTF-8).
    :return: list of "<i+1>|<text>" strings with consecutive duplicates removed.
    """
    lines = []
    prev_quote = None
    subs = pysubs2.load(file, encoding="utf-8")
    for line in subs:
        if line.duration < 200:  # too short to be real dialogue
            continue
        if line.style == 'Black and Red':
            # empirically found this style contains weird texts
            continue
        quote = line.text
        quote = quote.replace('\n', ' ').replace(r'\N', ' ').replace(r'\h', ' ')
        quote = re.sub(r'\{.*?\}', '', quote)  # strip ASS override tags
        # Fix: raw strings here — '\d'/'\s' in plain literals are invalid
        # escapes (DeprecationWarning, a SyntaxError in future Python).
        if re.match(r'^m -?\d+(\.\d+)? -?\d+(\.\d+)?', quote):
            # those weird long numeric sequences (ASS drawing commands)
            continue
        if quote.endswith(' '):
            quote = quote[:-1]  # conditioning for gpt-2 model
        if quote == '':
            continue
        quote = re.sub(r'\s+', ' ', quote).strip()  # collapse repeated whitespace
        quote = f'{i + 1}|{quote}'
        if prev_quote != quote:
            lines.append(quote)
            prev_quote = quote
    return lines
def load(self, fn=None, content=None, language=None, encoding="utf-8"):
    """Parse a subtitle from a file or from an in-memory string.

    :param fn: filename to read (takes precedence over *content*)
    :param content: unicode subtitle body
    :param language: babelfish.Language of the subtitle (rebuilt non-forced)
    :param encoding: used only when decoding *fn*; ignored for *content*
    :return: True when a subtitle object was produced
    """
    if language:
        self.language = Language.rebuild(language, forced=False)
    self.initialized_mods = {}
    parse_errors = (IOError, UnicodeDecodeError,
                    pysubs2.exceptions.UnknownFPSError,
                    pysubs2.exceptions.UnknownFormatIdentifierError,
                    pysubs2.exceptions.FormatAutodetectionError)
    try:
        if fn:
            self.f = pysubs2.load(fn, encoding=encoding)
        elif content:
            self.f = pysubs2.SSAFile.from_string(content)
    except parse_errors:
        if fn:
            logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
        elif content:
            logger.exception("Couldn't load subtitle: %s", traceback.format_exc())
    return bool(self.f)
def to_srt(source: str, output: Optional[str] = None, remove_source: bool = False) -> str:
    """Convert a subtitle to SubRip. Currently, only ASS is supported.

    SubRip sources are returned unchanged.

    :param source: path of the subtitle to convert
    :param output: target path; defaults to *source* with an .srt extension
    :param remove_source: delete *source* after a successful conversion
    :raises: ConversionError, OSError
    """
    if source.endswith(".srt"):
        return source

    split_path = os.path.splitext(source)
    # Bug fix: `x not in (".ass")` tested substring membership in the *string*
    # ".ass" (parentheses alone don't make a tuple), so extensions like ".as"
    # slipped through. A one-element tuple restores real membership testing.
    if split_path[-1] not in (".ass",):
        raise ConversionError(
            f"No converter found for extension: {split_path[-1]}") from None

    output = output or f"{split_path[0]}.srt"

    try:
        parsed = pysubs2.load(source)
        parsed.save(output)
    except (pysubs2.Pysubs2Error, UnicodeError) as error:
        raise ConversionError(
            f"Exception converting {output}: {error}") from error

    logger.debug("Converted: %s", output)

    if remove_source and source != output:
        try:
            os.remove(source)
        except OSError as error:
            logger.debug("Can't remove source: %s (%s)", source, error)

    return output
def get_vsmod_improper_style(subtitle_filepath: str, input_encoding="utf-8") -> set:
    """Return {"default"} when any non-comment event lacking a leading \\fn
    override tag uses the "default" style; otherwise an empty set."""
    subs = pysubs2.load(subtitle_filepath, encoding=input_encoding)
    improper_style_name: str = "default"
    # Matches an override block at the start of a line that carries \fn<name>.
    fn_override_pattern = re.compile(
        "^\\{[^\\{\\}]*?\\\\fn(?P<font_name>[^\\\\{\\}]+?)[^\\{\\}]*?\\}")
    used_style_name_set: set = set()
    for event in subs:
        if event.is_comment:
            continue
        if fn_override_pattern.search(event.text) is None:
            used_style_name_set.add(event.style)
    if improper_style_name in used_style_name_set:
        return {improper_style_name}
    return set()
async def fsub(ctx: commands.Context, *args):
    """Shift the subtitles of a queried title; the last argument is "<sec>.<ms>"."""
    time = args[-1].strip()
    try:
        sec, mss = [int(item) for item in time.split(".")]
    except ValueError:
        raise InvalidRequest(f"Invalid timestamps: {time}")

    query = " ".join(args).replace(time, "")
    item = Episode.from_query(query) if is_episode(query) else Movie.from_query(query)

    subs = pysubs2.load(item.subtitle)
    subs.shift(s=sec, ms=mss)
    await ctx.send(f"Shifted `{sec}s:{mss}ms`. Type `reset` to restore it.")

    # Give the admin one minute to undo the shift before persisting.
    try:
        msg = await bot.wait_for("message", timeout=60, check=_check_botmin)
        if "reset" in msg.content.lower().strip():
            subs.shift(s=-sec, ms=-mss)
            await ctx.send("Restored.")
    except asyncio.TimeoutError:
        pass

    subs.save(item.subtitle)
    await ctx.send(f"Subtitles updated for `{item.pretty_title}`.")
def load(caption_path: Optional[str], audio_path: str) -> pysubs2.SSAFile:
    """Load captions, probing next to *audio_path* when no path is given.

    :raises ValueError: when no caption file can be located.
    """
    resolved = caption_path or _probe(audio_path)
    if not resolved:
        raise ValueError("Couldn't find caption file, please specify manually.")
    return pysubs2.load(resolved)
def sync_subtitles(self, video_filename, subtitles):
    """Run subsync against *video_filename* and return the resynced subtitles.

    The unsynced subtitles are written to a temporary .srt created with
    delete=False (subsync needs a real path); it is now removed in a
    ``finally`` — the original leaked one temp file per call.
    """
    import os

    with tempfile.NamedTemporaryFile(delete=False, suffix='.srt') as tmp_unsynced:
        tmp_unsynced.write(subtitles.to_string('srt').encode())
        tmp_unsynced.close()
    try:
        with tempfile.NamedTemporaryFile(suffix='.srt') as tmp_synced:
            # Close so subsync can (re)open the path on all platforms.
            tmp_synced.close()
            self.run_subsync(video_filename, tmp_unsynced.name, tmp_synced.name)
            return pysubs2.load(tmp_synced.name)
    finally:
        # Clean up the file that delete=False left behind.
        os.remove(tmp_unsynced.name)
def handle_sub(sub_path, media_path, update):
    """Load a subtitle with its detected encoding, optionally update it, save it."""
    encoding = detect_encoding(sub_path)
    loaded = pysubs2.load(sub_path, encoding)
    processed = update_sub(loaded) if update else loaded
    return save_sub(processed, media_path, sub_path)
def merge_subs(sub_zh_path, sub_en_path):
    """
    Merge Chinese and English subtitles into one file, using the named
    styles "Chinese" and "English" to tell them apart.
    """
    sub_zh = pysubs2.load(sub_zh_path, detect_encoding(sub_zh_path))
    sub_en = pysubs2.load(sub_en_path)
    sub_default = pysubs2.load("default.ass")

    sub_zh.rename_style("Default", "Chinese")
    sub_zh.import_styles(sub_default)
    sub_en.rename_style("Default", "English")

    # Interleave both tracks chronologically.
    sub_zh.events += sub_en.events
    sub_zh.sort()
    for event in sub_zh.events:
        event.plaintext = event.plaintext.replace("\n", " - ")
    sub_zh.info["ScaledBorderAndShadow"] = "no"
    return sub_zh
def wc(name, path):
    """Render a word cloud (saved as ``<name>.jpg``) from the Chinese text of
    numbered subtitle files ``1.ass``, ``2.ass``, ... found under *path*.

    Common function words and fansub-credit vocabulary are filtered out first.
    """
    text = ''
    # Files are named 1.ass .. N.ass; try utf-8, then utf-16, else skip.
    for i in range(len(os.listdir(path))):
        try:
            f = pysubs2.load(path + str(i + 1) + '.ass', encoding='utf-8')
        except:
            try:
                f = pysubs2.load(path + str(i + 1) + '.ass', encoding='utf-16')
            except:
                continue
        for j in f:
            text += j.text
    #text = open(path.join(d, '../data/constitution.ass')).read()
    # Segment the collected Chinese text into words.
    word_jieba = jieba.cut(text, cut_all=False)
    word_split = ",".join(word_jieba)
    word_list = word_split.split(',')
    # trash1 = set(['be1','','fad','frz','pos','an7','fs18','Non','don','zipwinmax',
    # 'fscx50','NT000A','Rcat'])
    # Stop-words: common function words plus fansub credit terms.
    trash2 = set([
        '好像', '应该', '可以', '知道', '不是', '所以', '没有', '想要', '什么', '因为',
        '就是', '还是', '这种', '时候', '但是', '这样', '这么', '事情', '的话', '不过',
        '觉得', '真是', '自己', '那个', '如果', '虽然', '不要', '那么', '不会', '那样',
        '哪里', '那里', '现在', '就算', '已经', '这个', '只是', '只要', '一个', '而且',
        '来说', '只有', '东西', '这些', '原来', '怎么', '似乎', '可是', '之后', '一样',
        '非常', '而已', '然后', '完全', '微软', '雅黑', '这里', '我们', '你们', '他们',
        '为了', '这是', '仪式', '时间轴', '翻译', '特效', '压制', '淅沥', '哗啦', '片源',
        '为什么', '后期'
    ])
    # trash = trash1 | trash2
    word_list = list(filter(lambda a: a not in trash2, word_list))
    word_list = list(filter(lambda a: is_chinese(a), word_list))
    word_split = ",".join(word_list)
    # Generate a word cloud image
    wordcloud = WordCloud(font_path='../data/simsun.ttc', width=1600,
                          height=800).generate(word_split)
    # Display the generated image:
    # the matplotlib way:
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.savefig(name + '.jpg', dpi=600)
def slice_dataset(yt_uri, left_align, right_align, out_stage):
    """Slice a downloaded YouTube audio track into per-subtitle clips.

    For every subtitle event in <vid>.srt, writes a numbered .txt with the
    de-punctuated text and a mono .wav cut from <vid>.mp4 (padded by
    *left_align*/*right_align* ms) into <data_path>/<vid>/<out_stage>.
    Exits the process with status 1 on any failure.
    """
    # Use vid as the directory name for download and processing
    vids = parse_qs(urlparse(yt_uri).query, keep_blank_values=True).get('v')
    vid = None if vids is None else vids[0]
    v_dir = os.path.join(data_path, vid)
    out_dir = os.path.join(v_dir, out_stage)
    try:
        # Get information on the YouTube content
        yt = YouTube(yt_uri)

        # Filename for audio stream (.mp4) and subtitle (.srt) files
        audio = os.path.join(v_dir, vid + ".mp4")
        subtitle = os.path.join(v_dir, vid + ".srt")

        # Retrieving subtitle information
        audio_content = AudioSegment.from_file(audio, format='mp4')
        subtitle_content = pysubs2.load(subtitle)
        punctuation_filter = str.maketrans('', '', string.punctuation)
        os.makedirs(out_dir, exist_ok=True)

        # Writing to file
        for index, event in enumerate(subtitle_content):
            # Fix: `except Exception` instead of bare `except:` — the bare
            # form also caught the SystemExit raised by the inner handler's
            # sys.exit(1), so the outer handler reported the error twice.
            try:
                if event.text.translate(punctuation_filter) == "":
                    continue
                ev_subtitle = os.path.join(out_dir, str(index).zfill(4) + '.txt')
                ev_audio = os.path.join(out_dir, str(index).zfill(4) + '.wav')
                # Fix: close the text file deterministically (was never closed).
                with open(ev_subtitle, 'w') as ev_subtitle_file:
                    ev_subtitle_file.write(
                        event.text.translate(punctuation_filter))
                ev_audio_content = audio_content[
                    max(0, event.start - left_align):min(event.end + right_align,
                                                         len(audio_content))]
                ev_audio_content = ev_audio_content.set_channels(1)
                ev_audio_content.export(ev_audio, format='wav')
            except Exception:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                exc_file = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print(exc_type, exc_file, exc_tb.tb_lineno)
                sys.exit(1)
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        exc_file = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, exc_file, exc_tb.tb_lineno)
        sys.exit(1)
def subs(filename):
    """Copy the blank subtitle template and fill it with the non-ignored lines
    (and their styles) from *filename*.

    Relies on module globals: ``folder``, ``name`` and ``ignoredstyles``.
    """
    # copies blank subtitle file and opens the new file for editing
    filename = os.path.split(filename)
    shutil.copy("Output/blank.ass", f"Output/{filename[1]}")
    if folder:
        source = pysubs2.load(f"{filename[0]}/{filename[1]}", encoding="utf-8")
    else:
        source = pysubs2.load(name, encoding="utf-8")
    newsubs = pysubs2.load(f"Output/{filename[1]}", encoding="utf-8")
    print(f"Working on {filename[1]}")
    for line in source:
        if line.style in ignoredstyles:
            continue
        newsubs.append(line.copy())
        # Carry the style definition over the first time it is seen.
        if line.style not in newsubs.styles:
            newsubs.styles[line.style] = source.styles[line.style].copy()
    # Fix: use a forward slash like every other path in this function — the
    # original "Output\{...}" relied on an invalid "\{" escape sequence and
    # produced a Windows-only backslash path.
    newsubs.save(f"Output/{filename[1]}", encoding="utf-8")
def resync(name, second=15, encoding='utf-8'):
    """Shift the subtitles in *name* forward by *second* seconds (srt or ass)."""
    extension = name.split('.')[-1]
    if extension == 'srt':
        shifted = pysrt.open(name, encoding=encoding)
        shifted.shift(seconds=second)
        shifted.save()
    if extension == 'ass':
        shifted = pysubs2.load(name, encoding=encoding)
        shifted.shift(s=second)
        shifted.save(name)
def __convert_subtitle(source_file_path: str, source_ext: str, target_file_path: Optional[str], target_ext: str, format: str, frame_rate: Optional[float] = None) -> Tuple[str, str]:
    """Convert a subtitle file to *format*, returning (new path, encoding used).

    When *target_file_path* is None the source path with its extension swapped
    is used; *frame_rate* is forwarded to pysubs2 only when provided.
    """
    encoding = Utils.detect_encoding(source_file_path)
    subs = pysubs2.load(source_file_path, encoding=encoding)
    if target_file_path is None:
        new_target_file_path = source_file_path.replace(".%s" % source_ext, ".%s" % target_ext)
    else:
        new_target_file_path = target_file_path
    save_kwargs = {"encoding": encoding, "format_": format}
    if frame_rate is not None:
        save_kwargs["fps"] = frame_rate
    subs.save(new_target_file_path, **save_kwargs)
    return new_target_file_path, encoding
def loadSRTASS(filepath, ts=0):
    """Parse an SRT/ASS file into [{"startTs", "stopTs", "label"}, ...].

    Timestamps are seconds offset by *ts*. Returns (data, error, warning);
    error and warning are always None in this implementation.
    """
    error, warning = None, None
    data = []
    for event in pysubs2.load(filepath):
        data.append({
            "startTs": ts + event.start / 1000.0,
            "stopTs": ts + event.end / 1000.0,
            "label": event.text,
        })
    return data, error, warning
def convert():
    """Convert every .srt under this script's directory tree into a .en.cnt file
    containing all subtitle lines followed by all start times (whole seconds).

    Two layouts are handled: YouTube auto-generated captions, whose text
    repeats on a fixed 10-line cycle (detected by sampling lines 3/7/12,
    13/17/22, ...), and ordinary uploads, parsed with pysubs2.
    """
    for srt_file in [x for x in Path(__file__).parent.glob('**/*.srt') if x.is_file()]:
        with open(srt_file, 'r', encoding='utf-8') as file_object:
            contents = file_object.readlines()
        # Check for repeating lines from auto-generated subtitles (use lines 3+10i)
        # or non-repeating subtitles from a manual upload (parse using srt library)
        if (
            len(contents) > 40 and
            ((contents[3].strip() != '' and contents[3] == contents[7] and contents[7] == contents[12]) or
             (contents[13].strip() != '' and contents[13] == contents[17] and contents[17] == contents[22]) or
             (contents[23].strip() != '' and contents[23] == contents[27] and contents[27] == contents[32]) or
             (contents[33].strip() != '' and contents[33] == contents[37] and contents[37] == contents[42]))
        ):
            print(srt_file.name + ' is a repeating file')
            # Every 10th line starting at index 3 carries the spoken text.
            lines = []
            i = 0
            while 3 + 10 * i < len(contents):
                lines.append(contents[3 + 10 * i])
                i += 1
            # The matching timestamp header sits at index 1+10i ("HH:MM:SS...").
            times = []
            i = 0
            while i < len(lines):
                t = datetime.strptime(contents[1 + 10 * i][:8], '%H:%M:%S')
                times.append(str(t.hour * 3600 + t.minute * 60 + t.second) + '\n')
                i += 1
        else:
            print(srt_file.name + ' is a nonrepeating file')
            subtitles = pysubs2.load(srt_file, encoding="utf-8")
            lines = []
            for i in subtitles:
                # Flatten real and ASS (\N) line breaks into spaces.
                lines.append(str(i.text.replace('\n', ' ').replace(r'\N', ' ')) + '\n')
            times = []
            for i in subtitles:
                times.append(str(int(i.start // 1000)) + '\n')  # ms -> whole seconds
        # Make .en.cnt with lines then times
        # NOTE(review): the [:-7] slice assumes names ending like "xxx.en.srt",
        # and mode 'a' appends across reruns instead of overwriting — confirm.
        with open(str(Path(__file__).parent) + '\\' + srt_file.name[:-7] + '.en.cnt', 'a') as cnt_file:
            for i in lines:
                cnt_file.write(i)
        with open(str(Path(__file__).parent) + '\\' + srt_file.name[:-7] + '.en.cnt', 'a') as cnt_file:
            for i in times:
                cnt_file.write(str(i))
def save_data(self, output_data):
    """Store *output_data* as the normal lines and write the combined result
    to the destination path in this object's file format (txt / ass / srt)."""
    self.normal_lines = output_data
    full_data = self._combine_data()
    if self.file_format == "txt":
        with open(self.destination_path, 'w', encoding="utf-8") as handle:
            handle.write("\n".join(full_data))
    elif self.file_format in ("ass", "srt"):
        subtitle_file = pysubs2.load(self.file_path)
        # Consume the combined lines in order, one per subtitle event.
        for event in subtitle_file:
            event.text = full_data.pop(0)
        subtitle_file.save(self.destination_path)
async def _run(self, main_window: QtWidgets.QMainWindow) -> None:
    """Ask the user for a subtitle file and import its lines as note events."""
    path = load_dialog(
        main_window, "Subtitles (*.ass *.srt);;All files (*.*)"
    )
    if not path:
        return
    imported = pysubs2.load(str(path))
    # One undo step covers the whole import.
    with self.api.undo.capture():
        for event in imported:
            self.api.subs.events.append(
                AssEvent(start=event.start, end=event.end, note=event.text)
            )
def tratamento_legendas_crunchroll(dir_trabalho=None, dir_legenda=None, dir_backup='Legendas Originais', extensao_legenda='.ass'):
    """For each backed-up Crunchyroll subtitle in *dir_legenda*, fix its styles
    and save the result into the working directory *dir_trabalho*."""
    for arquivo_de_legenda in dir_legenda:
        if not arquivo_de_legenda.endswith(extensao_legenda):
            continue
        subs = pysubs2.load(dir_trabalho + '/' + dir_backup + '/' + arquivo_de_legenda,
                            encoding="utf-8")
        corrigi_estilos_crunchroll(subs)
        # cheque_fontes_instaladas(subs)
        subs.save(dir_trabalho + '/' + arquivo_de_legenda)
def corrigi_estilos_subs(temp_arq_de_legenda, temp_dir_salvar, temp_nome_salvar):
    """Rewrite every style's font in a subtitle file from the CONFIG mapping.

    Styles without an entry in CONFIG["fontesEstilos"] fall back to
    CONFIG["fontePadrao"]. The result is saved under the given directory/name.
    """
    subs = pysubs2.load(temp_arq_de_legenda, encoding="utf-8")
    font_map = CONFIG["fontesEstilos"]
    for nome_estilo, estilo in subs.styles.items():
        # Fix: only a missing mapping should trigger the fallback — the bare
        # `except:` also hid unrelated errors. (Unused local removed too.)
        try:
            estilo.fontname = font_map[nome_estilo]
        except KeyError:
            estilo.fontname = CONFIG["fontePadrao"]
    subs.save(temp_dir_salvar + '/' + temp_nome_salvar)
def process_path(self, input_path):
    """Run every configured processing step over the subtitles at *input_path*,
    then save them and return the output path."""
    subs = pysubs2.load(input_path, self.input_encoding)
    for step in self.steps:
        step(subs)
    output_path = self.get_output_path(input_path, subs)
    subs.save(output_path,
              self.output_encoding,
              self.output_format or subs.format,
              self.output_fps)
    return output_path
def loadTemplate():
    """Load template.srt into the module-level ``subs`` and drop its first
    two placeholder events."""
    global subs
    subs = pysubs2.load("template.srt")
    del subs[0:2]
import pysubs2 subsJ = pysubs2.load("ja.ass", encoding="utf-8") subsE = pysubs2.load("eng.ssa", encoding="utf-8") #subs.shift(s=2.5) #for line in subs: # line.text = "{\\be1}" + line.text #subs.save("my_subtitles_edited.ass") # Visualize enumerate #print(list(enumerate("iamtheverymodelofamodernmajorgeneral"))) currentJ = 0 currentE = 0 offset_tolerance = 1000 doubleUp_tolerance = 1500 def getIndexOffset(subsJ, subsE): for idx_j, item_j in enumerate(subsJ): for idx_e, item_e in enumerate(subsE): if abs(item_j.start - item_e.start) < offset_tolerance: print "Found Offset! X: {} Y: {}".format(idx_j, idx_e) return idx_j, idx_e def isBlank(offset=0): global currentJ, currentE # Checks if the current fields are malformed or blank if (subsJ[currentJ + offset].start == 0 or subsJ[currentJ + offset].text == ''): #print "J BLANK SPOT - INCREMENT J" currentJ = currentJ + 1 return 1 if (subsE[currentE + offset].start == 0 or subsE[currentE + offset].text == ''):