def _get_subtitles_from_single_file(self, fn):
    """Load the subtitle file ``fn`` and return the parsed pysubs object.

    The path is first passed through ``self.fix_path``.  Loading is then
    attempted in three increasingly tolerant steps:

    1. ``pysubs.load`` with no explicit encoding;
    2. if that raises ``EncodingDetectionError``, ``pysubs.load`` with
       ``self.default_sub_encoding``;
    3. if that raises ``UnicodeDecodeError``, the raw bytes are decoded
       with ``self.default_sub_encoding`` using ``errors="replace"`` and
       fed to a fresh ``pysubs.SSAFile`` via ``from_str``.

    Any other exception propagates to the caller.
    """
    sta = self.fix_path(fn)
    # The nested try is from the pysubs examples,
    # https://github.com/tigr42/pysubs/blob/master/examples/chapter_gen.py
    try:
        return pysubs.load(sta)
    except pysubs.exceptions.EncodingDetectionError:
        try:
            return pysubs.load(sta, self.default_sub_encoding)
        except UnicodeDecodeError:
            # Read bytes and decode explicitly: this avoids the
            # Python-2-only ``unicode()`` builtin and any text-mode
            # translation of the file contents, so the fallback behaves
            # the same on Python 2 and Python 3.
            with open(sta, "rb") as fp:
                raw = fp.read()
            subs = pysubs.SSAFile()
            subs.from_str(raw.decode(self.default_sub_encoding, "replace"))
            return subs
def _get_subtitles_from_single_file(self, fn):
    """Load the subtitle file ``fn`` and return the parsed pysubs object.

    The path is first passed through ``self.fix_path``.  Loading is then
    attempted in three increasingly tolerant steps:

    1. ``pysubs.load`` with no explicit encoding;
    2. if that raises ``EncodingDetectionError``, ``pysubs.load`` with
       ``self.default_sub_encoding``;
    3. if that raises ``UnicodeDecodeError``, the raw bytes are decoded
       with ``self.default_sub_encoding`` using ``errors="replace"`` and
       fed to a fresh ``pysubs.SSAFile`` via ``from_str``.

    Any other exception propagates to the caller.
    """
    sta = self.fix_path(fn)
    # The nested try is from the pysubs examples,
    # https://github.com/tigr42/pysubs/blob/master/examples/chapter_gen.py
    try:
        return pysubs.load(sta)
    except pysubs.exceptions.EncodingDetectionError:
        try:
            return pysubs.load(sta, self.default_sub_encoding)
        except UnicodeDecodeError:
            # Read bytes and decode explicitly: this avoids the
            # Python-2-only ``unicode()`` builtin and any text-mode
            # translation of the file contents, so the fallback behaves
            # the same on Python 2 and Python 3.
            with open(sta, "rb") as fp:
                raw = fp.read()
            subs = pysubs.SSAFile()
            subs.from_str(raw.decode(self.default_sub_encoding, "replace"))
            return subs
import atexit
atexit.register(exit_handler)

####################################################
#
# Take one subtitle line from the subtitle file; it is translated later on.
#
######################################################
subs = pysubs.load(file_full_path, encoding='utf-8')
# Open the subtitle file.

e_txt = subs[4].text.replace('\\N', ' ')
# Grab one subtitle line and replace its \N line-break markers with spaces,
# because English subtitles are sometimes split across several lines.
# Sending the line-break marker to the translator as well would make the
# later processing awkward, so it is simply removed here.
# The index 4 is arbitrary and can be replaced with any other.

print (e_txt)
print()
# Print the original text.

################################
里的全角符号换成半角 此程序需要和 Translate_Whole_SubtitleFile.py 配合使用. 因为 Translate_Whole_SubtitleFile.py 在翻译完字幕后 会保存到新文件里, 而且新文件的文件名会多一个(done) ''' # encoding=utf-8 import glob import pysubs filename_list = glob.glob("(done)*.srt") for filename in filename_list: subs = pysubs.load(filename, encoding='utf-8') for line in subs: line.text = line.text.replace(',', ', ') line.text = line.text.replace('。', '. ') line.text = line.text.replace('!', '! ') line.text = line.text.replace('’', "'") line.text = line.text.replace(':', ': ') line.text = line.text.replace('?', '? ') subs.save(filename) print("done") input()
return p.sub(r'\1 \2', string) #== (start) 2. 处理逗号 句号 数字问题(字幕文件) == ugly = pysubs.load(need_split_path, encoding="utf-8") # 打开字幕文件 # 循环每行字幕 for line in ugly: temp = line.text.split('\\N') # 有些英文字幕是两行的, 用了杠N隔开, 我们按照换行符号分割成列表. # temp 现在是个列表 # 循环这个列表, index是索引, one是内容 for index,one in enumerate(temp): temp[index] = Handle_Number(one) temp[index] = Handle_Period(temp[index])
##########################
#
# Start processing the subtitle file for real
#
##########################
try:
    subs = pysubs.load(filepath, encoding='utf-8')
except Exception:
    print('字幕文件打开失败 程序退出')
    # The message announces that the program exits, so actually exit;
    # otherwise the loop below fails with NameError on the unbound `subs`.
    raise SystemExit(1)

for line in subs:
    try:
        whole_Sentence = line.text
        # Split a multi-line subtitle on the \N line-break marker and blank
        # out the commas and full stops of its first line.
        l = whole_Sentence.split(r'\N')
        l[0] = l[0].replace(',', ' ')
        l[0] = l[0].replace('.', ' ')
        # FULLWIDTH COMMA, the counterpart of the ideographic full stop
        # below (a second ASCII-comma replace here would be a no-op).
        l[0] = l[0].replace('\uff0c', ' ')
        l[0] = l[0].replace('。', ' ')
    except Exception:
        # Best effort: a line that cannot be processed is left as it is.
        pass
# The subtitle file is expected as the first command-line argument (the
# user-facing message below asks for the file to be dragged onto the script).
try:
    file_path = sys.argv[1]
except IndexError:
    # No file was supplied: explain, wait for Enter, then quit.
    print('请拖入需要删除第一行字幕的字幕文件. 不要直接运行. 按[回车键]退出程序')
    input()
    sys.exit()
# Echo the path of the file that was dragged in.
print(file_path)

subs = pysubs.load(file_path, encoding="utf-8")
for line in subs:
    parts = line.text.split(r'\N')
    # A subtitle made of exactly two lines keeps only its second line;
    # anything else keeps only the part before the first \N marker.
    # NOTE(review): with three or more lines this drops everything but the
    # first one -- confirm that this is the intended behaviour.
    line.text = parts[1] if len(parts) == 2 else parts[0]

# Write the result back over the original file.
subs.save(file_path)
time_elapsed = '' if hours > 0: time_elapsed += str(hours) + ' hrs ' if minutes > 0: time_elapsed += str(minutes) + ' min ' if seconds > 0: time_elapsed += str(seconds) + ' sec' with codecs.open('subtitles.srt', 'a', 'utf-8-sig') as s: s.write(str(i + 1)) s.write('\n') s.write( str(substart)[11:] + ',000 --> ' + str(subend)[11:] + ',000\n') s.write(time_elapsed + ' skipped \n' + times[i][2] + '\n \n') s.close() subs = pysubs.load('subtitles.srt') subs.save('subtitles.ass') subtitle = FFmpeg(inputs={args.i: None}, outputs={'subtitled.mp4': '-vf "ass=subtitles.ass"'}) subtitle.run() for i in range(len(starts)): name = str(i) + '.mp4' if i == 0: a = 'w' else: a = 'a' with open('videos.txt', a) as v: v.write("file '" + name + "'\n") v.close()
def __init__(self, filepath):
    """Open the subtitle file ``filepath`` and get it ready for processing."""
    self._filepath = filepath
    self._subs = pysubs.load(filepath)
    # Both attributes start out as None.
    self._timemap = self._timedelta = None