def main(argv=None, log_ger=None):
    """Parse command-line options and run one subtitle check pass.

    Args:
        argv: Argument list handed to ``getopt.getopt`` (normally
            ``sys.argv[1:]``).  Positional arguments left over after option
            parsing are passed to ``Subtitle.add_files``.
        log_ger: Logger to use.  When ``None`` a logger named ``"subtitle"``
            is created with ``create_log`` at INFO level.

    Exits the process (``sys.exit``) after printing help/version, on an
    option-parsing error (status 2), and on invalid ``-s``/``-t``/``-d``
    values.
    """
    if log_ger is None:
        log_ger = create_log(log_name="subtitle", level=logging.INFO)

    start_dtime = datetime.now()
    print()
    sub = Subtitle(log_ger)

    # NOTE: the original list read `"checkup" "file="` without a comma, which
    # Python concatenates into the single option "checkupfile=" and therefore
    # made both --checkup and --file unusable.  "excel=" is registered because
    # the option loop below handles "--excel".
    # NOTE(review): "pickle=" is accepted by getopt but no branch below
    # handles "--pickle"; it is kept only for backward compatibility.
    long_options = [
        "help", "version", "parse", "checkup",
        "file=", "word=", "type=", "dir=", "pickle=", "excel=",
        "limit=", "section=", "bigger=",
    ]
    try:
        opts, args = getopt.getopt(
            argv, "hvf:w:t:d:e:p:s:b:?lm:WDc", long_options)
        log_ger.info("opts:{0};args:{1}".format(opts, args))
    except getopt.GetoptError as msg:
        print("error happened when get options!!! error:{0}".format(msg))
        usage()
        log_ger.error("getopt.GetoptError:{0}, exit!".format(msg))
        sys.exit(2)
    except Exception as msg:
        log_ger.error("error:{0}, exit!".format(msg))
        sys.exit(2)

    _is_lines_show = False
    _is_words_show = False
    sub_type = ""
    words_limit = None

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-v", "--version"):
            version()
            sys.exit()
        elif opt in ("-b", "--bigger"):
            sub.set_times_bigger(int(arg))
        elif opt in ("-c", "--checkup"):
            sub.checkup = True
        elif opt in ("-d", "--dir"):
            print("Sorry, -d --dir option still not offer")
            sys.exit()
        elif opt in ("-e", "--excel"):
            sub.set_lexicon_file(arg)
        elif opt in ("-s", "--section"):
            # Expected form is "start,end"; either side may be empty.
            section = arg.split(',')
            if len(section) != 2:
                print("something wrong, with option -s --section:", arg)
                sys.exit()
            start, end = section
            if start:
                sub.set_start(int(start))
            if end:
                sub.set_end(int(end))
        elif opt in ('-f', "--file"):
            sub.add_file(arg)
        elif opt in ('-p', "--parse"):
            sub.set_parse(True)
        elif opt == '-D':
            log_ger.setLevel(logging.DEBUG)
            sub.set_logger(log_ger)
            sub.set_debug(True)
        elif opt in ("-w", "--word"):
            sub.add_word(arg)
            # Mostly used for testing, so skip writing any output.
            sub.set_output(False)
        elif opt in ("-t", "--type"):
            sub_type = arg
            if sub_type not in ('save', 'scan', 'cloud'):
                usage()
                sys.exit()
        elif opt in ("-m", "--limit"):
            words_limit = int(arg)
            _is_words_show = True
        elif opt == '-l':
            # show lines
            _is_lines_show = True
        elif opt == '-W':
            # show words
            _is_words_show = True

    if sub.lexicon_path is None:
        sub.set_lexicon_file("lexicon/lexicon.xlsx")
    sub.load_old_data()
    sub.add_files(args)
    sub.check_all(encode='utf-8')

    if _is_lines_show:
        sub.lines_show(words_limit)
    if _is_words_show:
        sub.words_show(words_limit)
    sub.show()

    if sub_type == 'save':
        sub.dump_data()
    elif sub_type == 'cloud':
        sub.cloud()

    print()
    end_dtime = datetime.now()
    timedelta = end_dtime - start_dtime
    print("Cost time: " + str(timedelta))
class TransferBak(object):
    """Batch-process the subtitle files found under a directory tree.

    Steps driven by ``transfer_dir``:
      * move ``*.srt`` to ``*.srt.bak``
      * run the ``Subtitle`` checker over the ``.srt.bak`` files
      * move the generated ``*_word.srt`` back to ``*.srt``
      * create ``.m3u`` files for videos that have a matching ``.srt``

    ``self.files`` maps a file extension (one of ``filetypes`` or
    ``vediotypes``) to a set of ``(directory, filename)`` tuples and is
    rebuilt by every ``check_types`` call.
    """

    # One "Stream #0:2(eng): Subtitle: subrip" line of ``ffmpeg -i`` output.
    _STREAM_RE = re.compile(
        r" *Stream #(\d+):(\d+)(\(\w+\))?: (\w+): (\w+)")
    # A "title : ..." metadata line that follows a stream line.
    _TITLE_RE = re.compile(r" *title *: (.*)")

    def __init__(self, logger=None, log_level=logging.INFO):
        """Create the helper; builds a logger named after the class if none is given."""
        classname = type(self).__name__
        if logger is None:
            self.logger = create_log(log_name=classname, level=log_level)
        else:
            self.logger = logger
        self.logger.info("\n-----------------")
        self.logger.info("Begin to init")
        self.logger.info("\n-----------------")
        self.sub = Subtitle(self.logger)
        self.sub.set_parse(True)
        self.files = dict()
        self.filetypes = ['srt', 'bak', 'm3u']
        self.vediotypes = ['mkv', 'mp4', 'avi']
        self.rmsrt = False
        self.is_m3u_only = False
        if self.sub.lexicon_path is None:
            self.sub.set_lexicon_file("lexicon/lexicon.xlsx")

    def setrm(self):
        """Make ``transfer_dir`` call ``rm_srt`` before processing."""
        self.rmsrt = True

    def set_debug(self):
        """Switch this object's logger and its ``Subtitle`` helper to debug mode."""
        self.logger.setLevel(logging.DEBUG)
        self.sub.set_logger(self.logger)
        self.sub.set_debug(True)

    def set_m3u_only(self):
        """Make ``transfer_paths`` only regenerate m3u files."""
        self.is_m3u_only = True

    def set_logger(self, logger):
        """Replace the logger used by this object."""
        self.logger = logger

    def srt_to_baks(self):
        """Move every known ``.srt`` file to a ``.srt.bak`` file.

        Without any video files every srt is moved unconditionally.
        Otherwise an srt is moved only when a video with the same base name
        exists and the target ``.srt.bak`` does not exist yet; a trailing
        ``.en`` in the srt base name is dropped when deriving both names.
        """
        has_video = self.get_vedio_num() > 0
        # .get() so a directory without any srt is simply a no-op.
        for root, name in self.files.get("srt", ()):
            srt = os.path.join(root, name)
            base = os.path.splitext(srt)[0]
            if not has_video:
                # No video at all: just back the subtitle up.
                bak = srt + ".bak"
                print("move " + srt + " to " + bak)
                shutil.move(srt, bak)
                continue

            if base.endswith(".en"):
                stem = base[0:-3]
                bak = stem + ".srt.bak"
            else:
                stem = base
                bak = srt + ".bak"
            for postfix in self.vediotypes:
                vedio = stem + "." + postfix
                if os.path.exists(vedio) and not os.path.exists(bak):
                    print("move " + srt + " to " + bak)
                    shutil.move(srt, bak)
                    # bak exists now, so further extensions cannot match.
                    break

    @staticmethod
    def _parse_streams(ffmpeg_output):
        """Return the stream descriptions found in ``ffmpeg -i`` output.

        Each stream is a dict with ``id``, ``type`` and ``filetype`` and,
        when present, ``subhead`` (the parenthesised tag, e.g. ``"(eng)"``)
        and ``title``.  Title lines seen before the first stream are ignored.

        Raises:
            ValueError: a line contains ``"Stream"`` but does not match the
                expected layout; the offending line is the exception argument.
        """
        streams = []
        stream = None
        for line in ffmpeg_output.split("\n"):
            if "Stream" in line:
                m = TransferBak._STREAM_RE.match(line)
                if not m:
                    raise ValueError(line)
                if stream is not None:
                    streams.append(stream)
                stream = {"id": m.group(2),
                          "type": m.group(4),
                          "filetype": m.group(5)}
                if m.group(3) is not None:
                    stream['subhead'] = m.group(3)
            elif stream is not None:
                m = TransferBak._TITLE_RE.match(line)
                if m:
                    stream['title'] = m.group(1)
        # The last stream has no following "Stream" line to flush it.
        if stream is not None:
            streams.append(stream)
        return streams

    def baks_mkv(self):
        """Try to produce a ``.srt.bak`` for every video that lacks one.

        Streams are listed with ``ffmpeg -i``.  A stream whose title marks it
        as an English/Chinese subtitle is taken from a sibling ``.ass`` file
        (converted with ``asstosrt``) or extracted with ``mkvextract`` when
        it is ``subrip``.  Videos that could not be handled are printed at
        the end.
        """
        failedfiles = []
        print("------------")
        print('get srt.bak file from vedio!')
        for postfix in self.vediotypes:
            if postfix not in self.files:
                continue
            for fi, entry in enumerate(self.files[postfix]):
                mkvfile = os.path.join(entry[0], entry[1])
                print("------------")
                print(fi, mkvfile)
                srtbak_path = os.path.splitext(mkvfile)[0] + ".srt.bak"
                if os.path.exists(srtbak_path):
                    print(srtbak_path, "exists")
                    print("------------")
                    continue

                print("1. Check streams in mkv!")
                res = subprocess.run(["ffmpeg", "-i", mkvfile],
                                     stderr=subprocess.PIPE)
                # errors="replace": a stray non-UTF-8 byte in the metadata
                # must not abort the whole run.
                output = res.stderr.decode("utf-8", errors="replace")
                try:
                    streams = self._parse_streams(output)
                except ValueError as err:
                    print("err: reg not work on this sentence")
                    print(err.args[0])
                    sys.exit(2)

                streams_df = pd.DataFrame(streams)
                print("streams_df:\n", streams_df)
                if len(streams_df) < 2 or \
                        len(streams_df[streams_df["type"] == "Subtitle"]) <= 0:
                    print("not have enough stream")
                    failedfiles.append(mkvfile)
                    continue

                print("2. get srt.bak file from mkv!")
                engchs = []
                if 'title' in streams_df:
                    criterion = streams_df['title'].map(
                        lambda t: re.match(
                            r"(英.中)|(英中)|(中英字幕)", str(t)) is not None)
                    engchs = streams_df[criterion]

                if len(engchs) > 0:
                    # todo: deal engchs
                    print("engchs:\n", engchs)
                    base = os.path.splitext(mkvfile)[0]
                    srt = base + ".srt"
                    filetype = str(engchs.iloc[0].filetype)
                    if 'ass' == filetype:
                        # The .ass is read from next to the video; nothing in
                        # this method extracts it from the container.
                        ass = base + ".ass"
                        with open(ass) as ass_file:
                            srt_str = asstosrt.convert(ass_file)
                        with open(srt, "w") as srt_file:
                            srt_file.write(srt_str)
                        if os.path.exists(srt):
                            shutil.move(srt, srtbak_path)
                        else:
                            # todo: log err
                            print("error: {} failed to create!".format(srt))
                            failedfiles.append(mkvfile)
                    elif 'subrip' == filetype:
                        print("filetype:", filetype)
                        subprocess.run(
                            ["mkvextract", "tracks", mkvfile,
                             "{0}:{1}".format(
                                 str(engchs.iloc[0].id), srtbak_path)])
                else:
                    # The 'subhead' column only exists when at least one
                    # stream carried a language tag.
                    if 'subhead' in streams_df:
                        # NOTE(review): subhead keeps its parentheses
                        # ("(eng)"), so this anchored match looks like it can
                        # never succeed - confirm before implementing the todo.
                        criterion = streams_df['subhead'].map(
                            lambda t: re.match(r"eng", str(t)) is not None)
                        eng = streams_df[
                            criterion & (streams_df["type"] == "Subtitle")]
                    else:
                        eng = streams_df.iloc[0:0]
                    if len(eng) > 0:
                        # todo: deal eng
                        print("eng:\n", eng)
                    else:
                        print('-------------')
                        print(entry[1], "has no suitable subtitle stream!")
                        print(streams_df[streams_df["type"] == "Subtitle"])
                        failedfiles.append(mkvfile)
                        print('-------------')

        print("files failed to create .srt.bak:\n")
        for failed in failedfiles:
            print(failed)
        print('-------------')

    # create and cat word.txt
    def words(self):
        """Placeholder; not implemented."""
        pass

    def show_files(self):
        """Print how many files of each known extension were found."""
        for k, v in self.files.items():
            print(k + ": {}".format(len(v)))

    def check_types(self, path):
        """Rescan ``path`` recursively and rebuild ``self.files``.

        A file is recorded under the first extension of
        ``filetypes + vediotypes`` that its name ends with.
        """
        self.files = dict()
        known = self.filetypes + self.vediotypes
        for root, dirs, files in os.walk(path):
            for file in files:
                for postfix in known:
                    if file.endswith("." + postfix):
                        self.files.setdefault(postfix, set()).add((root, file))
                        break
        self.show_files()

    def rm_m3u(self, path):
        """Delete every ``.m3u`` file recorded in ``self.files``."""
        for root, name in self.files.get('m3u', ()):
            p = os.path.join(root, name)
            os.remove(p)
            print(p + " was removed!")

    def rm_srt(self, path):
        """Delete generated files: backed-up srt, all m3u and ``output.xlsx``."""
        for root, name in self.files.get('srt', ()):
            p = os.path.join(root, name)
            # Never delete an original srt that has no .bak copy
            # (this has gone wrong before).
            if os.path.exists(p + ".bak"):
                os.remove(p)
                print(p + " was removed!")
        self.rm_m3u(path)
        out = path + "/output.xlsx"
        if os.path.exists(out):
            os.remove(out)
            print(out + " was removed!")

    def get_vedio_num(self):
        """Return the number of video files recorded in ``self.files``."""
        return sum(len(self.files[postfix])
                   for postfix in self.vediotypes
                   if postfix in self.files)

    def transfer_dir(self, path):
        """Run the whole srt -> bak -> checked srt -> m3u pipeline on ``path``."""
        self.logger.info("path: {}".format(path))
        self.check_types(path)
        if self.rmsrt:
            self.rm_srt(path)
            print("\nafter remove str and m3u files:")
            self.check_types(path)

        old_bak = set()
        if 'bak' in self.files:
            old_bak = copy.deepcopy(self.files["bak"])

        if self.get_vedio_num() > 0 and self.get_vedio_num() != len(old_bak):
            if "srt" in self.files and len(self.files["srt"]) > 0:
                self.srt_to_baks()
            print("------------")
            print("after move srt to bak")
            self.check_types(path)
            if "bak" in self.files:
                if self.get_vedio_num() > len(self.files["bak"]):
                    print("There is more vedio than bak!")
                    print("There is no enough .srt.bak!")
                    self.baks_mkv()
            elif self.get_vedio_num() > 0:
                print("There is more vedio than bak(0)!")
                print("There is no enough .srt.bak!")
                self.baks_mkv()
            self.check_types(path)
            if "bak" in self.files and \
                    len(self.files["bak"]) == len(old_bak):
                print("baks num get no increase!")
            if 'bak' in self.files:
                old_bak = copy.deepcopy(self.files["bak"])
        elif len(old_bak) > 0:
            print(".srt.bak exists!")
        elif self.get_vedio_num() <= 0:
            # Subtitles but no video: carry on anyway, the word list is
            # still useful.
            print("no .srt.bak at all!")
            if "srt" in self.files and len(self.files["srt"]) > 0:
                self.srt_to_baks()
            print("------------")
            print("after move srt to bak")
            self.check_types(path)
            # .get(): the "bak" key is absent when nothing was moved.
            if len(self.files.get("bak", ())) == len(old_bak):
                print("baks num get no increase!")
            if 'bak' in self.files:
                old_bak = copy.deepcopy(self.files["bak"])
        else:
            print("no .srt.bak at all!")

        # .srt.bak files that do not have a generated .srt yet.
        srt_set = {os.path.join(x[0], x[1])
                   for x in self.files.get("srt", ())}
        srt_cre = []
        if len(old_bak) > 0:
            for root, name in self.files.get("bak", ()):
                bak = os.path.join(root, name)
                bak_base = os.path.splitext(bak)[0]  # will get *.srt
                if bak_base not in srt_set:
                    srt_cre.append(bak)

        self.sub.load_old_data()
        self.sub.add_files(srt_cre)
        self.sub.check_all(encode='utf-8')

        # Also drop a copy of the word spreadsheet into the target folder.
        output_dir = "output/output.xlsx"
        if self.sub.words_len() > 0 and os.path.exists(output_dir):
            shutil.copy(output_dir, path)
            print("output.xlsx was copy to " + path)
        self.sub.show()

        failedfiles = []
        srt_files = []
        for bak in srt_cre:
            base = os.path.splitext(bak)[0]  # will get *.srt
            print("base:", base)
            wordsrt = base + "_word.srt"
            srt = base
            print(wordsrt)
            if os.path.exists(wordsrt):
                shutil.move(wordsrt, srt)
                srt_files.append(srt)
            else:
                # Record the one file that failed, not the whole work list.
                failedfiles.append(bak)

        print("files failed to create .srt_word.srt:\n")
        for f in failedfiles:
            print(f)
        print('-------------')
        print("srt files created:\n")
        for f in srt_files:
            print(f)
        print('-------------')
        self.get_m3u(path)

    def get_m3u(self, path):
        """Create an ``.m3u`` for each video that has an ``.srt`` but no ``.m3u``."""
        self.check_types(path)
        old_m3u = set(self.files.get('m3u', ()))
        for postfix in self.vediotypes:
            for root, name in self.files.get(postfix, ()):
                vedio = os.path.join(root, name)
                base = os.path.splitext(vedio)[0]  # path without extension
                m3u = base + ".m3u"
                srt = base + ".srt"
                if not os.path.exists(m3u) and os.path.exists(srt):
                    print(vedio, "is getting m3u...")
                    set_m3u(vedio, self.logger)
        self.check_types(path)
        # Only report when the number of m3u files did not grow.
        if 'm3u' in self.files and len(old_m3u) >= len(self.files['m3u']):
            print("old_m3u:\n", old_m3u)
            print("new_m3u:\n", self.files['m3u'])

    def transfer_paths(self, paths):
        """Process each directory in ``paths``; files and missing paths are reported and skipped."""
        self.logger.info("paths:{}".format(paths))
        for p in paths:
            if os.path.isfile(p):
                print("should be dir, don't input file:" + p)
            elif os.path.isdir(p):
                print("transfer dir:" + p)
                if self.is_m3u_only is True:
                    self.check_types(p)
                    self.rm_m3u(p)
                    self.get_m3u(p)
                else:
                    self.transfer_dir(p)
            else:
                print(p, "not exists!")