예제 #1
0
def main(argv=None, log_ger=None):
    """Command-line entry point: parse options, run the subtitle check, report.

    Recognised options (short / long form):

    * ``-h``, ``-?``, ``--help``    print usage and exit
    * ``-v``, ``--version``         print version and exit
    * ``-b``, ``--bigger N``        forwarded to ``sub.set_times_bigger(int(N))``
    * ``-c``, ``--checkup``         set ``sub.checkup`` to True
    * ``-d``, ``--dir``             not offered yet; prints a notice and exits
    * ``-e``, ``--excel FILE``      lexicon file passed to ``sub.set_lexicon_file``
    * ``-s``, ``--section S,E``     start/end passed to ``sub.set_start`` /
      ``sub.set_end``; either side may be left empty
    * ``-f``, ``--file FILE``       add one file to check
    * ``-p``, ``--parse``           call ``sub.set_parse(True)``
    * ``-D``                        switch the logger and ``sub`` to debug
    * ``-w``, ``--word WORD``       add a word and disable output writing
    * ``-t``, ``--type TYPE``       one of ``save``, ``scan``, ``cloud``
    * ``-m``, ``--limit N``         word limit; also turns the word listing on
    * ``-l`` / ``-W``               show lines / show words

    Remaining positional arguments are handed to ``sub.add_files``.

    :param argv: argument list without the program name; defaults to
        ``sys.argv[1:]`` when None.
    :param log_ger: logger to use; one is created with ``create_log`` when None.
    :raises SystemExit: on ``--help``/``--version`` and on invalid options.
    """
    if log_ger is None:
        log_ger = create_log(log_name="subtitle", level=logging.INFO)
    if argv is None:
        # getopt expects the argument list without the program name.
        argv = sys.argv[1:]

    start_dtime = datetime.now()
    print()
    sub = Subtitle(log_ger)

    try:
        opts, args = getopt.getopt(
            argv,
            "hvf:w:t:d:e:p:s:b:?lm:WDc",
            [
                "help", "version", "parse",
                # These two used to be written without a separating comma,
                # which silently fused them into one bogus "checkupfile="
                # option and made --checkup / --file unusable.
                "checkup", "file=",
                "word=", "type=", "dir=",
                # "--excel" is what the option loop below handles; "pickle="
                # stays accepted for backward compatibility although no
                # handler in this function consumes it.
                "excel=", "pickle=",
                "limit=", "section=", "bigger=",
            ])
        log_ger.info("opts:{0};args:{1}".format(opts, args))
    except getopt.GetoptError as msg:
        print("error happened when get options!!! error:{0}".format(msg))
        usage()
        log_ger.error("getopt.GetoptError:{0}, exit!".format(msg))
        sys.exit(2)
    except Exception as msg:
        log_ger.error("error:{0}, exit!".format(msg))
        sys.exit(2)

    _is_lines_show = False
    _is_words_show = False
    sub_type = ""
    words_limit = None
    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-v", "--version"):
            version()
            sys.exit()
        elif opt in ("-b", "--bigger"):
            sub.set_times_bigger(int(arg))
        elif opt in ("-c", "--checkup"):
            sub.checkup = True
        elif opt in ("-d", "--dir"):
            print("Sorry, -d --dir option still not offer")
            sys.exit()
        elif opt in ("-e", "--excel"):
            sub.set_lexicon_file(arg)
        elif opt in ("-s", "--section"):
            # Expect exactly "START,END"; either side may be empty.
            section = arg.split(',')
            if len(section) != 2:
                print("something wrong, with option -s --section:", arg)
                sys.exit()
            start, end = section
            if len(start) != 0:
                sub.set_start(int(start))
            if len(end) != 0:
                sub.set_end(int(end))
        elif opt in ('-f', "--file"):
            sub.add_file(arg)
        elif opt in ('-p', "--parse"):
            sub.set_parse(True)
        elif opt == '-D':
            log_ger.setLevel(logging.DEBUG)
            sub.set_logger(log_ger)
            sub.set_debug(True)
        elif opt in ("-w", "--word"):
            sub.add_word(arg)
            # Mostly used for testing, so skip writing any output.
            sub.set_output(False)
        elif opt in ("-t", "--type"):
            sub_type = arg
            if sub_type not in ('save', 'scan', 'cloud'):
                usage()
                sys.exit()
        elif opt in ("-m", "--limit"):
            words_limit = int(arg)
            # Giving a limit implies the word listing is wanted.
            _is_words_show = True
        elif opt == '-l':
            _is_lines_show = True
        elif opt == '-W':
            _is_words_show = True

    # Fall back to the default lexicon when -e/--excel was not given.
    if sub.lexicon_path is None:
        sub.set_lexicon_file("lexicon/lexicon.xlsx")
    sub.load_old_data()

    sub.add_files(args)
    sub.check_all(encode='utf-8')

    if _is_lines_show:
        sub.lines_show(words_limit)
    if _is_words_show:
        sub.words_show(words_limit)
    sub.show()

    if sub_type == 'save':
        sub.dump_data()
    elif sub_type == 'cloud':
        sub.cloud()

    print()
    end_dtime = datetime.now()
    timedelta = end_dtime - start_dtime
    print("Cost time: " + str(timedelta))
예제 #2
0
class TransferBak(object):
    """Prepare subtitle files directory by directory.

    Steps carried out by :meth:`transfer_dir`:

    * save ``*.srt`` as ``*.srt.bak`` (or pull a ``.srt.bak`` out of a video)
    * feed every ``.srt.bak`` that has no ``.srt`` yet to ``Subtitle``
    * move the resulting ``*_word.srt`` to ``*.srt``
    * create an ``.m3u`` for every video that has an ``.srt``

    ``self.files`` maps a file extension (without the dot) to a set of
    ``(directory, filename)`` tuples; it is rebuilt by :meth:`check_types`.
    """

    def __init__(self, logger=None, log_level=logging.INFO):
        """Create the helper and its ``Subtitle`` worker.

        :param logger: logger to use; when None one is created with
            ``create_log`` and named after the class.
        :param log_level: level for the logger created here (ignored when
            ``logger`` is supplied).
        """
        classname = type(self).__name__
        if logger is None:
            self.logger = create_log(log_name=classname, level=log_level)
        else:
            self.logger = logger

        self.logger.info("\n-----------------")
        self.logger.info("Begin to init")
        self.logger.info("\n-----------------")

        self.sub = Subtitle(self.logger)
        self.sub.set_parse(True)
        self.files = dict()
        self.filetypes = ['srt', 'bak', 'm3u']
        self.vediotypes = ['mkv', 'mp4', 'avi']
        self.rmsrt = False
        self.is_m3u_only = False

        if self.sub.lexicon_path is None:
            self.sub.set_lexicon_file("lexicon/lexicon.xlsx")

    def setrm(self):
        """Ask :meth:`transfer_dir` to remove old srt/m3u files first."""
        self.rmsrt = True

    def set_debug(self):
        """Switch the logger and the ``Subtitle`` worker to debug mode."""
        self.logger.setLevel(logging.DEBUG)
        self.sub.set_logger(self.logger)
        self.sub.set_debug(True)

    def set_m3u_only(self):
        """Make :meth:`transfer_paths` only regenerate m3u files."""
        self.is_m3u_only = True

    def set_logger(self, logger):
        """Replace the logger used by this object."""
        self.logger = logger

    def srt_to_baks(self):
        """Move every known ``.srt`` to its ``.srt.bak`` name.

        Without any video in ``self.files`` each srt is renamed to
        ``<srt>.bak``.  With videos, an srt is only moved when a video with
        the same base name exists; a trailing ``.en`` on the base name is
        dropped for both the video lookup and the bak name.  An existing bak
        is never overwritten.
        """
        has_vedio = self.get_vedio_num() > 0
        for root, name in self.files.get("srt", ()):
            srt = os.path.join(root, name)
            bak = srt + ".bak"
            base = os.path.splitext(srt)[0]

            if not has_vedio:
                # No video at all: plain rename.  Keep an existing bak, it
                # holds the original subtitle (same guard as the video case).
                if os.path.exists(bak):
                    print(bak + " exists, keep " + srt)
                    continue
                print("move " + srt + " to " + bak)
                shutil.move(srt, bak)
                continue

            if base.endswith(".en"):
                stem = base[0:-3]
                bak = stem + ".srt.bak"
            else:
                stem = base
            for postfix in self.vediotypes:
                vedio = stem + "." + postfix
                if os.path.exists(vedio) and not os.path.exists(bak):
                    print("move " + srt + " to " + bak)
                    shutil.move(srt, bak)
                    break

    @staticmethod
    def _parse_streams(ffmpeg_output):
        """Turn the text printed by ``ffmpeg -i`` into a list of stream dicts.

        Each dict has ``id``, ``type`` and ``filetype``; ``subhead`` (the
        parenthesised tag such as ``(eng)``) and ``title`` are added when the
        output contains them.  Exits with status 2 when a line containing
        "Stream" does not match the expected layout.
        """
        streams = []
        stream = None
        for line in ffmpeg_output.split("\n"):
            if "Stream" in line:
                m = re.match(
                    r" *Stream #(\d+):(\d+)(\(\w+\))?: (\w+): (\w+)", line)
                if not m:
                    print("err: reg not work on this sentence")
                    print(line)
                    sys.exit(2)
                if stream is not None:
                    streams.append(stream)
                stream = {"id": m.group(2),
                          "type": m.group(4),
                          "filetype": m.group(5)}
                if m.group(3) is not None:
                    stream['subhead'] = m.group(3)
            elif stream is not None:
                # Lines after a stream header may carry its title.
                m = re.match(r" *title *: (.*)", line)
                if m:
                    stream['title'] = m.group(1)
        # The stream being built when the output ends must be kept too;
        # otherwise the last stream of every file is lost.
        if stream is not None:
            streams.append(stream)
        return streams

    def baks_mkv(self):
        """Try to obtain a ``.srt.bak`` for every video that lacks one.

        Streams are listed with ``ffmpeg -i``.  A subtitle stream whose title
        marks it as English/Chinese is preferred: ``subrip`` streams are
        extracted with ``mkvextract``, ``ass`` streams are converted from a
        ``.ass`` file expected next to the video.  Videos for which no
        ``.srt.bak`` could be produced are listed at the end.
        """
        failedfiles = []
        print("------------")
        print('get srt.bak file from vedio!')

        for postfix in self.vediotypes:
            for fi, entry in enumerate(self.files.get(postfix, ())):
                mkvfile = os.path.join(entry[0], entry[1])
                print("------------")
                print(fi, mkvfile)
                srtbak_path = os.path.splitext(mkvfile)[0] + ".srt.bak"
                if os.path.exists(srtbak_path):
                    print(srtbak_path, "exists")
                    print("------------")
                    continue

                print("1. Check streams in mkv!")
                # ffmpeg prints the stream list on stderr.
                res = subprocess.run(["ffmpeg", "-i", mkvfile],
                                     stderr=subprocess.PIPE)
                streams_df = pd.DataFrame(self._parse_streams(
                    res.stderr.decode("utf-8", errors="replace")))
                print("streams_df:\n", streams_df)
                if len(streams_df) < 2 or \
                        len(streams_df[streams_df["type"] == "Subtitle"]) <= 0:
                    print("not have enough stream")
                    failedfiles.append(mkvfile)
                    continue

                print("2. get srt.bak file from mkv!")

                engchs = []
                if 'title' in streams_df:
                    criterion = streams_df['title'].map(
                        lambda t: re.match(r"(英.中)|(英中)|(中英字幕)",
                                           str(t)) is not None)
                    engchs = streams_df[criterion]

                if len(engchs) > 0:
                    print("engchs:\n", engchs)
                    base = os.path.splitext(mkvfile)[0]
                    srt = base + ".srt"
                    filetype = str(engchs.iloc[0].filetype)

                    if 'ass' == filetype:
                        # The .ass file is not extracted here; it has to be
                        # present next to the video already.
                        ass = base + ".ass"
                        if os.path.exists(ass):
                            with open(ass) as ass_file:
                                srt_str = asstosrt.convert(ass_file)
                            with open(srt, "w") as srt_file:
                                srt_file.write(srt_str)
                        if os.path.exists(srt):
                            shutil.move(srt, srtbak_path)
                        else:
                            print("error: {} failed to create!".format(srt))
                            failedfiles.append(mkvfile)
                    elif 'subrip' == filetype:
                        print("filetype:", filetype)
                        subprocess.run(
                            ["mkvextract", "tracks",
                             mkvfile,
                             "{0}:{1}".format(
                                 str(engchs.iloc[0].id),
                                 srtbak_path
                             )
                             ])
                else:
                    # 'subhead' only exists when some stream carries a
                    # language tag; without it nothing can match.
                    if 'subhead' in streams_df:
                        # The tag is stored with its parentheses ("(eng)"),
                        # so search instead of anchoring at the start.
                        criterion = streams_df['subhead'].map(
                            lambda t: re.search(r"eng", str(t)) is not None)
                    else:
                        criterion = pd.Series(False, index=streams_df.index)
                    eng = streams_df[
                        criterion & (streams_df["type"] == "Subtitle")]
                    if len(eng) > 0:
                        # todo: deal eng
                        print("eng:\n", eng)
                        # Extraction is not implemented for this case yet, so
                        # no .srt.bak was produced for this video.
                        failedfiles.append(mkvfile)
                    else:
                        print('-------------')
                        print(entry[1], "has no suitable subtitle stream!")
                        print(streams_df[streams_df["type"] == "Subtitle"])
                        failedfiles.append(mkvfile)
                        print('-------------')

        print("files failed to create .srt.bak:\n")
        for failed in failedfiles:
            print(failed)
        print('-------------')

    def words(self):
        """Placeholder for creating/concatenating word.txt (not implemented)."""
        pass

    def show_files(self):
        """Print how many files of each known type were found."""
        for k, v in self.files.items():
            print(k + ": {}".format(len(v)))

    def check_types(self, path):
        """Rescan ``path`` recursively and rebuild ``self.files``.

        A file is filed under the first extension from
        ``self.filetypes + self.vediotypes`` that its name ends with.
        """
        self.files = dict()
        known = self.filetypes + self.vediotypes
        for root, _dirs, names in os.walk(path):
            for name in names:
                for postfix in known:
                    if name.endswith("." + postfix):
                        self.files.setdefault(postfix, set()).add((root, name))
                        break
        self.show_files()

    def rm_m3u(self, path):
        """Delete every m3u file recorded in ``self.files``.

        ``path`` is not used; the file list comes from the last scan.
        """
        for root, name in self.files.get('m3u', ()):
            p = os.path.join(root, name)
            os.remove(p)
            print(p + " was removed!")

    def rm_srt(self, path):
        """Delete generated files: srt with a bak, all m3u, ``output.xlsx``.

        An srt is only removed when its ``.bak`` exists, so an original
        subtitle without a backup is never lost.
        """
        for root, name in self.files.get('srt', ()):
            p = os.path.join(root, name)
            if os.path.exists(p + ".bak"):
                os.remove(p)
                print(p + " was removed!")
        self.rm_m3u(path)

        out = os.path.join(path, "output.xlsx")
        if os.path.exists(out):
            os.remove(out)
            print(out + " was removed!")

    def get_vedio_num(self):
        """Return the number of video files found by the last scan."""
        return sum(len(self.files.get(postfix, ()))
                   for postfix in self.vediotypes)

    def transfer_dir(self, path):
        """Run the whole srt -> bak -> word srt -> m3u pipeline on ``path``."""
        self.logger.info("path: {}".format(path))
        self.check_types(path)
        if self.rmsrt:
            self.rm_srt(path)
            print("\nafter remove str and m3u files:")
            self.check_types(path)

        old_bak = set(self.files.get("bak", ()))
        vedio_num = self.get_vedio_num()
        if vedio_num > 0 and vedio_num != len(old_bak):
            if self.files.get("srt"):
                self.srt_to_baks()
            print("------------")
            print("after move srt to bak")
            self.check_types(path)
            if "bak" in self.files:
                if self.get_vedio_num() > len(self.files["bak"]):
                    print("There is more vedio than bak!")
                    print("There is no enough .srt.bak!")
                    self.baks_mkv()
            elif self.get_vedio_num() > 0:
                print("There is more vedio than bak(0)!")
                print("There is no enough .srt.bak!")
                self.baks_mkv()

            self.check_types(path)
            if "bak" in self.files and len(self.files["bak"]) == len(old_bak):
                print("baks num get no increase!")
            if 'bak' in self.files:
                old_bak = set(self.files["bak"])
        elif len(old_bak) > 0:
            print(".srt.bak exists!")
        else:
            # Subtitles without any video: carry on, the words can still be
            # studied.  (No video and no bak is the only case left here.)
            print("no .srt.bak at all!")
            if self.files.get("srt"):
                self.srt_to_baks()
            print("------------")
            print("after move srt to bak")
            self.check_types(path)
            # .get(): a directory without any srt has no "bak" entry either.
            if len(self.files.get("bak", ())) == len(old_bak):
                print("baks num get no increase!")
            if 'bak' in self.files:
                old_bak = set(self.files["bak"])

        srt_set = {os.path.join(root, name)
                   for root, name in self.files.get("srt", ())}

        # Every bak that has no srt next to it still needs processing.
        srt_cre = []
        if len(old_bak) > 0:
            for root, name in self.files.get("bak", ()):
                bak = os.path.join(root, name)
                bak_base = os.path.splitext(bak)[0]  # will get *.srt
                if bak_base not in srt_set:
                    srt_cre.append(bak)

        self.sub.load_old_data()
        self.sub.add_files(srt_cre)
        self.sub.check_all(encode='utf-8')
        # Put a copy of the word excel into the target directory as well.
        output_dir = "output/output.xlsx"
        if self.sub.words_len() > 0 and os.path.exists(output_dir):
            shutil.copy(output_dir, path)
            print("output.xlsx was copy to " + path)

        self.sub.show()

        failedfiles = []
        srt_files = []
        for bak in srt_cre:
            srt = os.path.splitext(bak)[0]  # will get *.srt
            print("base:", srt)
            wordsrt = srt + "_word.srt"
            print(wordsrt)
            if os.path.exists(wordsrt):
                shutil.move(wordsrt, srt)
                srt_files.append(srt)
            else:
                # Record the one file that failed, not the whole work list.
                failedfiles.append(bak)

        if failedfiles:
            print("files failed to create .srt_word.srt:\n")
            for failed in failedfiles:
                print(failed)
            print('-------------')

        print("srt files created:\n")
        for created in srt_files:
            print(created)
        print('-------------')

        self.get_m3u(path)

    def get_m3u(self, path):
        """Create an m3u for each video under ``path`` that has an srt but no m3u."""
        self.check_types(path)
        old_m3u = set(self.files.get('m3u', ()))
        for postfix in self.vediotypes:
            for root, name in self.files.get(postfix, ()):
                vedio = os.path.join(root, name)
                base = os.path.splitext(vedio)[0]  # name without video suffix
                m3u = base + ".m3u"
                srt = base + ".srt"

                if not os.path.exists(m3u) and os.path.exists(srt):
                    print(vedio, "is getting m3u...")
                    set_m3u(vedio, self.logger)

        self.check_types(path)
        # Show both lists when the number of m3u files did not grow.
        if 'm3u' in self.files and len(old_m3u) >= len(self.files['m3u']):
            print("old_m3u:\n", old_m3u)
            print("new_m3u:\n", self.files['m3u'])

    def transfer_paths(self, paths):
        """Process each directory in ``paths``; files and missing paths are reported."""
        self.logger.info("paths:{}".format(paths))
        for p in paths:
            if os.path.isfile(p):
                print(
                    "should be dir, don't input file:" +
                    p)
            elif os.path.isdir(p):
                print("transfer dir:" + p)
                if self.is_m3u_only is True:
                    self.check_types(p)
                    self.rm_m3u(p)
                    self.get_m3u(p)
                else:
                    self.transfer_dir(p)
            else:
                print(p, "not exists!")