Example #1
0
def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
    """Collect the media files under ``source_folder`` that should be processed.

    The folder is walked recursively and a file is kept only if it passes
    every filter below (all settings come from ``config.getInstance()``):

    * its lower-cased suffix is listed in ``media_type``;
    * outside main mode 3, none of its parent directory names is listed in
      ``escape_folder``;
    * its absolute path is not in ``failed_list.txt`` (the list is consulted
      only in main mode 3 or link mode, and only when ``ignore_failed_list``
      is off);
    * outside main mode 3, it is neither a symlink nor (unless
      ``scan_hardlink`` is on) a file with more than one hard link;
    * it is either 0 bytes or at least 120 MiB;
    * its path matches ``regexstr`` (when a valid pattern is given) and its
      name does not contain ``-trailer.``;
    * in main mode 3, its sibling ``.nfo`` is not younger than
      ``nfo_skip_days`` (as measured by ``file_modification_days``).

    In link mode (and not main mode 3) with ``nfo_skip_days > 0``, files whose
    number (``get_number``) matches a recent ``.nfo`` in the success folder
    are dropped as well.

    Args:
        source_folder: Directory to scan (anything ``Path()`` accepts).
        regexstr: Optional case-insensitive pattern the absolute path must
            match; empty, non-str, or invalid patterns disable the filter.

    Returns:
        Absolute paths (as ``str``) of the selected files; an empty list if
        ``source_folder`` is not a directory.
    """
    conf = config.getInstance()
    main_mode = conf.main_mode()
    debug = conf.debug()
    nfo_skip_days = conf.nfo_skip_days()
    link_mode = conf.link_mode()
    file_type = conf.media_type().lower().split(",")
    trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)

    cliRE = None
    if isinstance(regexstr, str) and regexstr:
        try:
            cliRE = re.compile(regexstr, re.IGNORECASE)
        except (re.error, OverflowError, RecursionError) as e:
            # Still best-effort (scan continues unfiltered), but tell the
            # user that the pattern they passed was not applied.
            print(f"[!]Ignore invalid regex '{regexstr}': {e}")

    failed_list_txt_path = Path(
        conf.failed_folder()).resolve() / 'failed_list.txt'
    failed_set = set()
    if (main_mode == 3 or link_mode) and not conf.ignore_failed_list():
        try:
            flist = failed_list_txt_path.read_text(
                encoding='utf-8').splitlines()
            failed_set = set(flist)
            if len(flist) != len(failed_set):
                # De-duplicate and write back without changing the relative
                # order of entries; of repeated entries only the last one is
                # kept (dicts preserve insertion order).
                deduped = list(dict.fromkeys(reversed(flist)))
                deduped.reverse()
                failed_list_txt_path.write_text('\n'.join(deduped) + '\n',
                                                encoding='utf-8')
        except (OSError, UnicodeError):
            # A missing/unreadable/unwritable list is not fatal: whatever was
            # loaded into failed_set before the error is still honoured.
            pass

    if not Path(source_folder).is_dir():
        print('[-]Source folder not found!')
        return []

    total = []
    source = Path(source_folder).resolve()
    skip_failed_cnt, skip_nfo_days_cnt = 0, 0
    # NOTE(review): the source showed the same ASCII comma twice in this
    # character class, which looks like a fullwidth comma (U+FF0C) lost in
    # transcoding; both separators are accepted here -- confirm upstream.
    escape_folder_set = set(re.split("[,\uff0c]", conf.escape_folder()))

    for full_name in source.glob(r'**/*'):
        if main_mode != 3 and not escape_folder_set.isdisjoint(
                full_name.parent.parts):
            continue
        if full_name.suffix.lower() not in file_type:
            continue
        absf = str(full_name)
        if absf in failed_set:
            skip_failed_cnt += 1
            if debug:
                print('[!]Skip failed movie:', absf)
            continue

        if full_name.is_symlink():
            # Never stat() a symlink: its target may not exist.
            if main_mode != 3:
                continue  # outside mode 3 symlinks are not scraped
            movie_size = 0  # 0 bypasses the small-file check below
        else:
            file_stat = full_name.stat()  # one stat() serves both checks
            if (main_mode != 3 and file_stat.st_nlink > 1
                    and not conf.scan_hardlink()):
                continue  # hard-linked file and hardlink scanning is off
            movie_size = file_stat.st_size

        # 0-byte samples used for debugging are let through; anything else
        # below 120 MiB is treated as an advertisement clip and dropped.
        if 0 < movie_size < 125829120:  # 1024*1024*120=125829120
            continue
        if cliRE and not cliRE.search(absf):
            continue
        if trailerRE.search(full_name.name):
            continue

        if main_mode == 3:
            nfo = full_name.with_suffix('.nfo')
            if not nfo.is_file():
                if debug:
                    print(f"[!]Metadata {nfo.name} not found for '{absf}'")
            elif nfo_skip_days > 0 and file_modification_days(
                    nfo) <= nfo_skip_days:
                skip_nfo_days_cnt += 1
                if debug:
                    print(
                        f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'"
                    )
                continue
        total.append(absf)

    if skip_failed_cnt:
        print(
            f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'."
        )
    if skip_nfo_days_cnt:
        print(
            f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days."
        )
    if nfo_skip_days <= 0 or not link_mode or main_mode == 3:
        return total

    # Link mode: movies that were already scraped successfully must also be
    # checked against the .nfo age in the success folder; those updated
    # within the last N days are skipped.
    skip_numbers = set()
    success_folder = Path(conf.success_folder()).resolve()
    for f in success_folder.glob(r'**/*'):
        if f.suffix.lower() != '.nfo':
            continue
        if file_modification_days(f) > nfo_skip_days:
            continue
        number = get_number(False, f.stem)
        if number:
            skip_numbers.add(number.lower())

    # Partition in a single pass instead of calling list.remove() per hit.
    kept, skipped = [], []
    for f in total:
        n_number = get_number(False, os.path.basename(f))
        if n_number and n_number.lower() in skip_numbers:
            skipped.append(f)
        else:
            kept.append(f)

    if debug:
        for f in skipped:
            print(
                f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'"
            )
    if skipped:
        print(
            f"[!]Skip {len(skipped)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days."
        )

    return kept
Example #2
0
def main(args: tuple) -> Path:
    """Run one capture session: either a single file or a whole source folder.

    Steps, in order: validate the main mode, install signal handlers, mirror
    stdout to the log directory, print the banner and effective settings,
    optionally check for updates, refresh the mapping tables, build the
    OpenCC converter, process the file(s), optionally remove empty folders,
    and close the log file.

    Args:
        args: 6-tuple ``(single_file_path, custom_number, logdir, regexstr,
            zero_op, no_net_op)``. An empty ``single_file_path`` selects
            folder mode; an empty ``custom_number`` makes single-file mode
            derive the number from the file name via ``get_number``.

    Returns:
        Whatever ``close_logfile(logdir)`` returns.

    Note:
        The process is terminated with ``os._exit`` when the main mode is not
        1, 2 or 3 (status 4) or when the mapping-table download raises
        (status -1, after a 30 second pause).
    """
    (single_file_path, custom_number, logdir, regexstr, zero_op,
     no_net_op) = args
    conf = config.getInstance()
    main_mode = conf.main_mode()
    folder_path = ""
    if main_mode not in (1, 2, 3):
        print(
            f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help."
        )
        os._exit(4)

    signal.signal(signal.SIGINT, signal_handler)
    # The debug handler is bound to SIGBREAK on Windows and SIGWINCH elsewhere.
    if sys.platform == 'win32':
        signal.signal(signal.SIGBREAK, sigdebug_handler)
    else:
        signal.signal(signal.SIGWINCH, sigdebug_handler)
    dupe_stdout_to_logfile(logdir)

    platform_total = str(' - ' + platform.platform() + ' \n[*] - ' +
                         platform.machine() + ' - Python-' +
                         platform.python_version())

    print('[*]================= Movie Data Capture =================')
    print('[*]' + version.center(54))
    print('[*]======================================================')
    print('[*]' + platform_total)
    print('[*]======================================================')
    print('[*] - 严禁在墙内宣传本项目 - ')
    print('[*]======================================================')

    start_time = time.time()
    print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))

    print(f"[+]Load Config file '{conf.ini_path}'.")
    if conf.debug():
        print('[+]Enable debug')
    if conf.link_mode() in (1, 2):
        print('[!]Enable {} link'.format(
            ('soft', 'hard')[conf.link_mode() - 1]))
    if len(sys.argv) > 1:
        print('[!]CmdLine:', " ".join(sys.argv[1:]))

    # Single-file runs print a fixed placeholder instead of the mode details.
    if single_file_path:
        mode_fields = ('-', 'Single File', '', '', '')
    else:
        mode_fields = (
            main_mode,
            ['Scraping', 'Organizing',
             'Scraping in analysis folder'][main_mode - 1],
            "" if not conf.multi_threading() else ", multi_threading on",
            "" if conf.nfo_skip_days() == 0 else
            f", nfo_skip_days={conf.nfo_skip_days()}",
            "" if conf.stop_counter() == 0 else
            f", stop_counter={conf.stop_counter()}",
        )
    print('[+]Main Working mode ## {}: {} ## {}{}{}'.format(*mode_fields))

    if conf.update_check():
        try:
            check_update(version)
        except Exception as e:
            print('[-]Update check failed!', e)

    create_failed_folder(conf.failed_folder())

    # Download Mapping Table, parallel version
    def fmd(f) -> typing.Tuple[str, Path]:
        """Return ``(download URL, local cache path)`` for mapping file ``f``."""
        return (
            'https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/'
            + f, Path.home() / '.local' / 'share' / 'mdc' / f)

    map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'),
               fmd('c_number.json'))
    # Drop cached tables that reached the configured validity age so the
    # download step below fetches them again.
    for _, v in map_tab:
        if v.exists() and file_modification_days(
                str(v)) >= conf.mapping_table_validity():
            print("[+]Mapping Table Out of date! Remove", str(v))
            os.remove(str(v))
    try:
        res = parallel_download_files(
            ((k, v) for k, v in map_tab if not v.exists()))
        for i, fp in enumerate(res, start=1):
            if fp and len(fp):
                print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
            else:
                print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
    except Exception as e:
        print("[!] ==================== ERROR ====================")
        print("[!] " + "Mapping Table Download FAILED".center(47))
        print("[!] " + "无法连接github".center(47))
        print("[!] " + "请过几小时再试试".center(47))
        # Show the underlying cause instead of discarding it.
        print("[!]", e)
        print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47))
        time.sleep(30)
        os._exit(-1)

    # create OpenCC converter
    ccm = conf.cc_convert_mode()
    try:
        oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm ==
                                           1 else 's2t.json')
    except Exception:
        # some OS no OpenCC cpython, try opencc-python-reimplemented.
        # pip uninstall opencc && pip install opencc-python-reimplemented
        oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')

    if single_file_path != '':  # Single File
        print('[+]==================== Single File =====================')
        if custom_number == '':
            create_data_and_move_with_custom_number(
                single_file_path,
                get_number(conf.debug(), os.path.basename(single_file_path)),
                oCC)
        else:
            create_data_and_move_with_custom_number(single_file_path,
                                                    custom_number, oCC)
    else:
        folder_path = conf.source_folder()
        if not isinstance(folder_path, str) or folder_path == '':
            folder_path = os.path.abspath(".")

        movie_list = movie_lists(folder_path, regexstr)

        count_all = str(len(movie_list))
        print('[+]Find', count_all, 'movies.')
        print('[*]======================================================')
        stop_count = conf.stop_counter()
        if stop_count < 1:
            stop_count = 999999  # no limit configured
        else:
            # Progress is reported against the capped total.
            count_all = str(min(len(movie_list), stop_count))

        # Walk the movie list and hand every file over to core.
        for count, movie_path in enumerate(movie_list, start=1):
            percentage = str(count / int(count_all) * 100)[:4] + '%'
            print('[!] {:>30}{:>21}'.format(
                '- ' + percentage + ' [' + str(count) + '/' + count_all +
                '] -', time.strftime("%H:%M:%S")))
            create_data_and_move(movie_path, zero_op, no_net_op, oCC)
            if count >= stop_count:
                print("[!]Stop counter triggered!")
                break

    if conf.del_empty_folder() and not zero_op:
        rm_empty_folder(conf.success_folder())
        rm_empty_folder(conf.failed_folder())
        # folder_path stays empty in single-file mode.
        if folder_path:
            rm_empty_folder(folder_path)

    end_time = time.time()
    total_time = str(timedelta(seconds=end_time - start_time))
    # When the duration has a fractional part, drop its last three digits.
    print("[+]Running time",
          total_time[:len(total_time) if total_time.rfind('.') < 0 else -3],
          " End at", time.strftime("%Y-%m-%d %H:%M:%S"))

    print("[+]All finished!!!")

    return close_logfile(logdir)
Example #3
0
def main(args: tuple) -> Path:
    (logdir, regexstr, zero_op, no_net_op, verrel, cpuinfo) = args
    conf = config.getInstance()
    main_mode = conf.main_mode()
    folder_path = ""
    if main_mode not in (1, 2, 3):
        print(
            f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help."
        )
        os._exit(4)

    signal.signal(signal.SIGINT, signal_handler)
    if sys.platform == 'win32':
        signal.signal(signal.SIGBREAK, sigdebug_handler)
    else:
        signal.signal(signal.SIGWINCH, sigdebug_handler)
    dupe_stdout_to_logfile(logdir)

    x86_64_cpu = cpuinfo['arch'] == 'X86_64'
    avx_cpu = x86_64_cpu and 'avx' in cpuinfo['flags']
    if x86_64_cpu and not avx_cpu:
        conf.set_override('face:locations_model=')

    running_env_info = f"""
[*]     OS: {platform.platform()}
[*]    CPU: {cpuinfo['brand_raw']}{' (avx)' if avx_cpu else ''}
[*] Python: {cpuinfo['python_version']}""".lstrip()

    print('[*]================== AV Data Capture ===================')
    print('[*]' + verrel.center(54))
    print('[*]======================================================')
    print(running_env_info)
    print('[*]======================================================')
    print('[*] - 严禁在墙内宣传本项目 - ')
    print('[*]======================================================')

    start_time = time.time()
    print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))

    print(f"[+]Load Config file '{conf.ini_path}'.")
    if conf.debug():
        print('[+]Enable debug')
    if conf.link_mode() in (1, 2):
        print('[!]Enable {} link'.format(
            ('soft', 'hard')[conf.link_mode() - 1]))
    if len(sys.argv) > 1:
        print('[!]CmdLine:', " ".join(sys.argv[1:]))
    print('[+]Main Working mode ## {}: {} ## {}{}{}'.format(
        *(main_mode,
          ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode -
                                                                    1],
          "" if not conf.multi_threading() else ", multi_threading on",
          "" if conf.nfo_skip_days() ==
          0 else f", nfo_skip_days={conf.nfo_skip_days()}",
          "" if conf.stop_counter() ==
          0 else f", stop_counter={conf.stop_counter()}")))

    if conf.update_check():
        try:
            check_update(version)
        except Exception as e:
            print('[-]Update check failed!', e)

    create_failed_folder(conf.failed_folder())

    # Download Mapping Table, parallel version
    def fmd(f) -> typing.Tuple[str, Path]:
        """

        """
        return (
            'https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/'
            + f, Path.home() / '.local' / 'share' / 'avdc' / f)

    map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'),
               fmd('c_number.json'))
    for k, v in map_tab:
        if v.exists():
            if file_modification_days(str(v)) >= conf.mapping_table_validity():
                print("[+]Mapping Table Out of date! Remove", str(v))
                os.remove(str(v))
    try:
        res = parallel_download_files(
            ((k, v) for k, v in map_tab if not v.exists()))
        for i, fp in enumerate(res, start=1):
            if fp and len(fp):
                print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
            else:
                print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
    except Exception as e:
        print("[!] ==================== ERROR ====================")
        print("[!] " + "Mapping Table Download FAILED".center(47))
        print("[!] " + "无法连接github".center(47))
        print("[!] " + "请过几小时再试试".center(47))
        print("[!]", e)
        print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47))
        time.sleep(30)
        os._exit(-1)

    # create OpenCC converter
    ccm = conf.cc_convert_mode()
    try:
        oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm ==
                                           1 else 's2t.json')
    except:
        # some OS no OpenCC cpython, try opencc-python-reimplemented.
        # pip uninstall opencc && pip install opencc-python-reimplemented
        oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')

    folder_path = conf.source_folder()
    if not isinstance(folder_path, str) or folder_path == '':
        folder_path = os.path.abspath(".")

    movie_list = movie_lists(folder_path, regexstr)

    count = 0
    count_all = str(len(movie_list))
    print('[+]Find', count_all, 'movies.')
    print('[*]======================================================')
    stop_count = conf.stop_counter()
    if stop_count < 1:
        stop_count = 999999
    else:
        count_all = str(min(len(movie_list), stop_count))

    for movie_path in movie_list:  # 遍历电影列表 交给core处理
        count = count + 1
        percentage = str(count / int(count_all) * 100)[:4] + '%'
        print('[!] {:>30}{:>21}'.format(
            '- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
            time.strftime("%H:%M:%S")))
        create_data_and_move(movie_path, zero_op, no_net_op, oCC)
        if count >= stop_count:
            print("[!]Stop counter triggered!")
            break
        if interval_delay := conf.interval_delay():
            time.sleep(interval_delay + secrets.randbelow(30) / 10)