Example #1
0
def __main__():
    logger = Logger()
    logger.set_debug_level('info')

    config= Config()
    u_base_path = config.base_path

    hddb = HoerdatDB(config.hoerdat_db_path, {'debug': 0})

    dupmerge = DupMerge(
        config.md5_db_path,  u_base_path,
        {'logger': logger},
    )

    hd = Hoerdat(hddb)

    for file, md5 in dupmerge.all_files_md5():
        if not hd.set_filename(file, md5):
            continue
        if DONT_RESCAN_PRESENT and hd.data['hoerdat'].get_data('hoerdat_id'):
            continue
        print file, hd.data['hoerdat'].get_data('hoerdat_id')
        fetch_result = hd.fetch_data()
        best_match = hd.get_best_match(fetch_result)
        if best_match:
            hd.merge_data(best_match[0], best_match[1])
            hd.update()
            print "Best rank:", best_match[1]
            print "\t", '"' + hd.data['hoerdat'].get_data('title') + '"'
Example #2
0
def __main__():
    """Report the files below the base path to the MD5 database.

    The configured base path is walked top-down.  Directories that contain
    neither sub-directories nor files are removed, ignored directories are
    pruned from the walk, and every file that is not ignored is passed to
    ``DupMerge.file_exists``.  Afterwards a "reverse scan" passes every
    path returned by ``DupMerge.all_files`` that is not a regular file on
    disk to ``file_exists`` as well.
    """
    logger = Logger()
    logger.set_debug_level('info')

    config = Config()
    u_base_path = config.base_path

    dupmerge = DupMerge(
        config.md5_db_path, u_base_path,
        {'logger': logger},
    )

    # The paths are literal text, so escape them before embedding them in a
    # regular expression; otherwise characters such as '.' or '+' in the
    # configured paths would be interpreted as regex syntax.
    base_prefix = '^' + re.escape(u_base_path)
    ignore_sources = [
        base_prefix + '/dbs',
        base_prefix + r'/lost\+found',
        '^' + re.escape(config.ftp_log_path),
        base_prefix + '/scripts',
        base_prefix + '/temp',
        base_prefix + '/txt',
        base_prefix + '/semaphores',
        r'\.tmp$',
        r'\.tmp\.\d+$',
        r'\.db$',
    ]
    re_ignore = [re.compile(source) for source in ignore_sources]

    def is_ignored(u_path):
        """Return True if any ignore pattern matches ``u_path``."""
        return any(pattern.search(u_path) for pattern in re_ignore)

    for root, dirs, files in os.walk(u_base_path):
        if not dirs and not files:
            logger.warning("Removing empty dir '" + root + "'...")
            # os.removedirs also removes parent directories that become
            # empty as a result.
            os.removedirs(root)
            continue
        dirs.sort()
        files.sort()
        # Prune ignored directories with a slice assignment: os.walk only
        # honours in-place changes to ``dirs``, and calling dirs.remove()
        # while looping over ``dirs`` skips the entry that follows each
        # removed one, so adjacent ignored directories were still walked.
        dirs[:] = [
            u_dir for u_dir in dirs
            if not is_ignored(os.path.join(root, u_dir))
        ]
        logger.info("Scanning '" + root + "'...")
        for u_file in files:
            u_path = os.path.join(root, u_file)
            if is_ignored(u_path):
                continue
            dupmerge.file_exists(u_path)

    logger.info("Doing reverse scan...")

    # Collect the full list first so that file_exists() is never called
    # while all_files() is still being iterated.
    missing = [
        u_file for u_file in dupmerge.all_files()
        if not os.path.isfile(u_file)
    ]
    for u_file in missing:
        dupmerge.file_exists(u_file)