Ejemplo n.º 1
0
def output_change(edit, config):
    """Write one changed-file record to the output while holding the lock.

    Args:
        edit: A 3-item sequence ``(file1, file2, distance)``. Both file
            objects must expose ``path`` and ``type``; ``type`` is indexed
            with the keys ``"full"`` and ``"mime"``.
        config: Object whose ``compute_distance`` flag decides whether the
            ``Distance`` line is included in the record.
    """
    # NOTE(review): ``lock`` is a module-level name that is not defined in
    # this function -- presumably installed per process by the pool
    # initializer; confirm it is also set in the parent process.
    global lock
    file1, file2, distance = edit

    path1, path2 = file1.path, file2.path
    mime1, mime2 = file1.type, file2.type

    # Show the type once when both sides agree, otherwise show both.
    if mime1 == mime2:
        mime = "{} ({})".format(mime1["full"], mime1["mime"])
    else:
        mime = "{} ({}) / {} ({})".format(mime1["full"], mime1["mime"],
                                          mime2["full"], mime2["mime"])

    # Build the whole record up front so the critical section only covers
    # the actual write and flush.
    message = "\nFile1: {}\nFile2: {}\nMime: {}".format(path1, path2, mime)
    if config.compute_distance:
        message += "\nDistance: {}".format(distance)

    # The context manager releases the lock even if the logger raises;
    # a bare acquire()/release() pair would leave it held forever.
    with lock:
        Logger.output(message)
        Logger.flush_output()
Ejemplo n.º 2
0
def compare_files(file_set1, file_set2, data_folder_1, data_folder_2, config):
    """Compare two file sets and report changed, added and removed files.

    Pairs of files are obtained from a ``FilesetComparator`` and compared
    in a ``multiprocessing.Pool`` via ``_compare``. When a sort order of
    ``"distance"`` or ``"path"`` is configured, the results are collected,
    sorted and only then written with ``output_change``.

    Args:
        file_set1: First file collection, handed to ``FilesetComparator``.
        file_set2: Second file collection, handed to ``FilesetComparator``.
        data_folder_1: Label printed as ``Directory1`` in the header.
        data_folder_2: Label printed as ``Directory2`` in the header.
        config: Settings object; this function reads ``sort_order`` and
            ``jobs`` and forwards the object to the comparator, the
            workers and ``output_change``.
    """
    # Print info about the compared files
    Logger.output("Directory1: {}\nDirectory2: {}".format(
        data_folder_1, data_folder_2))

    # Make sure this is flushed before multiprocessing starts, otherwise
    # it may be written multiple times
    # Passing "flush=True" is not enough if the output target is a file
    Logger.flush_output()

    # Use the function's own parameters; the previous names ``files1`` and
    # ``files2`` are not bound anywhere in this function.
    comparator = FilesetComparator(file_set1, file_set2, config)
    pairs = comparator.get_files_to_compare()

    # When sorting, every value has to be computed before printing starts
    sort_order = config.sort_order.lower()
    delay_output = sort_order in ("distance", "path")

    # Build a partial func by passing config and delay output, so the result
    # can be used by pool.map
    func = partial(_compare, config, delay_output)

    # Use lock so lines aren't mixed up in output
    lock = multiprocessing.Lock()

    with multiprocessing.Pool(config.jobs,
                              initializer=_init_process,
                              initargs=(lock, )) as pool:
        # Workers return None for pairs that produce no edit; drop those.
        edits = [edit for edit in pool.map(func, pairs) if edit is not None]

    # If necessary, sort and then output the result
    Logger.progress("Generating output...")
    if sort_order == "distance":
        # Index 2 is the distance (see the unpacking in output_change)
        edits.sort(key=operator.itemgetter(2), reverse=True)
    elif sort_order == "path":
        # Index 0 is the first file object of the pair
        edits.sort(key=operator.itemgetter(0), reverse=True)

    if delay_output:
        for edit in edits:
            output_change(edit, config)

    # Print info about the added and removed files
    added_count = 0
    for added in comparator.added_files:
        added_count += 1
        mime = "{} ({})".format(added.type["full"], added.type["mime"])
        Logger.output("\nAdded: {}\nMime: {}".format(added.path, mime))

    removed_count = 0
    for removed in comparator.removed_files:
        removed_count += 1
        mime = "{} ({})".format(removed.type["full"], removed.type["mime"])
        Logger.output("\nRemoved: {}\nMime: {}".format(removed.path, mime))

    # Print overall statistics
    changed_count = len(edits)
    Logger.info(
        "\nFound {} added files, {} removed files and {} changed files ({} files in total)"
        .format(added_count, removed_count, changed_count,
                added_count + removed_count + changed_count))