def output_change(edit, config):
    """Write the report entry for one pair of changed files.

    Args:
        edit: A ``(file1, file2, distance)`` triple. Both file objects are
            read for their ``path`` and ``type`` attributes, and ``type`` is
            indexed with the ``"full"`` and ``"mime"`` keys.
        config: Run configuration; ``config.compute_distance`` decides
            whether the distance line is part of the entry.

    The module-level ``lock`` is held while the entry is written and
    flushed, so an entry is emitted as one uninterrupted unit.
    """
    file1, file2, distance = edit
    path1, path2 = file1.path, file2.path
    mime1, mime2 = file1.type, file2.type

    # Show the type once when both sides agree, otherwise show both.
    if mime1 == mime2:
        mime = "{} ({})".format(mime1["full"], mime1["mime"])
    else:
        mime = "{} ({}) / {} ({})".format(mime1["full"], mime1["mime"],
                                          mime2["full"], mime2["mime"])

    # Use the lock as a context manager so it is released even when the
    # logger raises; a bare acquire()/release() pair would leave it held
    # and block every other writer.
    with lock:
        if config.compute_distance:
            Logger.output("\nFile1: {}\nFile2: {}\nMime: {}\nDistance: {}".format(
                path1, path2, mime, distance))
        else:
            Logger.output("\nFile1: {}\nFile2: {}\nMime: {}".format(
                path1, path2, mime))
        Logger.flush_output()
def compare_files(file_set1, file_set2, data_folder_1, data_folder_2, config):
    """Compare two file sets and report changed, added and removed files.

    Args:
        file_set1: First file set, handed to ``FilesetComparator``.
        file_set2: Second file set, handed to ``FilesetComparator``.
        data_folder_1: Label printed as ``Directory1`` in the report header.
        data_folder_2: Label printed as ``Directory2`` in the report header.
        config: Run configuration. ``config.sort_order`` (compared
            case-insensitively against ``"distance"`` and ``"path"``) and
            ``config.jobs`` (worker pool size) are read here; the object is
            also forwarded to the comparator, ``_compare`` and
            ``output_change``.
    """
    # Print info about the compared files
    Logger.output("Directory1: {}\nDirectory2: {}".format(
        data_folder_1, data_folder_2))
    # Make sure this is flushed before multiprocessing starts, otherwise
    # it may be written multiple times.
    # Passing "flush=True" is not enough if the output target is a file.
    Logger.flush_output()

    # Build the comparator from the file sets given by the caller.
    comparator = FilesetComparator(file_set1, file_set2, config)
    pairs = comparator.get_files_to_compare()

    # When sorting, every value has to be computed before printing starts.
    sort_order = config.sort_order.lower()
    delay_output = sort_order in ("distance", "path")

    # Build a partial func by passing config and delay_output, so the result
    # can be used by pool.map
    func = partial(_compare, config, delay_output)

    # Use lock so lines aren't mixed up in output
    lock = multiprocessing.Lock()
    with multiprocessing.Pool(config.jobs, initializer=_init_process,
                              initargs=(lock, )) as pool:
        edits = pool.map(func, pairs)
    # Drop the pairs for which _compare returned None.
    edits = [edit for edit in edits if edit is not None]

    # If necessary, sort and then output the result
    Logger.progress("Generating output...")
    if sort_order == "distance":
        # Index 2 of an edit is its distance.
        edits.sort(key=operator.itemgetter(2), reverse=True)
    elif sort_order == "path":
        # Index 0 of an edit is the first file of the pair.
        edits.sort(key=operator.itemgetter(0), reverse=True)

    if delay_output:
        # NOTE(review): output_change reads the module-level ``lock``; the
        # Lock created above is only handed to the pool initializer here.
        # Confirm the parent process also has that global set.
        for edit in edits:
            output_change(edit, config)

    # Print info about the added and removed files
    added_count = 0
    for added in comparator.added_files:
        added_count += 1
        mime = "{} ({})".format(added.type["full"], added.type["mime"])
        Logger.output("\nAdded: {}\nMime: {}".format(added.path, mime))

    removed_count = 0
    for removed in comparator.removed_files:
        removed_count += 1
        mime = "{} ({})".format(removed.type["full"], removed.type["mime"])
        Logger.output("\nRemoved: {}\nMime: {}".format(removed.path, mime))

    # Print overall statistics
    changed_count = len(edits)
    Logger.info(
        "\nFound {} added files, {} removed files "
        "and {} changed files ({} files in total)"
        .format(added_count, removed_count, changed_count,
                added_count + removed_count + changed_count))