Example #1
0
def main():
    """Print groups of duplicate files found under the paths in ``args.FILES``.

    Entries whose ``kind`` is ``"file"`` are bucketed by their ``size``.
    Each bucket with more than one entry and a size of at least
    ``args.limit`` is passed (as a list of paths) to ``find_duplicates()``;
    every returned group with more than one member is printed. When
    ``args.verbose`` is set, the candidates of a bucket are listed before
    they are compared.
    """
    # FIXME: this is a hack, see find_duplicates_fd() above
    sys.setrecursionlimit(10000)

    args = parse_args()

    # Bucket the file entries by size; entries of any other kind are ignored.
    by_size = {}
    for root in args.FILES:
        for info in generate_fileinfos(root, relative=False, prefix=None, checksums=False):
            if info.kind == "file":
                by_size.setdefault(info.size, []).append(info)

    for size, same_size in by_size.items():
        # A lone file cannot have a duplicate, and sizes below the limit
        # are not examined at all.
        if len(same_size) < 2 or size < args.limit:
            continue

        if args.verbose:
            print("potential duplicates: {} bytes".format(size))
            for info in same_size:
                print("  {}".format(info.path))
            print()

        for group in find_duplicates([info.path for info in same_size]):
            if len(group) < 2:
                continue
            print("duplicates:")
            for path in group:
                print("  {}".format(path))
            print()
Example #2
0
def fileinfos_from_path(path):
    """Load FileInfo objects from a directory or a .sbtr / .sbtr.gz file.

    A directory is scanned with ``generate_fileinfos()`` with checksums
    enabled, and the entries are returned as a dict keyed by each entry's
    ``path``. Any other path is handed to ``fileinfos_from_sbtr()`` and its
    result is returned unchanged.
    """
    if not os.path.isdir(path):
        return fileinfos_from_sbtr(path)

    by_path = {}
    for info in generate_fileinfos(path, checksums=True):
        by_path[info.path] = info
    return by_path
Example #3
0
def process_directory(directory, checksums, relative, prefix,
                      on_report_cb):
    """Call ``on_report_cb`` with every entry generated for ``directory``.

    ``checksums``, ``relative`` and ``prefix`` are forwarded to
    ``generate_fileinfos()``; a ``prefix`` other than ``None`` forces
    ``relative`` to ``True``.

    NOTE(review): ``on_error`` is not a parameter or local of this function;
    it is presumably a module-level handler -- confirm it is defined.
    """
    # Supplying a prefix always switches to relative mode.
    use_relative = relative if prefix is None else True

    entries = generate_fileinfos(directory,
                                 relative=use_relative,
                                 prefix=prefix,
                                 checksums=checksums,
                                 onerror=on_error)
    for entry in entries:
        on_report_cb(entry)
Example #4
0
 def test_generator(self):
     """Smoke test: serialise every entry generated for ``tests/``.

     The concatenated output is not inspected; the test only shows that
     generating the entries and calling ``json()`` on each one does not raise.
     """
     output = "".join(fileinfo.json()
                      for fileinfo in generate_fileinfos("tests/"))
     # FIXME: insert some proper check for validity
     self.assertTrue(True)