Example #1
def __call__(self, fobj):
    # Compare the cached chunk hashes of the reference file (self.filehash)
    # against the chunk hashes of fobj. zip_longest (from itertools) pads
    # the shorter sequence with '' so files of different lengths never
    # compare equal.
    for chunk1, chunk2 in zip_longest(self.filehash,
                                      hash_chunks(fobj.path),
                                      fillvalue=''):
        if chunk1 != chunk2:
            return False
    return True
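All three examples call a hash_chunks() helper that is not shown on this page. As a reference for reading them, here is a minimal sketch of what such a generator could look like, assuming it yields one hex digest per fixed-size block of a file; the block size and the SHA-256 algorithm are assumptions, not taken from the original code.

import hashlib

def hash_chunks(filepath, block_size=1024 * 1024):
    # Sketch only: yield one SHA-256 hex digest per fixed-size block of the
    # file at filepath. The real helper may use a different block size or
    # hash algorithm, and may handle directories or symlinks as well.
    with open(filepath, 'rb') as fobj:
        while True:
            block = fobj.read(block_size)
            if not block:
                break
            yield hashlib.sha256(block).hexdigest()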
Example #2
def group_by_hash(fsobjects):
    # Group files with identical contents, comparing chunk hashes lazily so
    # that files are only read as far as needed to tell them apart.
    # `hashes` maps the most recently consumed chunk hash of a group to a
    # list of (fsobject, remaining-chunks-iterator) pairs.
    hashes = {}
    for fobj in fsobjects:
        chunks = hash_chunks(fobj.path)
        chunk = next(chunks)
        while chunk in hashes:
            # Collision: advance every entry stored under this chunk hash by
            # one more chunk and re-file it under its new hash. Iterate over
            # a copy, since entries may be removed from the list in the loop.
            for dup in list(hashes[chunk]):
                _, dup_chunks = dup
                try:
                    hashes[next(dup_chunks)] = [dup]
                    hashes[chunk].remove(dup)
                except StopIteration:
                    # This entry has no chunks left; it stays where it is.
                    pass
            try:
                chunk = next(chunks)
            except StopIteration:
                # The current file is exhausted as well, so it belongs with
                # the other fully consumed files under this chunk hash.
                hashes[chunk].append((fobj, chunks))
                break
        else:
            # No collision: start a new group keyed by this chunk hash.
            hashes[chunk] = [(fobj, chunks)]

    # Collect the fsobjects of each non-empty group; the chunk iterators
    # are no longer needed.
    groups = []
    for dups in hashes.values():
        group = [dup for (dup, _) in dups]
        if group:
            groups.append(group)

    return groups
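A short usage sketch, assuming a hash_chunks helper as above and fsobjects that expose a .path attribute; FakeFile and the file paths are placeholders for illustration only.

from collections import namedtuple

# Hypothetical stand-in for the real fsobject type; group_by_hash only
# needs the .path attribute.
FakeFile = namedtuple('FakeFile', 'path')

files = [FakeFile('/tmp/a.txt'), FakeFile('/tmp/b.txt'), FakeFile('/tmp/c.txt')]
for group in group_by_hash(files):
    if len(group) > 1:
        # Every file in this group hashed identically, chunk for chunk.
        print('identical contents:', [fobj.path for fobj in group])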
Example #3
def __init__(self, filepath=None):
    # self.fm is the surrounding file manager (a ranger-style
    # FileManagerAware attribute); default to the currently selected file.
    if filepath is None:
        self.filepath = self.fm.thisfile.path
    else:
        self.filepath = filepath
    if self.filepath is None:
        self.fm.notify("Error: No file selected for hashing!", bad=True)
    # TODO: Lazily generated list would be more efficient, a generator
    #       isn't enough because this object is reused for every fsobject
    #       in the current directory.
    # abspath is os.path.abspath; the reference file is hashed once up front.
    self.filehash = list(hash_chunks(abspath(self.filepath)))
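Examples #1 and #3 are two methods of the same callable object: the constructor hashes the reference file once, and each call compares a candidate against that cached list. Below is a self-contained sketch of the pattern, reusing the hash_chunks sketch above; ContentFilter and the names in the commented usage lines are hypothetical, not the original class.

from itertools import zip_longest
from os.path import abspath

class ContentFilter:
    # Hypothetical illustration of the pattern above: hash the reference
    # file once, then reuse the cached list for every candidate path.
    def __init__(self, filepath):
        self.filehash = list(hash_chunks(abspath(filepath)))

    def __call__(self, path):
        pairs = zip_longest(self.filehash, hash_chunks(path), fillvalue='')
        return all(a == b for a, b in pairs)

# content_filter = ContentFilter('reference.bin')
# same_as_reference = [p for p in candidate_paths if content_filter(p)]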