def hash_file(path, block_size, hash_func):
    """Hash the contents of the file at ``path``, reading it in chunks.

    Args:
        path: Location of the file; it is opened in binary mode.
        block_size: Size argument passed to each ``read()`` call.
        hash_func: Zero-argument callable returning a hash object that
            provides ``update()`` and ``hexdigest()``.

    Returns:
        The hex digest of the file contents, passed through ``encode``.
    """
    digest = hash_func()
    with open(path, "rb") as stream:
        # Feed the file to the hash one chunk at a time so the whole file
        # never has to be held in memory; a falsy read marks end of input.
        chunk = stream.read(block_size)
        while chunk:
            digest.update(chunk)
            chunk = stream.read(block_size)
    return encode(digest.hexdigest())
def _product_hash_rec(root, resolve_root, resolve_links, hash_func, block_size):
    """Recursively compute a hash for the file, link or directory at ``root``.

    Args:
        root: Path to hash.
        resolve_root: If true, a symbolic link at ``root`` itself is followed
            instead of being hashed as a link. Recursive calls for directory
            entries always pass ``False`` here.
        resolve_links: If true, symbolic links at any level are followed
            instead of being hashed as links.
        hash_func: Zero-argument callable returning a hash object that
            provides ``update()`` and ``hexdigest()``.
        block_size: Read size forwarded to ``hash_file`` for regular files.

    Returns:
        For an unresolved link, the ``hash_string`` of its target path; for a
        regular file, the ``hash_file`` of its contents; for a directory, the
        combined digest of its entries passed through ``encode``.

    Raises:
        IOError: If ``root`` is neither an unresolved link, a regular file,
            nor a directory.
    """
    # Links are hashed as links only when neither flag asks to follow them.
    preserve_links = not (resolve_root or resolve_links)

    if preserve_links and os.path.islink(root):
        # The hash of a link is the hash of the path it points to, not of
        # whatever lives at that path.
        return hash_string(path_utf8(os.readlink(root)), hash_func)

    if os.path.isfile(root):
        return hash_file(root, block_size, hash_func)

    if not os.path.isdir(root):
        raise IOError(
            "path does not refer to a regular file or directory: %s" % root)

    # Directory fingerprint: for every entry, in sorted name order, feed the
    # hash of the entry name, a one-byte type tag (link, directory or file)
    # and the recursively computed hash of the entry itself.
    digest = hash_func()
    entry_names = os.listdir(root)
    entry_names.sort()
    for entry_name in entry_names:
        entry_path = os.path.join(root, entry_name)
        digest.update(hash_string(path_utf8(entry_name), hash_func))

        # NOTE(review): the tag is chosen using this call's resolve_root,
        # while the recursive call below passes resolve_root=False. With
        # resolve_root true and resolve_links false, a symlinked entry is
        # tagged b"d" or b"f" even though its link target path is what gets
        # hashed. Confirm whether this is intended; changing it would alter
        # the hashes produced.
        if preserve_links and os.path.islink(entry_path):
            type_tag = b"l"
        elif os.path.isdir(entry_path):
            type_tag = b"d"
        else:
            type_tag = b"f"
        digest.update(type_tag)

        entry_hash = _product_hash_rec(
            entry_path, False, resolve_links, hash_func, block_size)
        digest.update(entry_hash)

    return encode(digest.hexdigest())
def hash_string(string, hash_func):
    """Hash a single value and return its hex digest.

    Args:
        string: Data handed as-is to the hash object's ``update()``.
        hash_func: Zero-argument callable returning a hash object that
            provides ``update()`` and ``hexdigest()``.

    Returns:
        The hex digest of ``string``, passed through ``encode``.
    """
    digest = hash_func()
    digest.update(string)
    hex_digest = digest.hexdigest()
    return encode(hex_digest)