def md5_update_from_file(filename: Union[str, Path], hash: Hash) -> Hash:
    """Update *hash* with the binary contents of a file, read in 4 KiB chunks.

    Based on https://stackoverflow.com/questions/24937495/how-can-i-calculate-a-hash-for-a-filesystem-directory-using-python

    Args:
        filename: Path of the file to read.
        hash: A hashlib hash object to update in place.

    Returns:
        Hash: The same hash object, after updating.

    Raises:
        ValueError: If *filename* is not an existing regular file.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O,
    # and ValueError matches the validation style used elsewhere in this file.
    if not Path(filename).is_file():
        raise ValueError(str(filename) + " is not a valid file")
    with open(str(filename), "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash.update(chunk)
    return hash
def md5_update_from_file(filename: Union[str, Path], hash: Hash) -> Hash:
    """Update *hash* with the file's path string followed by its binary contents.

    Args:
        filename: Path of the file to read.
        hash: A hashlib hash object to update in place.

    Returns:
        Hash: The same hash object, after updating.

    Raises:
        ValueError: If *filename* is not an existing regular file.
    """
    if not Path(filename).is_file():
        raise ValueError(str(filename) + " is not a valid file")
    # str() first: the annotation allows pathlib.Path, which has no .encode(),
    # so the original `filename.encode()` crashed for Path arguments.
    hash.update(str(filename).encode())
    with open(str(filename), "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash.update(chunk)
    return hash
def chunked_hasher(hash_object: HASH, file_handle) -> str:
    """Calculate the hash of a file without reading it into memory completely.

    Streams *file_handle* through *hash_object* in BUF_SIZE chunks and
    returns the resulting hexadecimal digest string.
    """
    chunk = file_handle.read(BUF_SIZE)
    while chunk:
        hash_object.update(chunk)
        chunk = file_handle.read(BUF_SIZE)
    return hash_object.hexdigest()
def md5_update_from_file(filename: Union[str, Path], hash: Hash, ignore) -> Hash:
    """Update *hash* with a file's binary contents unless the file is ignored.

    Args:
        filename: Path of the file to read.
        hash: A hashlib hash object to update in place.
        ignore: Container of path strings to skip.

    Returns:
        Hash: The same hash object (unchanged when the file is ignored).

    Raises:
        ValueError: If the file is not ignored and is not an existing
            regular file.
    """
    if str(filename) in ignore:
        return hash
    # Explicit check instead of `assert`: asserts are stripped under -O.
    if not Path(filename).is_file():
        raise ValueError(str(filename) + " is not a valid file")
    with open(str(filename), "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash.update(chunk)
    return hash
def md5_update_from_dir(directory: Union[str, Path], hash: Hash) -> Hash:
    """Recursively fold a directory tree into *hash* and return it.

    Each entry's name is hashed, then file contents are delegated to
    md5_update_from_file and subdirectories recurse; entries are visited
    in sorted order so the resulting digest is deterministic.
    """
    assert Path(directory).is_dir()
    entries = sorted(Path(directory).iterdir())
    for entry in entries:
        hash.update(entry.name.encode())
        if entry.is_file():
            hash = md5_update_from_file(entry, hash)
        elif entry.is_dir():
            hash = md5_update_from_dir(entry, hash)
    return hash
def hash_file(hash_object: _hashlib.HASH, file_name: str) -> None:
    """Feed the contents of *file_name* into *hash_object* in 16 KiB chunks.

    This adds the content of the file to the provided _hashlib.HASH object.
    It mutates the hash in place and returns None; it does not produce a
    digest of the file contents itself.
    """
    with open(file_name, 'rb') as stream:
        # b'' sentinel: read() returns empty bytes at EOF in binary mode.
        for chunk in iter(lambda: stream.read(16384), b''):
            hash_object.update(chunk)
def md5_update_from_dir(directory: Union[str, Path], hash: Hash) -> Hash:
    """Recursively hash a directory tree into *hash* and return it.

    Entries are visited in case-insensitive path order for a stable digest;
    each entry's name is hashed, then files and subdirectories are folded
    in via the TarFilesSimilarity helper methods.
    """
    assert Path(directory).is_dir()
    children = sorted(Path(directory).iterdir(), key=lambda item: str(item).lower())
    for child in children:
        hash.update(child.name.encode())
        if child.is_file():
            hash = TarFilesSimilarity.md5_update_from_file(child, hash)
        elif child.is_dir():
            hash = TarFilesSimilarity.md5_update_from_dir(child, hash)
    return hash
def md5_update_from_dir(directory: Union[str, Path], hash: Hash, ignore) -> Hash:
    """Recursively hash a directory tree into *hash*, skipping ignored paths.

    A directory or file whose string form appears in *ignore* contributes
    nothing. Entries are visited in case-insensitive path order so the
    resulting digest is deterministic.
    """
    if str(directory) in ignore:
        return hash
    assert Path(directory).is_dir()
    children = sorted(Path(directory).iterdir(), key=lambda item: str(item).lower())
    for child in children:
        hash.update(child.name.encode())
        if child.is_file():
            hash = md5_update_from_file(child, hash, ignore)
        elif child.is_dir():
            hash = md5_update_from_dir(child, hash, ignore)
    return hash
def get_sha1_hash_from_dir(directory: Union[str, Path], hash: Hash = None) -> str:
    """Return the SHA-1 hex digest of a directory tree.

    Entry names are hashed in sorted order; file contents are folded in via
    get_sha1_from_file and subdirectories recurse.

    Args:
        directory: Path of the directory to hash.
        hash: Optional hash object to continue updating; a fresh
            hashlib.sha1() is created when omitted.

    Returns:
        str: The hex digest of the accumulated hash.
    """
    assert Path(directory).is_dir()
    if hash is None:
        hash = hashlib.sha1()
    return _sha1_update_from_dir(directory, hash).hexdigest()


def _sha1_update_from_dir(directory: Union[str, Path], hash: Hash) -> Hash:
    # Recursive worker: returns the hash OBJECT, not a digest. The original
    # recursed into get_sha1_hash_from_dir, which returns a hex STRING, so
    # `hash` was rebound to a str whenever a subdirectory existed and the
    # function then crashed on hash.update / hash.hexdigest.
    for path in sorted(Path(directory).iterdir()):
        hash.update(path.name.encode())
        if path.is_file():
            hash = get_sha1_from_file(path, hash)
        elif path.is_dir():
            hash = _sha1_update_from_dir(path, hash)
    return hash
def compute_digest_from_filelike_and_callback(filelike, h:_hashlib.HASH, bufferSize=STATIC_DEFAULT_BUFFER_SIZE, cback=None):
    """
    Accessory method used to compute the digest of an input file-like object.

    The input is read in *bufferSize* chunks; each chunk is fed to *h* and,
    when *cback* is provided, to that callable as well.

    Returns the binary digest accumulated in *h*.
    """
    while True:
        chunk = filelike.read(bufferSize)
        if not chunk:
            break
        h.update(chunk)
        if cback:
            cback(chunk)
    return h.digest()
def _hash_file_or_dir(path: str, md5: Hash) -> Hash: """Updates the inputted Hash with the contents of the current path. Args: path: path of file or directory Returns: str: The MD5 hash of the file or directory """ if isinstance(path, str) and path.lower().startswith("file://"): path = unquote(urlparse(path).path) md5.update(path.encode()) if Path(path).is_dir(): md5 = _hash_dir(path, md5) elif Path(path).is_file(): md5 = _hash_file(path, md5) return md5
def _hash_dir(directory: Union[str, Path], md5: Hash) -> Hash: """Updates the inputted Hash with the contents of the current path. Args: directory: path of the directory Returns: str: The MD5 hash of the directory """ if not Path(directory).is_dir(): raise ValueError(str(directory) + " is not a valid directory") for path in sorted(Path(directory).iterdir()): md5.update(path.name.encode()) if path.is_file(): md5 = _hash_file(path, md5) elif path.is_dir(): md5 = _hash_dir(path, md5) return md5
def calculate_hash2(self, part1: HASH) -> bytes:
    """Return the hash of the transaction, starting from a partial hash.

    The hash of the transactions is the `sha256(sha256(bytes(tx))`; *part1*
    carries the inner sha256 mid-state, which is completed with the nonce
    before the outer sha256 pass.

    :param part1: A partial hash of the transaction, usually from `calculate_hash1`
    :type part1: :py:class:`_hashlib.HASH`
    :return: The transaction hash
    :rtype: bytes
    """
    nonce_bytes = self.nonce.to_bytes(self.HASH_NONCE_SIZE, byteorder='big', signed=False)
    part1.update(nonce_bytes)
    # SHA256D gets the hash in little-endian format; reverse the bytes to
    # produce the big-endian representation.
    outer = hashlib.sha256(part1.digest())
    return outer.digest()[::-1]
def _hash_file(file: Union[str, Path], md5: Hash) -> Hash: """Updates the inputted Hash with the contents of the current path. Args: file: path of the file Returns: str: The MD5 hash of the file """ if isinstance(file, str) and file.lower().startswith("file://"): file = unquote(urlparse(file).path) if not Path(file).is_file(): raise ValueError(str(file) + " is not a valid file") with open(file, "rb") as f: while True: data = f.read(BUF_SIZE) if not data: break md5.update(data) return md5
def md5_update_from_string_list(stringlist, hash: Hash):
    """Fold each string in *stringlist* (UTF-8 encoded) into *hash* and
    return the same hash object."""
    encoded_chunks = (text.encode() for text in stringlist)
    for chunk in encoded_chunks:
        hash.update(chunk)
    return hash
def get_sha1_from_file(filename: Union[str, Path], hash: Hash) -> Hash:
    """Stream the file at *filename* through *hash* in 4 KiB chunks and
    return the same hash object."""
    assert Path(filename).is_file()
    with open(str(filename), "rb") as stream:
        chunk = stream.read(4096)
        while chunk:
            hash.update(chunk)
            chunk = stream.read(4096)
    return hash