def get_file_hash(self, path_info):
    """Compute the md5 of the local file at *path_info*.

    Returns a HashInfo tagged with ``self.PARAM_CHECKSUM``; when the
    resulting HashInfo is truthy, its ``size`` is filled in from the
    local filesystem.
    """
    checksum = file_md5(path_info)[0]
    info = HashInfo(self.PARAM_CHECKSUM, checksum)
    if not info:
        return info
    info.size = os.path.getsize(path_info)
    return info
def get_file_hash(self, path_info):
    """Compute the md5 of a remote file over SSH.

    Raises NotImplementedError when *path_info* does not belong to this
    filesystem's scheme.  The size is filled in (via the same SSH
    connection) only when the HashInfo is truthy.
    """
    if path_info.scheme != self.scheme:
        raise NotImplementedError
    with self.ssh(path_info) as ssh:
        remote_md5 = ssh.md5(path_info.path)
        info = HashInfo(self.PARAM_CHECKSUM, remote_md5)
        if not info:
            return info
        info.size = ssh.getsize(path_info.path)
        return info
def get_file_hash(self, path_info):
    """Fetch the checksum of an HDFS file.

    pyarrow exposes no checksum API, so the value is parsed out of the
    ``hadoop fs -checksum`` CLI output; the file size is then read
    through pyarrow's HDFS client.
    """
    checksum_re = r".*\t.*\t(?P<checksum>.*)"
    output = self.hadoop_fs(f"checksum {path_info.url}", user=path_info.user)
    checksum = self._group(checksum_re, output, "checksum")
    info = HashInfo(self.PARAM_CHECKSUM, checksum)
    with self.hdfs(path_info) as hdfs:
        info.size = hdfs.get_file_info(path_info.path).size
    return info
def get_dir_hash(path_info, fs, name, odb, state, **kwargs):
    """Return a HashInfo describing the directory at *path_info*.

    Fast path: if the filesystem already reports a hash value under
    *name* and the matching tree object can be loaded from *odb*, that
    HashInfo is returned as-is.  Otherwise the directory is collected,
    its tree is saved, and the resulting HashInfo — with ``size`` and
    ``dir_info`` attached — is returned.
    """
    from . import Tree
    # Trust a filesystem-provided hash only if the tree it names is
    # actually present in the odb; a missing tree falls through to a
    # full rebuild.
    value = fs.info(path_info).get(name)
    if value:
        hash_info = HashInfo(name, value)
        try:
            Tree.load(odb, hash_info)
            return hash_info
        except FileNotFoundError:
            pass
    dir_info = _collect_dir(path_info, fs, name, state, **kwargs)
    # NOTE(review): the rebuilt tree is saved to fs.repo.odb.local even
    # though the lookup above used the *odb* parameter — presumably the
    # local cache is the intended save target, but confirm the two odbs
    # are meant to differ.
    hash_info = Tree.save_dir_info(fs.repo.odb.local, dir_info)
    hash_info.size = dir_info.size
    hash_info.dir_info = dir_info
    return hash_info
def get_file_hash(self, path_info):
    """Return the HDFS checksum and size of the file at *path_info*.

    The checksum bytes come from the hdfs client's ``checksum`` call and
    the size from its ``status`` call.
    """
    resp = self.hdfs_client.checksum(path_info.path)
    info = HashInfo(self.PARAM_CHECKSUM, resp["bytes"])
    status = self.hdfs_client.status(path_info.path)
    info.size = status["length"]
    return info