Example #1
0
    def get_file_hash(self, path_info):
        """Build a ``HashInfo`` for the file at ``path_info``.

        The hash value is the first element of what ``file_md5`` returns for
        the path, labelled with ``self.PARAM_CHECKSUM``. When the resulting
        ``HashInfo`` is truthy, its ``size`` is filled in from
        ``os.path.getsize``.

        Args:
            path_info: Path handed to ``file_md5`` and ``os.path.getsize``.

        Returns:
            The ``HashInfo`` object; ``size`` is only assigned here when the
            object evaluates as truthy.
        """
        checksum_name = self.PARAM_CHECKSUM
        digest = file_md5(path_info)[0]
        result = HashInfo(checksum_name, digest)

        # A falsy HashInfo is handed back untouched, without a size lookup.
        if not result:
            return result

        result.size = os.path.getsize(path_info)
        return result
Example #2
0
    def get_file_hash(self, path_info):
        """Build a ``HashInfo`` for ``path_info`` over an ssh connection.

        The connection obtained from ``self.ssh(path_info)`` provides both the
        hash value (``md5``) and, when the resulting ``HashInfo`` is truthy,
        the size (``getsize``) of ``path_info.path``.

        Args:
            path_info: Object exposing ``scheme`` and ``path``; also passed to
                ``self.ssh`` to open the connection.

        Returns:
            The ``HashInfo`` object labelled with ``self.PARAM_CHECKSUM``.

        Raises:
            NotImplementedError: If ``path_info.scheme`` differs from
                ``self.scheme``.
        """
        if path_info.scheme != self.scheme:
            raise NotImplementedError

        with self.ssh(path_info) as connection:
            checksum_name = self.PARAM_CHECKSUM
            digest = connection.md5(path_info.path)
            result = HashInfo(checksum_name, digest)

            # The size is queried while the connection is still open, and only
            # for a truthy HashInfo.
            if not result:
                return result

            result.size = connection.getsize(path_info.path)
            return result
Example #3
0
File: hdfs.py Project: yyqgood/dvc
    def get_file_hash(self, path_info):
        """Build a ``HashInfo`` for ``path_info`` using the hadoop CLI.

        The hash value is taken from the output of
        ``self.hadoop_fs("checksum <url>")``, extracted with ``self._group``
        and a regex whose ``checksum`` group is the third tab-separated field.
        The size is read from ``get_file_info`` on the handle yielded by
        ``self.hdfs(path_info)`` and is always assigned.

        Args:
            path_info: Object exposing ``url``, ``user`` and ``path``.

        Returns:
            The ``HashInfo`` object labelled with ``self.PARAM_CHECKSUM``,
            with ``size`` set.
        """
        # NOTE: pyarrow doesn't support checksum, so we need to use hadoop
        checksum_pattern = r".*\t.*\t(?P<checksum>.*)"
        command = f"checksum {path_info.url}"
        output = self.hadoop_fs(command, user=path_info.user)

        checksum_name = self.PARAM_CHECKSUM
        digest = self._group(checksum_pattern, output, "checksum")
        result = HashInfo(checksum_name, digest)

        with self.hdfs(path_info) as client:
            result.size = client.get_file_info(path_info.path).size

        return result
Example #4
0
def get_dir_hash(path_info, fs, name, odb, state, **kwargs):
    """Return a ``HashInfo`` describing the directory at ``path_info``.

    If ``fs.info(path_info)`` carries a truthy value under ``name``, a
    ``HashInfo`` is built from it and ``Tree.load`` is attempted against
    ``odb``; when that load succeeds, this ``HashInfo`` is returned as-is.
    Otherwise (no value, or the load raises ``FileNotFoundError``) the
    directory is collected with ``_collect_dir`` and saved through
    ``Tree.save_dir_info``.

    Args:
        path_info: Location of the directory within ``fs``.
        fs: Filesystem object; must provide ``info`` and ``repo.odb.local``.
        name: Hash name, used both as the key looked up in ``fs.info(...)``
            and as the ``HashInfo`` name.
        odb: Object database used for the ``Tree.load`` attempt.
        state: Forwarded to ``_collect_dir``.
        **kwargs: Forwarded to ``_collect_dir``.

    Returns:
        A ``HashInfo``. On the rebuild path it additionally has ``size`` and
        ``dir_info`` assigned from the collected directory info.
    """
    from . import Tree

    known_value = fs.info(path_info).get(name)
    if known_value:
        candidate = HashInfo(name, known_value)
        try:
            Tree.load(odb, candidate)
        except FileNotFoundError:
            # Tree could not be loaded from odb; rebuild it below.
            pass
        else:
            return candidate

    collected = _collect_dir(path_info, fs, name, state, **kwargs)
    # NOTE: the rebuilt tree is saved to fs.repo.odb.local, not to ``odb``.
    rebuilt = Tree.save_dir_info(fs.repo.odb.local, collected)
    rebuilt.size = collected.size
    rebuilt.dir_info = collected
    return rebuilt
Example #5
0
    def get_file_hash(self, path_info):
        """Build a ``HashInfo`` for ``path_info`` via ``self.hdfs_client``.

        The hash value is the ``"bytes"`` entry of the client's ``checksum``
        response, and the size is the ``"length"`` entry of its ``status``
        response, both queried for ``path_info.path``.

        Args:
            path_info: Object exposing ``path``.

        Returns:
            The ``HashInfo`` object labelled with ``self.PARAM_CHECKSUM``,
            with ``size`` set.
        """
        checksum_response = self.hdfs_client.checksum(path_info.path)
        result = HashInfo(self.PARAM_CHECKSUM, checksum_response["bytes"])

        status_response = self.hdfs_client.status(path_info.path)
        result.size = status_response["length"]
        return result