def list_artifacts(self, path=None):
    from azure.storage.blob._models import BlobPrefix

    (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
    container_client = self.client.get_container_client(container)
    dest_path = artifact_path
    if path:
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    results = container_client.walk_blobs(name_starts_with=prefix)
    for r in results:
        if not r.name.startswith(artifact_path):
            raise MlflowException(
                "The name of the listed Azure blob does not begin with the specified"
                " artifact path. Artifact path: {artifact_path}. Blob name:"
                " {blob_name}".format(artifact_path=artifact_path, blob_name=r.name))
        if isinstance(r, BlobPrefix):  # This is a prefix for items in a subdirectory
            subdir = posixpath.relpath(path=r.name, start=artifact_path)
            if subdir.endswith("/"):
                subdir = subdir[:-1]
            infos.append(FileInfo(subdir, True, None))
        else:  # Just a plain old blob
            file_name = posixpath.relpath(path=r.name, start=artifact_path)
            infos.append(FileInfo(file_name, False, r.size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    (bucket, artifact_path) = self.parse_oss_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path + "/" if dest_path else ""
    self._get_oss_bucket(bucket)
    results = self.oss_bucket.list_objects(prefix=prefix, delimiter='/')
    for obj in results.object_list:  # is file
        file_path = obj.key
        self._verify_listed_object_contains_artifact_path_prefix(
            listed_object_path=file_path, artifact_path=artifact_path)
        file_rel_path = posixpath.relpath(path=file_path, start=artifact_path)
        file_size = obj.size
        infos.append(FileInfo(file_rel_path, False, file_size))
    for subdir_path in results.prefix_list:  # is dir
        self._verify_listed_object_contains_artifact_path_prefix(
            listed_object_path=subdir_path, artifact_path=artifact_path)
        subdir_rel_path = posixpath.relpath(path=subdir_path, start=artifact_path)
        infos.append(FileInfo(subdir_rel_path, True, None))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    (bucket, artifact_path) = data.parse_s3_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    s3_client = self._get_s3_client()
    paginator = s3_client.get_paginator("list_objects_v2")
    results = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    for result in results:
        # Subdirectories will be listed as "common prefixes" due to the way we made the request
        for obj in result.get("CommonPrefixes", []):
            subdir = obj.get("Prefix")[len(artifact_path) + 1:]
            if subdir.endswith("/"):
                subdir = subdir[:-1]
            infos.append(FileInfo(subdir, True, None))
        # Objects listed directly will be files
        for obj in result.get('Contents', []):
            name = obj.get("Key")[len(artifact_path) + 1:]
            size = int(obj.get('Size'))
            infos.append(FileInfo(name, False, size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    from azure.storage.blob.models import BlobPrefix

    (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        # Separator needs to be fixed as '/' because of azure blob storage pattern.
        # Do not change to os.path.join because in Windows system path separator is '\'
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    marker = None  # Used to make next list request if this one exceeded the result limit
    while True:
        results = self.client.list_blobs(container, prefix=prefix, delimiter='/',
                                         marker=marker)
        for r in results:
            if isinstance(r, BlobPrefix):  # This is a prefix for items in a subdirectory
                subdir = r.name[len(artifact_path) + 1:]
                if subdir.endswith("/"):
                    subdir = subdir[:-1]
                infos.append(FileInfo(subdir, True, None))
            else:  # Just a plain old blob
                file_name = r.name[len(artifact_path) + 1:]
                infos.append(FileInfo(file_name, False, r.properties.content_length))
        # Check whether a new marker is returned, meaning we have to make another request
        if results.next_marker:
            marker = results.next_marker
        else:
            break
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    (bucket, artifact_path) = data.parse_s3_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path + "/" if dest_path else ""
    s3_client = self._get_s3_client()
    paginator = s3_client.get_paginator("list_objects_v2")
    results = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/")
    for result in results:
        # Subdirectories will be listed as "common prefixes" due to the way we made the request
        for obj in result.get("CommonPrefixes", []):
            subdir_path = obj.get("Prefix")
            self._verify_listed_object_contains_artifact_path_prefix(
                listed_object_path=subdir_path, artifact_path=artifact_path)
            subdir_rel_path = posixpath.relpath(path=subdir_path, start=artifact_path)
            if subdir_rel_path.endswith("/"):
                subdir_rel_path = subdir_rel_path[:-1]
            infos.append(FileInfo(subdir_rel_path, True, None))
        # Objects listed directly will be files
        for obj in result.get("Contents", []):
            file_path = obj.get("Key")
            self._verify_listed_object_contains_artifact_path_prefix(
                listed_object_path=file_path, artifact_path=artifact_path)
            file_rel_path = posixpath.relpath(path=file_path, start=artifact_path)
            file_size = int(obj.get("Size"))
            infos.append(FileInfo(file_rel_path, False, file_size))
    return sorted(infos, key=lambda f: f.path)
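# A minimal standalone sketch of the boto3 pattern used above: paginating
# "list_objects_v2" with Delimiter="/" makes S3 report immediate "subdirectories"
# as CommonPrefixes and direct children as Contents. The bucket name and prefix
# below are hypothetical, for illustration only.
import boto3

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket="my-bucket", Prefix="runs/1/artifacts/", Delimiter="/"):
    for common_prefix in page.get("CommonPrefixes", []):
        print("dir: ", common_prefix["Prefix"])
    for obj in page.get("Contents", []):
        print("file:", obj["Key"], obj["Size"])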
def test_list_artifacts_nested(hdfs_system_mock):
    repo = HdfsArtifactRepository('hdfs://host/some/path')
    expected = [
        FileInfo('model/conda.yaml', False, 33),
        FileInfo('model/model.pkl', False, 33),
        FileInfo('model/MLmodel', False, 33)
    ]
    hdfs_system_mock.return_value.ls.return_value = [{
        'kind': 'file',
        'name': 'hdfs://host/some/path/model/conda.yaml',
        'size': 33,
    }, {
        'kind': 'file',
        'name': 'hdfs://host/some/path/model/model.pkl',
        'size': 33,
    }, {
        'kind': 'file',
        'name': 'hdfs://host/some/path/model/MLmodel',
        'size': 33,
    }]
    actual = repo.list_artifacts('model')
    assert actual == expected
def list_artifacts(self, path=None): """Returns saved artifacts for current artifact uri""" artifacts_info = self.get_artifacts_info(self.artifact_uri) artifacts_under_path_info = None if path: artifacts_under_path_info = list( filter(lambda a: a[0].startswith(path), artifacts_info)) else: artifacts_under_path_info = artifacts_info already_seen_paths, file_infos = [], [] path_len = 0 if path is None else len(path) + 1 for artifact_under_path in artifacts_under_path_info: file_size = artifact_under_path[2] relative_path = artifact_under_path[0][path_len:] relative_path_steps = relative_path.split('/') next_step = relative_path_steps[0] if next_step in already_seen_paths: continue already_seen_paths.append(next_step) file_info_path = next_step if path is None else '/'.join( [path, next_step]) if len(relative_path_steps) == 1: file_infos.append(FileInfo(file_info_path, False, file_size)) else: file_infos.append(FileInfo(file_info_path, True, None)) return file_infos
def test_list_artifacts_nested(hdfs_system_mock): repo = HdfsArtifactRepository("hdfs:://host/some/path") expected = [ FileInfo("model/conda.yaml", False, 33), FileInfo("model/model.pkl", False, 33), FileInfo("model/MLmodel", False, 33), ] hdfs_system_mock.return_value.ls.return_value = [ { "kind": "file", "name": "hdfs://host/some/path/model/conda.yaml", "size": 33 }, { "kind": "file", "name": "hdfs://host/some/path/model/model.pkl", "size": 33 }, { "kind": "file", "name": "hdfs://host/some/path/model/MLmodel", "size": 33 }, ] actual = repo.list_artifacts("model") assert actual == expected
def list_artifacts(path): fullpath = posixpath.join(base_uri, path) if fullpath.endswith("model") or fullpath.endswith("model/"): return [FileInfo(item, False, 123) for item in list_return_val] elif fullpath.endswith("12345") or fullpath.endswith("12345/"): return [FileInfo(posixpath.join(path, "model"), True, 0)] else: return []
def list_artifacts(path):
    fullpath = posixpath.join(base_uri, path)
    if fullpath.endswith(_MODEL_DIR) or fullpath.endswith(_MODEL_DIR + "/"):
        return [FileInfo(item, False, _DUMMY_FILE_SIZE) for item in list_return_val]
    elif fullpath.endswith(_PARENT_MODEL_DIR) or fullpath.endswith(_PARENT_MODEL_DIR + "/"):
        return [FileInfo(posixpath.join(path, _MODEL_DIR), True, _EMPTY_FILE_SIZE)]
    else:
        return []
def test_file_info_to_json():
    file_infos = [
        FileInfo("/my/file", False, 123),
        FileInfo("/my/dir", True, None),
    ]
    info_str = _file_infos_to_json(file_infos)
    # Note: file_size round-trips as a string here, consistent with proto3's JSON
    # mapping, which encodes int64 fields as decimal strings.
    assert json.loads(info_str) == [
        {"path": "/my/file", "is_dir": False, "file_size": "123"},
        {"path": "/my/dir", "is_dir": True},
    ]
def list_artifacts(path):
    if path.endswith(_MODEL_DIR):
        return [
            FileInfo(item, item.endswith(_EMPTY_DIR), _DUMMY_FILE_SIZE)
            for item in list_return_val
        ]
    elif path.endswith(_PARENT_DIR) or path.endswith(_PARENT_DIR + "/"):
        return [FileInfo(_PARENT_MODEL_DIR, True, _EMPTY_FILE_SIZE)]
    else:
        return []
def list_artifacts(path): if path.endswith("model"): return [ FileInfo(item, item.endswith("emptydir"), 123) for item in list_return_val ] elif path.endswith("12345") or path.endswith("12345/"): return [FileInfo("12345/model", True, 0)] else: return []
def get_file_info(path, rel_path):
    """
    Returns file metadata: location, size, etc.

    :param path: Path to artifact
    :param rel_path: Relative path to use for the returned `FileInfo`
    :return: `FileInfo` object
    """
    if is_directory(path):
        return FileInfo(rel_path, True, None)
    else:
        return FileInfo(rel_path, False, os.path.getsize(path))
def list_artifacts(self, path=None):
    artifact_dir = self.path
    list_dir = os.path.join(artifact_dir, path) if path else artifact_dir
    artifact_files = self.sftp.listdir(list_dir)
    infos = []
    for file_name in artifact_files:
        file_path = file_name if path is None else os.path.join(path, file_name)
        full_file_path = os.path.join(list_dir, file_name)
        if self.sftp.isdir(full_file_path):
            infos.append(FileInfo(file_path, True, None))
        else:
            infos.append(FileInfo(file_path, False, self.sftp.stat(full_file_path).st_size))
    return infos
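# A minimal sketch of the pysftp calls the SFTP repository above relies on
# (listdir, isdir, stat). The host, credentials, and remote directory are
# hypothetical, for illustration only.
import pysftp

with pysftp.Connection("sftp.example.com", username="user") as sftp:
    for name in sftp.listdir("/artifacts"):
        full_path = "/artifacts/" + name
        if sftp.isdir(full_path):
            print("dir: ", name)
        else:
            print("file:", name, sftp.stat(full_path).st_size)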
def list_artifacts(self, path): """ Return all the artifacts for this run_id directly under path. If path is a file, returns an empty list. Will error if path is neither a file nor directory. Note that list_artifacts will not return valid artifact sizes from Azure. :param path: Relative source path that contain desired artifacts :type path: str :return: List of artifacts as FileInfo listed directly under path. """ # get and filter by paths if path and self.artifacts.path and not path.startswith( self.artifacts.path): path = self._get_full_artifact_path( path ) # Adds prefix if called directly and it is not already set path_tokens = path.split("/") if path else [] path_depth = len(path_tokens) artifacts = [] for file_path in self.artifacts.get_file_paths(): if path is None or file_path[:len(path)] == path and len( file_path) > len(path): artifacts.append(file_path) file_infos = [] for artifact in artifacts: artifact_tokens = artifact.split("/") if len(artifact_tokens) == path_depth + 1: # is a file file_infos.append( FileInfo( path=artifact, is_dir=False, file_size= -1 # TODO: artifact size retrieval is not supported in Azure )) else: # is a directory file_infos.append( FileInfo( path="/".join(artifact_tokens[:path_depth + 1]), is_dir=True, file_size= -1 # TODO: artifact size retrieval is not supported in Azure )) return file_infos
def test_creation_and_hydration(self):
    path = random_str(random_int(10, 50))
    is_dir = random_int(10, 2500) % 2 == 0
    size_in_bytes = random_int(1, 10000)
    fi1 = FileInfo(path, is_dir, size_in_bytes)
    self._check(fi1, path, is_dir, size_in_bytes)

    as_dict = {"path": path, "is_dir": is_dir, "file_size": size_in_bytes}
    self.assertEqual(dict(fi1), as_dict)

    proto = fi1.to_proto()
    fi2 = FileInfo.from_proto(proto)
    self._check(fi2, path, is_dir, size_in_bytes)

    fi3 = FileInfo.from_dictionary(as_dict)
    self._check(fi3, path, is_dir, size_in_bytes)
def list_artifacts(self, path=None):
    infos = []
    page_token = None
    if not path:
        path = ""
    while True:
        json_body = self._make_json_body(path, page_token)
        response = self._call_endpoint(json_body, REGISTRY_LIST_ARTIFACTS_ENDPOINT)
        try:
            response.raise_for_status()
            json_response = json.loads(response.text)
        except Exception:
            raise MlflowException(
                "API request to list files under path `%s` failed with status code %s. "
                "Response body: %s" % (path, response.status_code, response.text))
        artifact_list = json_response.get("files", [])
        next_page_token = json_response.get("next_page_token", None)
        # If `path` is a file, ListArtifacts returns a single list element with the
        # same name as `path`. The list_artifacts API expects us to return an empty
        # list in this case, so we do so here.
        if (len(artifact_list) == 1 and artifact_list[0]["path"] == path
                and not artifact_list[0]["is_dir"]):
            return []
        for output_file in artifact_list:
            artifact_size = None if output_file["is_dir"] else output_file["file_size"]
            infos.append(FileInfo(output_file["path"], output_file["is_dir"], artifact_size))
        if len(artifact_list) == 0 or not next_page_token:
            break
        page_token = next_page_token
    return infos
def list_artifacts(self, path=None):
    if path:
        dbfs_path = self._get_dbfs_path(path)
    else:
        dbfs_path = self._get_dbfs_path('')
    dbfs_list_json = {'path': dbfs_path}
    response = self._dbfs_list_api(dbfs_list_json)
    try:
        json_response = json.loads(response.text)
    except ValueError:
        raise MlflowException(
            "API request to list files under DBFS path %s failed with status code %s. "
            "Response body: %s" % (dbfs_path, response.status_code, response.text))
    # /api/2.0/dbfs/list will not have the 'files' key in the response for empty directories.
    infos = []
    artifact_prefix = strip_prefix(self.artifact_uri, 'dbfs:')
    if json_response.get('error_code', None) == RESOURCE_DOES_NOT_EXIST:
        return []
    dbfs_files = json_response.get('files', [])
    for dbfs_file in dbfs_files:
        stripped_path = strip_prefix(dbfs_file['path'], artifact_prefix + '/')
        # If `path` is a file, the DBFS list API returns a single list element with the
        # same name as `path`. The list_artifacts API expects us to return an empty
        # list in this case, so we do so here.
        if stripped_path == path:
            return []
        is_dir = dbfs_file['is_dir']
        artifact_size = None if is_dir else dbfs_file['file_size']
        infos.append(FileInfo(stripped_path, is_dir, artifact_size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None): """ Lists files and directories under artifacts directory for the current run_id. (self.path contains the base path - hdfs:/some/path/run_id/artifacts) :param path: Relative source path. Possible subdirectory existing under hdfs:/some/path/run_id/artifacts :return: List of FileInfos under given path """ hdfs_base_path = _resolve_base_path(self.path, path) with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs: paths = [] if hdfs.exists(hdfs_base_path): for file_detail in hdfs.ls(hdfs_base_path, detail=True): file_name = file_detail.get("name") # Strip off anything that comes before the artifact root e.g. hdfs://name offset = file_name.index(self.path) rel_path = _relative_path_remote(self.path, file_name[offset:]) is_dir = file_detail.get("kind") == "directory" size = file_detail.get("size") paths.append(FileInfo(rel_path, is_dir, size)) return sorted(paths, key=lambda f: paths)
def _list_artifacts_for_proxied_run_artifact_root(proxied_artifact_root, relative_path=None):
    """
    Lists artifacts from the specified ``relative_path`` within the specified proxied
    Run artifact root (i.e. a Run artifact root with scheme ``http``, ``https``, or
    ``mlflow-artifacts``).

    :param proxied_artifact_root: The Run artifact root location (URI) with scheme
                                  ``http``, ``https``, or ``mlflow-artifacts`` that can be
                                  resolved by the MLflow server to a concrete storage
                                  location.
    :param relative_path: The relative path within the specified ``proxied_artifact_root``
                          under which to list artifact contents. If ``None``, artifacts are
                          listed from the ``proxied_artifact_root`` directory.
    """
    parsed_proxied_artifact_root = urllib.parse.urlparse(proxied_artifact_root)
    assert parsed_proxied_artifact_root.scheme in ["http", "https", "mlflow-artifacts"]

    artifact_destination_repo = _get_artifact_repo_mlflow_artifacts()
    artifact_destination_path = _get_proxied_run_artifact_destination_path(
        proxied_artifact_root=proxied_artifact_root,
        relative_path=relative_path,
    )

    artifact_entities = []
    for file_info in artifact_destination_repo.list_artifacts(artifact_destination_path):
        basename = posixpath.basename(file_info.path)
        run_relative_artifact_path = (
            posixpath.join(relative_path, basename) if relative_path else basename
        )
        artifact_entities.append(
            FileInfo(run_relative_artifact_path, file_info.is_dir, file_info.file_size))

    return artifact_entities
def _list_folders(self, bkt, prefix, artifact_path):
    results = bkt.list_blobs(prefix=prefix, delimiter="/")
    dir_paths = set()
    for page in results.pages:
        dir_paths.update(page.prefixes)
    return [FileInfo(path[len(artifact_path) + 1:-1], True, None) for path in dir_paths]
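# A minimal sketch of the google-cloud-storage behavior _list_folders depends on:
# listing with delimiter="/" surfaces immediate "subdirectories" via each result
# page's `prefixes` attribute. The bucket name and prefix below are hypothetical,
# for illustration only.
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")
blobs = bucket.list_blobs(prefix="runs/1/artifacts/", delimiter="/")
for page in blobs.pages:
    for dir_prefix in page.prefixes:  # e.g. "runs/1/artifacts/model/"
        print("dir:", dir_prefix)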
def list_artifacts(self, path=None):
    if path:
        run_relative_path = posixpath.join(self.run_relative_artifact_repo_root_path, path)
    else:
        run_relative_path = self.run_relative_artifact_repo_root_path
    json_body = message_to_json(ListArtifacts(run_id=self.run_id, path=run_relative_path))
    response = self._call_endpoint(MlflowService, ListArtifacts, json_body)
    artifact_list = response.files
    # If `path` is a file, ListArtifacts returns a single list element with the
    # same name as `path`. The list_artifacts API expects us to return an empty
    # list in this case, so we do so here.
    if (len(artifact_list) == 1 and artifact_list[0].path == run_relative_path
            and not artifact_list[0].is_dir):
        return []
    infos = []
    for output_file in artifact_list:
        file_rel_path = posixpath.relpath(
            path=output_file.path, start=self.run_relative_artifact_repo_root_path)
        artifact_size = None if output_file.is_dir else output_file.file_size
        infos.append(FileInfo(file_rel_path, output_file.is_dir, artifact_size))
    return infos
def list_artifacts(self, path=None):
    # TODO: pagination
    bucket, ns, artifact_path = self.parse_os_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = os.path.join(dest_path, path)
    infos = []
    prefix = dest_path + "/" if dest_path else ""
    os_client = self._get_os_client()
    results = os_client.list_objects(ns, bucket, prefix=prefix, delimiter='/').data
    for subdir_path in results.prefixes:
        subdir_rel_path = os.path.relpath(path=subdir_path, start=artifact_path)
        infos.append(FileInfo(subdir_rel_path, True, None))
    for obj in results.objects:
        file_rel_path = os.path.relpath(path=obj.name, start=artifact_path)
        infos.append(FileInfo(file_rel_path, False, obj.size))
    return sorted(infos, key=lambda f: f.path)
def test_faculty_object_to_mlflow_file_info(
    datasets_path, artifact_path, is_directory, artifact_root
):
    obj = FacultyObject(datasets_path, 1234, "an etag", DATETIME)
    expected = FileInfo(artifact_path, is_directory, None if is_directory else 1234)
    assert faculty_object_to_mlflow_file_info(obj, artifact_root) == expected
def _list_artifacts_mlflow_artifacts():
    """
    A request handler for `GET /mlflow-artifacts/artifacts?path=<value>` to list artifacts
    in `path` (a relative path from the root artifact directory).
    """
    request_message = _get_request_message(ListArtifactsMlflowArtifacts())
    path = request_message.path if request_message.HasField("path") else None
    artifact_repo = _get_artifact_repo_mlflow_artifacts()
    files = []
    for file_info in artifact_repo.list_artifacts(path):
        basename = posixpath.basename(file_info.path)
        new_file_info = FileInfo(basename, file_info.is_dir, file_info.file_size)
        files.append(new_file_info.to_proto())
    response_message = ListArtifacts.Response()
    response_message.files.extend(files)
    response = Response(mimetype="application/json")
    response.set_data(message_to_json(response_message))
    return response
def to_file_info(self):
    """
    Convert DB model to corresponding FileInfo object.

    :return: :py:class:`mlflow.entities.FileInfo`.
    """
    return FileInfo(
        path=os.path.join(self.group_path, self.artifact_name),
        is_dir=False,
        file_size=self.artifact_initial_size)
def test_list_artifacts(hdfs_system_mock):
    repo = HdfsArtifactRepository('hdfs:/some/path')
    expected = [
        FileInfo('conda.yaml', False, 33),
        FileInfo('model.pkl', False, 33),
        FileInfo('MLmodel', False, 33)
    ]
    hdfs_system_mock.return_value.walk.return_value = [
        ('/some/path', False, ['conda.yaml', 'model.pkl', 'MLmodel'])
    ]
    hdfs_system_mock.return_value.info.return_value.get.return_value = 33
    hdfs_system_mock.return_value.isdir.side_effect = [True, False, False, False]
    actual = repo.list_artifacts()
    assert actual == expected
def list_artifacts(self, path=None):
    # Newer versions of `azure-storage-blob` (>= 12.4.0) provide a public
    # `azure.storage.blob.BlobPrefix` object to signify that a blob is a directory,
    # while older versions only expose this API internally as
    # `azure.storage.blob._models.BlobPrefix`
    try:
        from azure.storage.blob import BlobPrefix
    except ImportError:
        from azure.storage.blob._models import BlobPrefix

    (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
    container_client = self.client.get_container_client(container)
    dest_path = artifact_path
    if path:
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path if dest_path.endswith("/") else dest_path + "/"
    results = container_client.walk_blobs(name_starts_with=prefix)
    for r in results:
        if not r.name.startswith(artifact_path):
            raise MlflowException(
                "The name of the listed Azure blob does not begin with the specified"
                " artifact path. Artifact path: {artifact_path}. Blob name:"
                " {blob_name}".format(artifact_path=artifact_path, blob_name=r.name))
        if isinstance(r, BlobPrefix):  # This is a prefix for items in a subdirectory
            subdir = posixpath.relpath(path=r.name, start=artifact_path)
            if subdir.endswith("/"):
                subdir = subdir[:-1]
            infos.append(FileInfo(subdir, True, None))
        else:  # Just a plain old blob
            file_name = posixpath.relpath(path=r.name, start=artifact_path)
            infos.append(FileInfo(file_name, False, r.size))
    # The list_artifacts API expects us to return an empty list if the path
    # references a single file.
    rel_path = dest_path[len(artifact_path) + 1:]
    if len(infos) == 1 and not infos[0].is_dir and infos[0].path == rel_path:
        return []
    return sorted(infos, key=lambda f: f.path)
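# A minimal sketch of the azure-storage-blob (>= 12.x) call the function above is
# built on: `walk_blobs` with a name prefix yields BlobPrefix items for virtual
# subdirectories and blob items (with a `size`) for blobs at that level. The
# connection string, container, and prefix are hypothetical, for illustration only.
from azure.storage.blob import BlobPrefix, BlobServiceClient

service = BlobServiceClient.from_connection_string("<connection-string>")
container_client = service.get_container_client("my-container")
for item in container_client.walk_blobs(name_starts_with="runs/1/artifacts/"):
    if isinstance(item, BlobPrefix):
        print("dir: ", item.name)
    else:
        print("file:", item.name, item.size)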
def list_artifacts(self, path=None):
    from azure.storage.blob.models import BlobPrefix

    (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    marker = None  # Used to make next list request if this one exceeded the result limit
    while True:
        results = self.client.list_blobs(container, prefix=prefix, delimiter='/',
                                         marker=marker)
        for r in results:
            if not r.name.startswith(artifact_path):
                raise MlflowException(
                    "The name of the listed Azure blob does not begin with the specified"
                    " artifact path. Artifact path: {artifact_path}. Blob name:"
                    " {blob_name}".format(artifact_path=artifact_path, blob_name=r.name))
            if isinstance(r, BlobPrefix):  # This is a prefix for items in a subdirectory
                subdir = posixpath.relpath(path=r.name, start=artifact_path)
                if subdir.endswith("/"):
                    subdir = subdir[:-1]
                infos.append(FileInfo(subdir, True, None))
            else:  # Just a plain old blob
                file_name = posixpath.relpath(path=r.name, start=artifact_path)
                infos.append(FileInfo(file_name, False, r.properties.content_length))
        # Check whether a new marker is returned, meaning we have to make another request
        if results.next_marker:
            marker = results.next_marker
        else:
            break
    return sorted(infos, key=lambda f: f.path)