Ejemplo n.º 1
0
def _get_artifact_repo(run):
    """
    Build the artifact repository for ``run``.

    :param run: Run object exposing ``info.artifact_uri``, ``info.experiment_id`` and
                ``info.run_uuid``.
    :return: The result of ``ArtifactRepository.from_artifact_uri`` for the run's own
             artifact URI, or for a URI derived from the store's root directory when
             the run has no artifact URI.
    """
    run_info = run.info
    location = run_info.artifact_uri
    if not location:
        # TODO(aaron) Remove this once everyone locally only has runs from after
        # the introduction of "artifact_uri".
        location = os.path.join(
            _get_store().root_directory,
            str(run_info.experiment_id),
            run_info.run_uuid,
            "artifacts",
        )
    return ArtifactRepository.from_artifact_uri(location)
Ejemplo n.º 2
0
def _download_artifact_from_uri(artifact_uri, output_path=None):
    """
    Download the artifact located at ``artifact_uri``.

    The URI is split into a parent location and a final component using the path
    module reported by the artifact repository created for ``artifact_uri``. A second
    repository rooted at the parent location then downloads the final component.

    :param artifact_uri: The *absolute* URI of the artifact to download.
    :param output_path: The local filesystem path to which to download the artifact. If unspecified,
                        a local output path will be created.
    :return: Whatever the parent repository's ``download_artifacts`` call returns.
    """
    tracking_store = _get_store()
    uri_repo = ArtifactRepository.from_artifact_uri(artifact_uri, tracking_store)
    # Use the repository's own path module to split the URI.
    path_module = uri_repo.get_path_module()
    parent_uri = path_module.dirname(artifact_uri)
    artifact_name = path_module.basename(artifact_uri)
    parent_repo = ArtifactRepository.from_artifact_uri(
        artifact_uri=parent_uri, store=tracking_store)
    return parent_repo.download_artifacts(
        artifact_path=artifact_name, dst_path=output_path)
Ejemplo n.º 3
0
 def __init__(self, run_info, store):
     """Keep the store and run info, and resolve the artifact repository for the run.

     :param run_info: Run info object; its ``artifact_uri`` selects the repository.
     :param store: Store kept on the instance and used for the legacy fallback.
     """
     self.store = store
     self.run_info = run_info
     uri = run_info.artifact_uri
     # Runs without an artifact URI are handled by the legacy helper.
     self.artifact_repo = (
         ArtifactRepository.from_artifact_uri(uri) if uri
         else _get_legacy_artifact_repo(store, run_info))
Ejemplo n.º 4
0
 def log_artifacts(self, artifact_uri, local_dir, artifact_path=None):
     """Write a directory of files to the remote ``artifact_uri``.

     :param artifact_uri: URI used to obtain the artifact repository.
     :param local_dir: Local directory whose files are written.
     :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
     """
     target_repo = ArtifactRepository.from_artifact_uri(artifact_uri, self.store)
     target_repo.log_artifacts(local_dir, artifact_path)
Ejemplo n.º 5
0
def get_artifact_uri(run_id, artifact_path=None):
    """
    Get the absolute URI of the specified artifact in the specified run. If ``artifact_path``
    is not specified, the artifact root URI of the specified run will be returned; calls to
    ``log_artifact`` and ``log_artifacts`` write artifact(s) to subdirectories of the artifact
    root URI.

    :param run_id: The ID of the run for which to obtain an absolute artifact URI.
    :param artifact_path: The run-relative artifact path. For example,
                          ``path/to/artifact``. If unspecified, the artifact root URI for the
                          specified run will be returned.
    :return: An *absolute* URI referring to the specified artifact or the specified run's artifact
             root. For example, if an artifact path is provided and the specified run uses an
             S3-backed store, this may be a URI of the form
             ``s3://<bucket_name>/path/to/artifact/root/path/to/artifact``. If an artifact path
             is not provided and the specified run uses an S3-backed store, this may be a URI of
             the form ``s3://<bucket_name>/path/to/artifact/root``.
    :raises MlflowException: If ``run_id`` is falsy.
    """
    if not run_id:
        error_text = "A run_id must be specified in order to obtain an artifact uri!"
        raise MlflowException(message=error_text, error_code=INVALID_PARAMETER_VALUE)

    tracking_store = _get_store()
    artifact_root = tracking_store.get_run(run_id).info.artifact_uri
    if artifact_path is None:
        return artifact_root

    # Path separators may differ between artifact repositories, so the join is delegated
    # to the path module supplied by the repository that owns the run's artifact root.
    root_repo = ArtifactRepository.from_artifact_uri(artifact_root, tracking_store)
    return root_repo.get_path_module().join(artifact_root, artifact_path)
Ejemplo n.º 6
0
def _get_model_log_dir(model_name, run_id):
    """
    Download the artifact named ``model_name`` from the given run.

    :param model_name: Artifact path passed to the repository's ``download_artifacts``.
    :param run_id: ID of the run to look up in the store; must be truthy.
    :return: Whatever ``download_artifacts`` returns for ``model_name``.
    :raises Exception: If ``run_id`` is falsy.
    """
    if not run_id:
        raise Exception("Must specify a run_id to get logging directory for a model.")
    tracking_store = _get_store()
    artifact_root = tracking_store.get_run(run_id).info.artifact_uri
    model_repo = ArtifactRepository.from_artifact_uri(artifact_root, tracking_store)
    return model_repo.download_artifacts(model_name)
Ejemplo n.º 7
0
    def test_basic_functions(self):
        """Log, list and download artifacts through a repository for a mock S3 bucket."""

        def read_text(path):
            # Read through a context manager so the handle is closed immediately
            # instead of being left open until garbage collection.
            with open(path) as handle:
                return handle.read()

        with TempDir() as tmp:
            # Create a mock S3 bucket in moto.
            # Note that we must set these as environment variables so that boto does
            # not attempt to assume credentials from ~/.aws/config or an IAM role;
            # moto does not correctly pass the arguments to boto3.client().
            os.environ["AWS_ACCESS_KEY_ID"] = "a"
            os.environ["AWS_SECRET_ACCESS_KEY"] = "b"
            s3 = boto3.client("s3")
            s3.create_bucket(Bucket="test_bucket")

            repo = ArtifactRepository.from_artifact_uri(
                "s3://test_bucket/some/path")
            self.assertIsInstance(repo, S3ArtifactRepository)
            self.assertListEqual(repo.list_artifacts(), [])
            with self.assertRaises(Exception):
                read_text(repo.download_artifacts("test.txt"))

            # Create and log a test.txt file directly
            with open(tmp.path("test.txt"), "w") as f:
                f.write("Hello world!")
            repo.log_artifact(tmp.path("test.txt"))
            self.assertEqual(read_text(repo.download_artifacts("test.txt")), "Hello world!")
            # Check that it actually made it to S3
            obj = s3.get_object(Bucket="test_bucket", Key="some/path/test.txt")
            text = obj["Body"].read().decode('utf-8')
            self.assertEqual(text, "Hello world!")

            # Create a subdirectory for log_artifacts
            os.mkdir(tmp.path("subdir"))
            os.mkdir(tmp.path("subdir", "nested"))
            with open(tmp.path("subdir", "a.txt"), "w") as f:
                f.write("A")
            with open(tmp.path("subdir", "b.txt"), "w") as f:
                f.write("B")
            with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
                f.write("C")
            repo.log_artifacts(tmp.path("subdir"))
            self.assertEqual(read_text(repo.download_artifacts("a.txt")), "A")
            self.assertEqual(read_text(repo.download_artifacts("b.txt")), "B")
            self.assertEqual(read_text(repo.download_artifacts("nested/c.txt")), "C")
            infos = sorted([(f.path, f.is_dir, f.file_size)
                            for f in repo.list_artifacts()])
            self.assertListEqual(infos, [("a.txt", False, 1),
                                         ("b.txt", False, 1),
                                         ("nested", True, None),
                                         ("test.txt", False, 12)])
            infos = sorted([(f.path, f.is_dir, f.file_size)
                            for f in repo.list_artifacts("nested")])
            self.assertListEqual(infos, [("nested/c.txt", False, 1)])

            # Download a subdirectory
            downloaded_dir = repo.download_artifacts("nested")
            self.assertEqual(os.path.basename(downloaded_dir), "nested")
            self.assertEqual(read_text(os.path.join(downloaded_dir, "c.txt")), "C")
Ejemplo n.º 8
0
    def log_artifact(self, artifact_uri, local_path, artifact_path=None):
        """Write a single local file to the remote ``artifact_uri``.

        :param artifact_uri: URI used to obtain the artifact repository.
        :param local_path: Path of the local file to write.
        :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
        """
        target_repo = ArtifactRepository.from_artifact_uri(artifact_uri, self.store)
        target_repo.log_artifact(local_path, artifact_path)
Ejemplo n.º 9
0
def test_artifact_uri_factory(mock_client):
    """Check that ``TEST_URI`` yields an ``AzureBlobArtifactRepository``."""
    # pylint: disable=unused-argument
    # We pass in the mock_client here to clear Azure environment variables, but we don't use it;
    # We do need to set up a fake access key for the code to run though
    env_key = 'AZURE_STORAGE_ACCESS_KEY'
    previous_value = os.environ.get(env_key)
    os.environ[env_key] = ''
    try:
        repo = ArtifactRepository.from_artifact_uri(TEST_URI, mock.Mock())
        assert isinstance(repo, AzureBlobArtifactRepository)
    finally:
        # Undo the environment change even when the assertion fails, and put back
        # any value that was present before the test ran.
        if previous_value is None:
            del os.environ[env_key]
        else:
            os.environ[env_key] = previous_value
Ejemplo n.º 10
0
def _get_model_log_dir(model_name, run_id):
    """
    Download the artifact named ``model_name`` from the given run.

    :param model_name: Artifact path passed to the repository's ``download_artifacts``.
    :param run_id: ID of the run to look up in the store; must be truthy.
    :return: Whatever ``download_artifacts`` returns for ``model_name``.
    :raises Exception: If ``run_id`` is falsy.
    """
    if not run_id:
        raise Exception("Must specify a run_id to get logging directory for a model.")
    run_info = _get_store().get_run(run_id).info
    artifact_root = run_info.artifact_uri
    if not artifact_root:
        # Runs without an artifact URI are handled by the legacy helper.
        model_repo = _get_legacy_artifact_repo(_get_store(), run_info)
    else:
        model_repo = ArtifactRepository.from_artifact_uri(artifact_root)
    return model_repo.download_artifacts(model_name)
Ejemplo n.º 11
0
    def log_artifacts(self, run_id, local_dir, artifact_path=None):
        """
        Write a directory of files to the artifact location of the given run.

        :param run_id: ID of the run whose ``artifact_uri`` receives the files.
        :param local_dir: Path to the directory of files to write.
        :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
        """
        artifact_root = self.get_run(run_id).info.artifact_uri
        run_repo = ArtifactRepository.from_artifact_uri(artifact_root, self.store)
        run_repo.log_artifacts(local_dir, artifact_path)
Ejemplo n.º 12
0
    def log_artifacts(self, run_id, local_dir, artifact_path=None):
        """Write a directory of files to the run's artifact directory.

        :param run_id: ID of the run whose artifact location receives the files.
        :param local_dir: Local directory containing the files to write.
        :param artifact_path: If provided, the directory in the run to write into.
        """
        artifact_root = self.get_run(run_id).info.artifact_uri
        run_repo = ArtifactRepository.from_artifact_uri(artifact_root, self.store)
        run_repo.log_artifacts(local_dir, artifact_path)
Ejemplo n.º 13
0
    def test_basic_functions(self):
        """Log, list and download artifacts through a repository for a mock S3 bucket."""

        def read_text(path):
            # Read through a context manager so the handle is closed immediately
            # instead of being left open until garbage collection.
            with open(path) as handle:
                return handle.read()

        with TempDir() as tmp:
            # Create a mock S3 bucket in moto
            s3 = boto3.client("s3")
            s3.create_bucket(Bucket="test_bucket")

            repo = ArtifactRepository.from_artifact_uri(
                "s3://test_bucket/some/path")
            self.assertIsInstance(repo, S3ArtifactRepository)
            self.assertListEqual(repo.list_artifacts(), [])
            with self.assertRaises(Exception):
                read_text(repo.download_artifacts("test.txt"))

            # Create and log a test.txt file directly
            with open(tmp.path("test.txt"), "w") as f:
                f.write("Hello world!")
            repo.log_artifact(tmp.path("test.txt"))
            self.assertEqual(read_text(repo.download_artifacts("test.txt")), "Hello world!")
            # Check that it actually made it to S3
            obj = s3.get_object(Bucket="test_bucket", Key="some/path/test.txt")
            text = obj["Body"].read().decode('utf-8')
            self.assertEqual(text, "Hello world!")

            # Create a subdirectory for log_artifacts
            os.mkdir(tmp.path("subdir"))
            os.mkdir(tmp.path("subdir", "nested"))
            with open(tmp.path("subdir", "a.txt"), "w") as f:
                f.write("A")
            with open(tmp.path("subdir", "b.txt"), "w") as f:
                f.write("B")
            with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
                f.write("C")
            repo.log_artifacts(tmp.path("subdir"))
            self.assertEqual(read_text(repo.download_artifacts("a.txt")), "A")
            self.assertEqual(read_text(repo.download_artifacts("b.txt")), "B")
            self.assertEqual(read_text(repo.download_artifacts("nested/c.txt")), "C")
            infos = sorted([(f.path, f.is_dir, f.file_size)
                            for f in repo.list_artifacts()])
            self.assertListEqual(infos, [("a.txt", False, 1),
                                         ("b.txt", False, 1),
                                         ("nested", True, None),
                                         ("test.txt", False, 12)])
            infos = sorted([(f.path, f.is_dir, f.file_size)
                            for f in repo.list_artifacts("nested")])
            self.assertListEqual(infos, [("nested/c.txt", False, 1)])

            # Download a subdirectory
            downloaded_dir = repo.download_artifacts("nested")
            self.assertEqual(os.path.basename(downloaded_dir), "nested")
            self.assertEqual(read_text(os.path.join(downloaded_dir, "c.txt")), "C")
Ejemplo n.º 14
0
def list_artifacts(run_id, artifact_path):
    """
    Return all the artifacts directly under run's root artifact directory,
    or a sub-directory. The output is a JSON-formatted list.
    """
    # A missing path is passed to the repository as the empty string.
    if artifact_path is None:
        artifact_path = ""
    tracking_store = _get_store()
    artifact_root = tracking_store.get_run(run_id).info.artifact_uri
    run_repo = ArtifactRepository.from_artifact_uri(artifact_root, tracking_store)
    listing = run_repo.list_artifacts(artifact_path)
    # The result is written to stdout rather than returned.
    print(_file_infos_to_json(listing))
Ejemplo n.º 15
0
def download_artifacts(run_id, artifact_path):
    """
    Download an artifact file or directory to a local directory.
    The output is the name of the file or directory on the local disk.
    """
    # A missing path is passed to the repository as the empty string.
    if artifact_path is None:
        artifact_path = ""
    tracking_store = _get_store()
    artifact_root = tracking_store.get_run(run_id).info.artifact_uri
    run_repo = ArtifactRepository.from_artifact_uri(artifact_root, tracking_store)
    # The local location is written to stdout rather than returned.
    print(run_repo.download_artifacts(artifact_path))
Ejemplo n.º 16
0
def log_artifacts(local_dir, run_id, artifact_path):
    """
    Logs the files within a local directory as an artifact of a run, optionally
    within a run-specific artifact path. Run artifacts can be organized into
    directories, so you can place the artifact in a directory this way.
    """
    tracking_store = _get_store()
    artifact_root = tracking_store.get_run(run_id).info.artifact_uri
    run_repo = ArtifactRepository.from_artifact_uri(artifact_root, tracking_store)
    run_repo.log_artifacts(local_dir, artifact_path)
    # Report the upload through the module logger once the repository call returns.
    _logger.info(
        "Logged artifact from local dir %s to artifact_path=%s", local_dir, artifact_path)
Ejemplo n.º 17
0
    def download_artifacts(self, run_id, path):
        """
        Fetch an artifact file or directory of a run and return a local path for it.

        :param run_id: The run to download artifacts from.
        :param path: Relative source path to the desired artifact.
        :return: Local path of desired artifact.
        """
        root_uri = self.get_run(run_id).info.artifact_uri
        run_repo = ArtifactRepository.from_artifact_uri(root_uri, self.store)
        return run_repo.download_artifacts(path)
Ejemplo n.º 18
0
    def list_artifacts(self, run_id, path=None):
        """
        Enumerate the artifacts stored for a run.

        :param run_id: The run to list artifacts from.
        :param path: The run's relative artifact path to list from. Defaults to None, which is
                     passed through to the repository unchanged.
        :return: List of :py:class:`mlflow.entities.FileInfo`
        """
        root_uri = self.get_run(run_id).info.artifact_uri
        run_repo = ArtifactRepository.from_artifact_uri(root_uri, self.store)
        return run_repo.list_artifacts(path)
    def test_basic_functions(self):
        """Log, list and download artifacts through a repository rooted at a temp directory."""

        def read_text(path):
            # Read through a context manager so the handle is closed immediately
            # instead of being left open until garbage collection.
            with open(path) as handle:
                return handle.read()

        with TempDir() as test_root, TempDir() as tmp:
            repo = ArtifactRepository.from_artifact_uri(
                test_root.path(), Mock())
            self.assertIsInstance(repo, LocalArtifactRepository)
            self.assertListEqual(repo.list_artifacts(), [])
            with self.assertRaises(Exception):
                read_text(repo.download_artifacts("test.txt"))

            # Create and log a test.txt file directly
            with open(tmp.path("test.txt"), "w") as f:
                f.write("Hello world!")
            repo.log_artifact(tmp.path("test.txt"))
            self.assertEqual(read_text(repo.download_artifacts("test.txt")), "Hello world!")
            # Check that it actually got written in the expected place
            self.assertEqual(
                read_text(os.path.join(test_root.path(), "test.txt")), "Hello world!")

            # Create a subdirectory for log_artifacts
            os.mkdir(tmp.path("subdir"))
            os.mkdir(tmp.path("subdir", "nested"))
            with open(tmp.path("subdir", "a.txt"), "w") as f:
                f.write("A")
            with open(tmp.path("subdir", "b.txt"), "w") as f:
                f.write("B")
            with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
                f.write("C")
            repo.log_artifacts(tmp.path("subdir"))
            self.assertEqual(read_text(repo.download_artifacts("a.txt")), "A")
            self.assertEqual(read_text(repo.download_artifacts("b.txt")), "B")
            self.assertEqual(read_text(repo.download_artifacts("nested/c.txt")), "C")
            infos = sorted([(f.path, f.is_dir, f.file_size)
                            for f in repo.list_artifacts()])
            self.assertListEqual(infos, [("a.txt", False, 1),
                                         ("b.txt", False, 1),
                                         ("nested", True, None),
                                         ("test.txt", False, 12)])
            infos = sorted([(f.path, f.is_dir, f.file_size)
                            for f in repo.list_artifacts("nested")])
            self.assertListEqual(infos, [("nested/c.txt", False, 1)])

            # Download a subdirectory
            downloaded_dir = repo.download_artifacts("nested")
            self.assertEqual(os.path.basename(downloaded_dir), "nested")
            self.assertEqual(read_text(os.path.join(downloaded_dir, "c.txt")), "C")
Ejemplo n.º 20
0
    def log_artifacts(self, run_id, local_dir, artifact_path=None):
        """
        Write a directory of files to the run's ``artifact_uri`` and record the upload in a tag.

        After the upload, the run's "Dir uploaded" tag is set to a message naming the local
        directory, the artifact URI and the upload time. An S3 console link is appended
        only when the artifact URI starts with ``s3://``.

        :param run_id: ID of the run whose artifact location receives the files.
        :param local_dir: Path to the directory of files to write.
        :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
        """
        run = self.get_run(run_id)
        artifact_uri = run.info.artifact_uri
        artifact_repo = ArtifactRepository.from_artifact_uri(artifact_uri, self.store)
        artifact_repo.log_artifacts(local_dir, artifact_path)

        uploaded_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        note = ("Dir: " + local_dir + ", uploaded into: " + artifact_uri +
                " at time: " + uploaded_at)
        s3_scheme = "s3://"
        if artifact_uri.startswith(s3_scheme):
            # The console link is built by dropping the scheme prefix, which is only
            # meaningful for S3 URIs; other URIs previously produced a bogus link.
            note += (". Access link: " +
                     "https://s3.console.aws.amazon.com/s3/object/" +
                     artifact_uri[len(s3_scheme):])
        self.set_tag(run_id, "Dir uploaded", note)
Ejemplo n.º 21
0
    def test_basic_functions(self):
        """Log, list and download artifacts (including nested and hidden ones) locally."""

        def read_text(path):
            # Read through a context manager so the handle is closed immediately
            # instead of being left open until garbage collection.
            with open(path) as handle:
                return handle.read()

        with TempDir() as test_root, TempDir() as tmp:
            repo = ArtifactRepository.from_artifact_uri(
                test_root.path(), Mock())
            self.assertIsInstance(repo, LocalArtifactRepository)
            self.assertListEqual(repo.list_artifacts(), [])
            with self.assertRaises(Exception):
                read_text(repo.download_artifacts("test.txt"))

            # Create and log a test.txt file directly
            artifact_name = "test.txt"
            local_file = tmp.path(artifact_name)
            with open(local_file, "w") as f:
                f.write("Hello world!")
            repo.log_artifact(local_file)
            self.assertEqual(read_text(repo.download_artifacts(artifact_name)), "Hello world!")
            # Check that it actually got written in the expected place
            self.assertEqual(
                read_text(os.path.join(test_root.path(), artifact_name)), "Hello world!")

            # log artifact in subdir
            repo.log_artifact(local_file, "aaa")
            text = read_text(
                repo.download_artifacts(os.path.join("aaa", artifact_name)))
            self.assertEqual(text, "Hello world!")

            # log a hidden artifact
            hidden_name = ".mystery"
            hidden_file = tmp.path(hidden_name)
            with open(hidden_file, 'w') as f:
                f.write("42")
            repo.log_artifact(hidden_file, "aaa")
            # Join with the bare file name: joining "aaa" with the absolute local path
            # discards "aaa", so the logged copy would never be the one requested.
            hidden_text = read_text(
                repo.download_artifacts(os.path.join("aaa", hidden_name)))
            self.assertEqual(hidden_text, "42")

            # log artifacts in deep nested subdirs
            nested_subdir = "bbb/ccc/ddd/eee/fghi"
            repo.log_artifact(local_file, nested_subdir)
            text = read_text(
                repo.download_artifacts(
                    os.path.join(nested_subdir, artifact_name)))
            self.assertEqual(text, "Hello world!")

            for bad_path in [
                    "/", "//", "/tmp", "/bad_path", ".", "../terrible_path"
            ]:
                with self.assertRaises(Exception):
                    repo.log_artifact(local_file, bad_path)

            # Create a subdirectory for log_artifacts
            os.mkdir(tmp.path("subdir"))
            os.mkdir(tmp.path("subdir", "nested"))
            with open(tmp.path("subdir", "a.txt"), "w") as f:
                f.write("A")
            with open(tmp.path("subdir", "b.txt"), "w") as f:
                f.write("B")
            with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
                f.write("C")
            repo.log_artifacts(tmp.path("subdir"))
            self.assertEqual(read_text(repo.download_artifacts("a.txt")), "A")
            self.assertEqual(read_text(repo.download_artifacts("b.txt")), "B")
            self.assertEqual(read_text(repo.download_artifacts("nested/c.txt")), "C")
            infos = self._get_contents(repo, None)
            self.assertListEqual(infos, [
                ("a.txt", False, 1),
                ("aaa", True, None),
                ("b.txt", False, 1),
                ("bbb", True, None),
                ("nested", True, None),
                ("test.txt", False, 12),
            ])

            # Verify contents of subdirectories
            self.assertListEqual(self._get_contents(repo, "nested"),
                                 [("nested/c.txt", False, 1)])

            infos = self._get_contents(repo, "aaa")
            self.assertListEqual(infos, [("aaa/.mystery", False, 2),
                                         ("aaa/test.txt", False, 12)])
            self.assertListEqual(self._get_contents(repo, "bbb"),
                                 [("bbb/ccc", True, None)])
            self.assertListEqual(self._get_contents(repo, "bbb/ccc"),
                                 [("bbb/ccc/ddd", True, None)])

            infos = self._get_contents(repo, "bbb/ccc/ddd/eee")
            self.assertListEqual(infos, [("bbb/ccc/ddd/eee/fghi", True, None)])

            infos = self._get_contents(repo, "bbb/ccc/ddd/eee/fghi")
            self.assertListEqual(
                infos, [("bbb/ccc/ddd/eee/fghi/test.txt", False, 12)])

            # Download a subdirectory
            downloaded_dir = repo.download_artifacts("nested")
            self.assertEqual(os.path.basename(downloaded_dir), "nested")
            self.assertEqual(read_text(os.path.join(downloaded_dir, "c.txt")), "C")
Ejemplo n.º 22
0
def test_artifact_uri_factory():
    """Check that a ``gs://`` URI yields a ``GCSArtifactRepository``."""
    gcs_uri = "gs://test_bucket/some/path"
    assert isinstance(ArtifactRepository.from_artifact_uri(gcs_uri), GCSArtifactRepository)
Ejemplo n.º 23
0
def get_artifact_repository(run_uuid, store=None):
    """
    Return the artifact repository for the run identified by ``run_uuid``.

    :param run_uuid: ID of the run to look up.
    :param store: Optional store; it is passed through ``_ensure_store`` before use.
    :return: The result of ``ArtifactRepository.from_artifact_uri`` for the run's artifact URI.
    """
    # Imported at call time rather than at module import time.
    from mlflow.store.artifact_repo import ArtifactRepository

    resolved_store = _ensure_store(store)
    artifact_root = resolved_store.get_run(run_uuid).info.artifact_uri
    return ArtifactRepository.from_artifact_uri(artifact_root, resolved_store)
Ejemplo n.º 24
0
def test_artifact_uri_factory():
    """Check that an ``ftp://`` URI yields an ``FTPArtifactRepository``."""
    ftp_uri = "ftp://*****:*****@test_ftp:123/some/path"
    created = ArtifactRepository.from_artifact_uri(ftp_uri, Mock())
    assert isinstance(created, FTPArtifactRepository)
Ejemplo n.º 25
0
def _get_legacy_artifact_repo(file_store, run_info):
    """
    Build an artifact repository for a run that has no ``artifact_uri``.

    The location is ``<root_directory>/<experiment_id>/<run_uuid>/artifacts``, joined
    with ``os.path.join``.

    :param file_store: Store providing ``root_directory``; also passed to the repository factory.
    :param run_info: Run info providing ``experiment_id`` and ``run_uuid``.
    :return: The result of ``ArtifactRepository.from_artifact_uri`` for that location.
    """
    # TODO(aaron) Remove this once everyone locally only has runs from after
    # the introduction of "artifact_uri".
    path_parts = (
        file_store.root_directory,
        str(run_info.experiment_id),
        run_info.run_uuid,
        "artifacts",
    )
    legacy_uri = os.path.join(*path_parts)
    return ArtifactRepository.from_artifact_uri(legacy_uri, file_store)
Ejemplo n.º 26
0
def _get_artifact_repo(run_info, store):
    """
    Return the artifact repository for ``run_info``.

    :param run_info: Run info whose ``artifact_uri`` selects the repository.
    :param store: Store passed to the repository factory or to the legacy helper.
    :return: A repository built from the artifact URI, or the legacy repository when
             the run info has no artifact URI.
    """
    artifact_root = run_info.artifact_uri
    if not artifact_root:
        return _get_legacy_artifact_repo(store, run_info)
    return ArtifactRepository.from_artifact_uri(artifact_root, store)
Ejemplo n.º 27
0
def test_artifact_uri_factory():
    """Expect ``SSHException`` when a repository is requested for the test ``sftp://`` URI."""
    from paramiko.ssh_exception import SSHException

    sftp_uri = "sftp://*****:*****@test_sftp:123/some/path"
    with pytest.raises(SSHException):
        ArtifactRepository.from_artifact_uri(sftp_uri, Mock())
Ejemplo n.º 28
0
def _get_artifact_repo(run):
    """
    Return the artifact repository for ``run``, using the store from ``_get_store()``.

    :param run: Run object exposing ``info.artifact_uri``.
    :return: The result of ``ArtifactRepository.from_artifact_uri`` for the run's artifact URI.
    """
    backing_store = _get_store()
    artifact_root = run.info.artifact_uri
    return ArtifactRepository.from_artifact_uri(artifact_root, backing_store)