def _get_artifact_repo(run):
    """Return an artifact repository for ``run``, handling legacy runs.

    Runs recorded before ``artifact_uri`` existed carry no URI on their
    metadata, so the location is reconstructed from the store's layout.
    """
    if run.info.artifact_uri:
        return ArtifactRepository.from_artifact_uri(run.info.artifact_uri)
    # TODO(aaron) Remove this once everyone locally only has runs from after
    # the introduction of "artifact_uri".
    legacy_uri = os.path.join(
        _get_store().root_directory,
        str(run.info.experiment_id),
        run.info.run_uuid,
        "artifacts",
    )
    return ArtifactRepository.from_artifact_uri(legacy_uri)
def _download_artifact_from_uri(artifact_uri, output_path=None):
    """
    :param artifact_uri: The *absolute* URI of the artifact to download.
    :param output_path: The local filesystem path to which to download the
        artifact. If unspecified, a local output path will be created.
    """
    store = _get_store()
    # Use the repository's own path module: separators may differ per backend.
    path_module = ArtifactRepository.from_artifact_uri(
        artifact_uri, store).get_path_module()
    # Split the URI into parent directory + entry name, then download the
    # entry from a repository rooted at the parent.
    parent_dir = path_module.dirname(artifact_uri)
    entry_name = path_module.basename(artifact_uri)
    parent_repo = ArtifactRepository.from_artifact_uri(
        artifact_uri=parent_dir, store=store)
    return parent_repo.download_artifacts(
        artifact_path=entry_name, dst_path=output_path)
def __init__(self, run_info, store):
    # Keep references for later artifact operations.
    self.store = store
    self.run_info = run_info
    # Legacy runs predate "artifact_uri"; fall back to the store layout.
    self.artifact_repo = (
        ArtifactRepository.from_artifact_uri(run_info.artifact_uri)
        if run_info.artifact_uri
        else _get_legacy_artifact_repo(store, run_info)
    )
def log_artifacts(self, artifact_uri, local_dir, artifact_path=None):
    """Write a directory of files to the remote ``artifact_uri``.

    :param local_dir: Local directory of files to write.
    :param artifact_path: If provided, the directory in ``artifact_uri``
        to write to.
    """
    repo = ArtifactRepository.from_artifact_uri(artifact_uri, self.store)
    repo.log_artifacts(local_dir, artifact_path)
def get_artifact_uri(run_id, artifact_path=None):
    """Return the absolute URI of an artifact (or the artifact root) of a run.

    :param run_id: The ID of the run for which to obtain an absolute artifact
        URI. Required.
    :param artifact_path: The run-relative artifact path, e.g.
        ``path/to/artifact``. If unspecified, the run's artifact root URI is
        returned; ``log_artifact``/``log_artifacts`` write under that root.
    :return: An *absolute* URI for the artifact or the run's artifact root,
        e.g. ``s3://<bucket_name>/path/to/artifact/root/path/to/artifact`` for
        an S3-backed store.
    :raises MlflowException: If ``run_id`` is falsy.
    """
    if not run_id:
        raise MlflowException(
            message="A run_id must be specified in order to obtain an artifact uri!",
            error_code=INVALID_PARAMETER_VALUE)
    store = _get_store()
    run_info = store.get_run(run_id).info
    if artifact_path is None:
        return run_info.artifact_uri
    # Path separators may not be consistent across all artifact repositories,
    # so join the root and the relative path with the path module defined by
    # the repository backing this run.
    path_module = ArtifactRepository.from_artifact_uri(
        run_info.artifact_uri, store).get_path_module()
    return path_module.join(run_info.artifact_uri, artifact_path)
def _get_model_log_dir(model_name, run_id):
    """Download the named model's artifacts for ``run_id``; return the local path.

    :raises Exception: If ``run_id`` is falsy.
    """
    if not run_id:
        raise Exception("Must specify a run_id to get logging directory for a model.")
    store = _get_store()
    run = store.get_run(run_id)
    repo = ArtifactRepository.from_artifact_uri(run.info.artifact_uri, store)
    return repo.download_artifacts(model_name)
def test_basic_functions(self):
    """Exercise list/log/download of S3-backed artifacts against a moto mock bucket."""
    with TempDir() as tmp:
        # Create a mock S3 bucket in moto.
        # Note that we must set these as environment variables in case users
        # so that boto does not attempt to assume credentials from the ~/.aws/config
        # or IAM role. moto does not correctly pass the arguments to boto3.client().
        os.environ["AWS_ACCESS_KEY_ID"] = "a"
        os.environ["AWS_SECRET_ACCESS_KEY"] = "b"
        s3 = boto3.client("s3")
        s3.create_bucket(Bucket="test_bucket")
        repo = ArtifactRepository.from_artifact_uri(
            "s3://test_bucket/some/path")
        self.assertIsInstance(repo, S3ArtifactRepository)
        # Empty repo: nothing listed, downloading a missing file raises.
        self.assertListEqual(repo.list_artifacts(), [])
        with self.assertRaises(Exception):
            open(repo.download_artifacts("test.txt")).read()
        # Create and log a test.txt file directly
        with open(tmp.path("test.txt"), "w") as f:
            f.write("Hello world!")
        repo.log_artifact(tmp.path("test.txt"))
        text = open(repo.download_artifacts("test.txt")).read()
        self.assertEqual(text, "Hello world!")
        # Check that it actually made it to S3
        obj = s3.get_object(Bucket="test_bucket", Key="some/path/test.txt")
        text = obj["Body"].read().decode('utf-8')
        self.assertEqual(text, "Hello world!")
        # Create a subdirectory for log_artifacts
        os.mkdir(tmp.path("subdir"))
        os.mkdir(tmp.path("subdir", "nested"))
        with open(tmp.path("subdir", "a.txt"), "w") as f:
            f.write("A")
        with open(tmp.path("subdir", "b.txt"), "w") as f:
            f.write("B")
        with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
            f.write("C")
        repo.log_artifacts(tmp.path("subdir"))
        # Directory contents land directly under the artifact root.
        text = open(repo.download_artifacts("a.txt")).read()
        self.assertEqual(text, "A")
        text = open(repo.download_artifacts("b.txt")).read()
        self.assertEqual(text, "B")
        text = open(repo.download_artifacts("nested/c.txt")).read()
        self.assertEqual(text, "C")
        # Root listing: files carry sizes, directories report size None.
        infos = sorted([(f.path, f.is_dir, f.file_size)
                        for f in repo.list_artifacts()])
        self.assertListEqual(infos, [("a.txt", False, 1),
                                     ("b.txt", False, 1),
                                     ("nested", True, None),
                                     ("test.txt", False, 12)])
        infos = sorted([(f.path, f.is_dir, f.file_size)
                        for f in repo.list_artifacts("nested")])
        self.assertListEqual(infos, [("nested/c.txt", False, 1)])
        # Download a subdirectory
        downloaded_dir = repo.download_artifacts("nested")
        self.assertEqual(os.path.basename(downloaded_dir), "nested")
        text = open(os.path.join(downloaded_dir, "c.txt")).read()
        self.assertEqual(text, "C")
def log_artifact(self, artifact_uri, local_path, artifact_path=None):
    """Write a single local file to the remote ``artifact_uri``.

    :param local_path: Local path of the file to write.
    :param artifact_path: If provided, the directory in ``artifact_uri``
        to write to.
    """
    repo = ArtifactRepository.from_artifact_uri(artifact_uri, self.store)
    repo.log_artifact(local_path, artifact_path)
def test_artifact_uri_factory(mock_client):  # pylint: disable=unused-argument
    """Verify an Azure blob URI resolves to AzureBlobArtifactRepository."""
    # We pass in the mock_client here to clear Azure environment variables, but we don't use it;
    # We do need to set up a fake access key for the code to run though
    os.environ['AZURE_STORAGE_ACCESS_KEY'] = ''
    try:
        repo = ArtifactRepository.from_artifact_uri(TEST_URI, mock.Mock())
        assert isinstance(repo, AzureBlobArtifactRepository)
    finally:
        # Original deleted the key only on success, leaking it into later
        # tests when the assertion failed; always clean up.
        del os.environ['AZURE_STORAGE_ACCESS_KEY']
def _get_model_log_dir(model_name, run_id):
    """Download the named model's artifacts for ``run_id``; return the local path.

    :param model_name: Run-relative artifact path of the model directory.
    :param run_id: ID of the run that logged the model. Required.
    :raises Exception: If ``run_id`` is falsy.
    """
    if not run_id:
        raise Exception("Must specify a run_id to get logging directory for a model.")
    # Resolve the store once; the original called _get_store() twice.
    store = _get_store()
    run = store.get_run(run_id)
    if run.info.artifact_uri:
        artifact_repo = ArtifactRepository.from_artifact_uri(run.info.artifact_uri)
    else:
        # Legacy runs recorded before "artifact_uri" was introduced.
        artifact_repo = _get_legacy_artifact_repo(store, run.info)
    return artifact_repo.download_artifacts(model_name)
def log_artifacts(self, run_id, local_dir, artifact_path=None):
    """
    Write a directory of files to the remote ``artifact_uri``.

    :param local_dir: Path to the directory of files to write.
    :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
    """
    artifact_root = self.get_run(run_id).info.artifact_uri
    repo = ArtifactRepository.from_artifact_uri(artifact_root, self.store)
    repo.log_artifacts(local_dir, artifact_path)
def log_artifacts(self, run_id, local_dir, artifact_path=None):
    """Write a directory of files into the run's artifact directory.

    :param local_dir: Local directory whose files should be written.
    :param artifact_path: If provided, the run-relative directory to write into.
    """
    run = self.get_run(run_id)
    repo = ArtifactRepository.from_artifact_uri(run.info.artifact_uri, self.store)
    repo.log_artifacts(local_dir, artifact_path)
def test_basic_functions(self):
    """Exercise list/log/download of S3-backed artifacts against a moto mock bucket."""
    with TempDir() as tmp:
        # Create a mock S3 bucket in moto
        s3 = boto3.client("s3")
        s3.create_bucket(Bucket="test_bucket")
        repo = ArtifactRepository.from_artifact_uri(
            "s3://test_bucket/some/path")
        self.assertIsInstance(repo, S3ArtifactRepository)
        # Empty repo: nothing listed, downloading a missing file raises.
        self.assertListEqual(repo.list_artifacts(), [])
        with self.assertRaises(Exception):
            open(repo.download_artifacts("test.txt")).read()
        # Create and log a test.txt file directly
        with open(tmp.path("test.txt"), "w") as f:
            f.write("Hello world!")
        repo.log_artifact(tmp.path("test.txt"))
        text = open(repo.download_artifacts("test.txt")).read()
        self.assertEqual(text, "Hello world!")
        # Check that it actually made it to S3
        obj = s3.get_object(Bucket="test_bucket", Key="some/path/test.txt")
        text = obj["Body"].read().decode('utf-8')
        self.assertEqual(text, "Hello world!")
        # Create a subdirectory for log_artifacts
        os.mkdir(tmp.path("subdir"))
        os.mkdir(tmp.path("subdir", "nested"))
        with open(tmp.path("subdir", "a.txt"), "w") as f:
            f.write("A")
        with open(tmp.path("subdir", "b.txt"), "w") as f:
            f.write("B")
        with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
            f.write("C")
        repo.log_artifacts(tmp.path("subdir"))
        # Directory contents land directly under the artifact root.
        text = open(repo.download_artifacts("a.txt")).read()
        self.assertEqual(text, "A")
        text = open(repo.download_artifacts("b.txt")).read()
        self.assertEqual(text, "B")
        text = open(repo.download_artifacts("nested/c.txt")).read()
        self.assertEqual(text, "C")
        # Root listing: files carry sizes, directories report size None.
        infos = sorted([(f.path, f.is_dir, f.file_size)
                        for f in repo.list_artifacts()])
        self.assertListEqual(infos, [("a.txt", False, 1),
                                     ("b.txt", False, 1),
                                     ("nested", True, None),
                                     ("test.txt", False, 12)])
        infos = sorted([(f.path, f.is_dir, f.file_size)
                        for f in repo.list_artifacts("nested")])
        self.assertListEqual(infos, [("nested/c.txt", False, 1)])
        # Download a subdirectory
        downloaded_dir = repo.download_artifacts("nested")
        self.assertEqual(os.path.basename(downloaded_dir), "nested")
        text = open(os.path.join(downloaded_dir, "c.txt")).read()
        self.assertEqual(text, "C")
def list_artifacts(run_id, artifact_path):
    """
    Return all the artifacts directly under run's root artifact directory,
    or a sub-directory. The output is a JSON-formatted list.
    """
    relative_path = "" if artifact_path is None else artifact_path
    store = _get_store()
    root_uri = store.get_run(run_id).info.artifact_uri
    repo = ArtifactRepository.from_artifact_uri(root_uri, store)
    print(_file_infos_to_json(repo.list_artifacts(relative_path)))
def download_artifacts(run_id, artifact_path):
    """
    Download an artifact file or directory to a local directory.
    The output is the name of the file or directory on the local disk.
    """
    relative_path = "" if artifact_path is None else artifact_path
    store = _get_store()
    root_uri = store.get_run(run_id).info.artifact_uri
    repo = ArtifactRepository.from_artifact_uri(root_uri, store)
    print(repo.download_artifacts(relative_path))
def log_artifacts(local_dir, run_id, artifact_path):
    """
    Logs the files within a local directory as an artifact of a run, optionally
    within a run-specific artifact path. Run artifacts can be organized into
    directories, so you can place the artifact in a directory this way.
    """
    store = _get_store()
    root_uri = store.get_run(run_id).info.artifact_uri
    repo = ArtifactRepository.from_artifact_uri(root_uri, store)
    repo.log_artifacts(local_dir, artifact_path)
    _logger.info("Logged artifact from local dir %s to artifact_path=%s",
                 local_dir, artifact_path)
def download_artifacts(self, run_id, path):
    """
    Download an artifact file or directory from a run to a local directory
    if applicable, and return a local path for it.

    :param run_id: The run to download artifacts from.
    :param path: Relative source path to the desired artifact.
    :return: Local path of desired artifact.
    """
    artifact_root = self.get_run(run_id).info.artifact_uri
    repo = ArtifactRepository.from_artifact_uri(artifact_root, self.store)
    return repo.download_artifacts(path)
def list_artifacts(self, run_id, path=None):
    """
    List the artifacts for a run.

    :param run_id: The run to list artifacts from.
    :param path: The run's relative artifact path to list from. By default
        it is set to None or the root artifact path.
    :return: List of :py:class:`mlflow.entities.FileInfo`
    """
    artifact_root = self.get_run(run_id).info.artifact_uri
    repo = ArtifactRepository.from_artifact_uri(artifact_root, self.store)
    return repo.list_artifacts(path)
def test_basic_functions(self):
    """Exercise list/log/download operations of the local artifact repository."""
    with TempDir() as test_root, TempDir() as tmp:
        repo = ArtifactRepository.from_artifact_uri(
            test_root.path(), Mock())
        self.assertIsInstance(repo, LocalArtifactRepository)
        # Empty repo: nothing listed, downloading a missing file raises.
        self.assertListEqual(repo.list_artifacts(), [])
        with self.assertRaises(Exception):
            open(repo.download_artifacts("test.txt")).read()
        # Create and log a test.txt file directly
        with open(tmp.path("test.txt"), "w") as f:
            f.write("Hello world!")
        repo.log_artifact(tmp.path("test.txt"))
        text = open(repo.download_artifacts("test.txt")).read()
        self.assertEqual(text, "Hello world!")
        # Check that it actually got written in the expected place
        text = open(os.path.join(test_root.path(), "test.txt")).read()
        self.assertEqual(text, "Hello world!")
        # Create a subdirectory for log_artifacts
        os.mkdir(tmp.path("subdir"))
        os.mkdir(tmp.path("subdir", "nested"))
        with open(tmp.path("subdir", "a.txt"), "w") as f:
            f.write("A")
        with open(tmp.path("subdir", "b.txt"), "w") as f:
            f.write("B")
        with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
            f.write("C")
        repo.log_artifacts(tmp.path("subdir"))
        # Directory contents land directly under the artifact root.
        text = open(repo.download_artifacts("a.txt")).read()
        self.assertEqual(text, "A")
        text = open(repo.download_artifacts("b.txt")).read()
        self.assertEqual(text, "B")
        text = open(repo.download_artifacts("nested/c.txt")).read()
        self.assertEqual(text, "C")
        # Root listing: files carry sizes, directories report size None.
        infos = sorted([(f.path, f.is_dir, f.file_size)
                        for f in repo.list_artifacts()])
        self.assertListEqual(infos, [("a.txt", False, 1),
                                     ("b.txt", False, 1),
                                     ("nested", True, None),
                                     ("test.txt", False, 12)])
        infos = sorted([(f.path, f.is_dir, f.file_size)
                        for f in repo.list_artifacts("nested")])
        self.assertListEqual(infos, [("nested/c.txt", False, 1)])
        # Download a subdirectory
        downloaded_dir = repo.download_artifacts("nested")
        self.assertEqual(os.path.basename(downloaded_dir), "nested")
        text = open(os.path.join(downloaded_dir, "c.txt")).read()
        self.assertEqual(text, "C")
def log_artifacts(self, run_id, local_dir, artifact_path=None):
    """
    Write a directory of files to the remote ``artifact_uri``.

    :param local_dir: Path to the directory of files to write.
    :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
    """
    run = self.get_run(run_id)
    repo = ArtifactRepository.from_artifact_uri(
        run.info.artifact_uri, self.store)
    repo.log_artifacts(local_dir, artifact_path)
    # Record an audit tag with an S3-console link. NOTE(review): the link
    # assumes the artifact URI starts with "s3://" (first 5 chars stripped)
    # -- confirm non-S3 stores never reach this code path.
    timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    tag_value = ("Dir: " + local_dir
                 + ", uploaded into: " + run.info.artifact_uri
                 + " at time: " + timestamp
                 + ". Access link: "
                 + "https://s3.console.aws.amazon.com/s3/object/"
                 + run.info.artifact_uri[5:])
    self.set_tag(run_id, "Dir uploaded", tag_value)
def test_basic_functions(self):
    """Exercise log/list/download paths of the local artifact repository,
    including subdirectory targets, hidden files, deep nesting, and path
    validation."""
    with TempDir() as test_root, TempDir() as tmp:
        repo = ArtifactRepository.from_artifact_uri(
            test_root.path(), Mock())
        self.assertIsInstance(repo, LocalArtifactRepository)
        # Empty repo: nothing listed, downloading a missing file raises.
        self.assertListEqual(repo.list_artifacts(), [])
        with self.assertRaises(Exception):
            open(repo.download_artifacts("test.txt")).read()
        # Create and log a test.txt file directly
        artifact_name = "test.txt"
        local_file = tmp.path(artifact_name)
        with open(local_file, "w") as f:
            f.write("Hello world!")
        repo.log_artifact(local_file)
        text = open(repo.download_artifacts(artifact_name)).read()
        self.assertEqual(text, "Hello world!")
        # Check that it actually got written in the expected place
        text = open(os.path.join(test_root.path(), artifact_name)).read()
        self.assertEqual(text, "Hello world!")
        # log artifact in subdir
        repo.log_artifact(local_file, "aaa")
        text = open(
            repo.download_artifacts(os.path.join("aaa", artifact_name))).read()
        self.assertEqual(text, "Hello world!")
        # log a hidden artifact
        hidden_file = tmp.path(".mystery")
        with open(hidden_file, 'w') as f:
            f.write("42")
        repo.log_artifact(hidden_file, "aaa")
        # NOTE(review): hidden_file is an absolute temp path, and
        # os.path.join("aaa", <absolute>) discards "aaa" -- confirm this
        # resolves to the intended artifact rather than the source file.
        hidden_text = open(
            repo.download_artifacts(os.path.join("aaa", hidden_file))).read()
        self.assertEqual(hidden_text, "42")
        # log artifacts in deep nested subdirs
        nested_subdir = "bbb/ccc/ddd/eee/fghi"
        repo.log_artifact(local_file, nested_subdir)
        text = open(
            repo.download_artifacts(
                os.path.join(nested_subdir, artifact_name))).read()
        self.assertEqual(text, "Hello world!")
        # Absolute or escaping destination paths must be rejected.
        for bad_path in [
                "/", "//", "/tmp", "/bad_path", ".", "../terrible_path"
        ]:
            with self.assertRaises(Exception):
                repo.log_artifact(local_file, bad_path)
        # Create a subdirectory for log_artifacts
        os.mkdir(tmp.path("subdir"))
        os.mkdir(tmp.path("subdir", "nested"))
        with open(tmp.path("subdir", "a.txt"), "w") as f:
            f.write("A")
        with open(tmp.path("subdir", "b.txt"), "w") as f:
            f.write("B")
        with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
            f.write("C")
        repo.log_artifacts(tmp.path("subdir"))
        text = open(repo.download_artifacts("a.txt")).read()
        self.assertEqual(text, "A")
        text = open(repo.download_artifacts("b.txt")).read()
        self.assertEqual(text, "B")
        text = open(repo.download_artifacts("nested/c.txt")).read()
        self.assertEqual(text, "C")
        # Root listing: files carry sizes, directories report size None.
        infos = self._get_contents(repo, None)
        self.assertListEqual(infos, [
            ("a.txt", False, 1),
            ("aaa", True, None),
            ("b.txt", False, 1),
            ("bbb", True, None),
            ("nested", True, None),
            ("test.txt", False, 12),
        ])
        # Verify contents of subdirectories
        self.assertListEqual(self._get_contents(repo, "nested"),
                             [("nested/c.txt", False, 1)])
        infos = self._get_contents(repo, "aaa")
        self.assertListEqual(infos, [("aaa/.mystery", False, 2),
                                     ("aaa/test.txt", False, 12)])
        self.assertListEqual(self._get_contents(repo, "bbb"),
                             [("bbb/ccc", True, None)])
        self.assertListEqual(self._get_contents(repo, "bbb/ccc"),
                             [("bbb/ccc/ddd", True, None)])
        infos = self._get_contents(repo, "bbb/ccc/ddd/eee")
        self.assertListEqual(infos, [("bbb/ccc/ddd/eee/fghi", True, None)])
        infos = self._get_contents(repo, "bbb/ccc/ddd/eee/fghi")
        self.assertListEqual(
            infos, [("bbb/ccc/ddd/eee/fghi/test.txt", False, 12)])
        # Download a subdirectory
        downloaded_dir = repo.download_artifacts("nested")
        self.assertEqual(os.path.basename(downloaded_dir), "nested")
        text = open(os.path.join(downloaded_dir, "c.txt")).read()
        self.assertEqual(text, "C")
def test_artifact_uri_factory():
    # A "gs://" URI must resolve to the GCS-backed repository implementation.
    created = ArtifactRepository.from_artifact_uri("gs://test_bucket/some/path")
    assert isinstance(created, GCSArtifactRepository)
def get_artifact_repository(run_uuid, store=None):
    """Return an ArtifactRepository rooted at the given run's artifact URI.

    :param run_uuid: ID of the run whose artifact root should be resolved.
    :param store: Backing store; resolved via ``_ensure_store`` when None.
    """
    # Imported locally, mirroring the original (likely avoids an import cycle).
    from mlflow.store.artifact_repo import ArtifactRepository
    resolved_store = _ensure_store(store)
    artifact_uri = resolved_store.get_run(run_uuid).info.artifact_uri
    return ArtifactRepository.from_artifact_uri(artifact_uri, resolved_store)
def test_artifact_uri_factory():
    # An "ftp://" URI with embedded credentials must map to FTPArtifactRepository.
    created = ArtifactRepository.from_artifact_uri(
        "ftp://*****:*****@test_ftp:123/some/path", Mock())
    assert isinstance(created, FTPArtifactRepository)
def _get_legacy_artifact_repo(file_store, run_info):
    """Reconstruct the artifact location of a run recorded before "artifact_uri"."""
    # TODO(aaron) Remove this once everyone locally only has runs from after
    # the introduction of "artifact_uri".
    legacy_uri = os.path.join(
        file_store.root_directory,
        str(run_info.experiment_id),
        run_info.run_uuid,
        "artifacts",
    )
    return ArtifactRepository.from_artifact_uri(legacy_uri, file_store)
def _get_artifact_repo(run_info, store):
    """Resolve the artifact repository for ``run_info``, handling legacy runs."""
    if not run_info.artifact_uri:
        # Legacy runs predate "artifact_uri"; reconstruct from the store layout.
        return _get_legacy_artifact_repo(store, run_info)
    return ArtifactRepository.from_artifact_uri(run_info.artifact_uri, store)
def test_artifact_uri_factory():
    from paramiko.ssh_exception import SSHException
    # Building an SFTP repository for this URI raises SSHException here
    # (presumably an SSH connection is attempted at construction -- the host
    # does not exist in the test environment).
    with pytest.raises(SSHException):
        ArtifactRepository.from_artifact_uri(
            "sftp://*****:*****@test_sftp:123/some/path", Mock())
def _get_artifact_repo(run):
    """Return the artifact repository rooted at ``run``'s artifact URI."""
    return ArtifactRepository.from_artifact_uri(
        run.info.artifact_uri, _get_store())