def test_log_artifacts():
    for artifact_path in [None, "sub_dir", "very/nested/sub/dir"]:
        file_content_1 = 'A simple test artifact\nThe artifact is located in: ' + str(
            artifact_path)
        file_content_2 = os.urandom(300)

        file1 = "meta.yaml"
        directory = "saved_model"
        file2 = "sk_model.pickle"
        with TempDir() as local, TempDir() as remote:
            with open(os.path.join(local.path(), file1), "w") as f:
                f.write(file_content_1)
            os.mkdir(os.path.join(local.path(), directory))
            with open(os.path.join(local.path(), directory, file2), "wb") as f:
                f.write(file_content_2)

            sftp_path = "sftp://" + remote.path()
            store = SFTPArtifactRepository(sftp_path)
            store.log_artifacts(local.path(), artifact_path)

            remote_dir = os.path.join(
                remote.path(), '.' if artifact_path is None else artifact_path)
            assert os.path.isdir(remote_dir)
            assert os.path.isdir(os.path.join(remote_dir, directory))
            assert os.path.isfile(os.path.join(remote_dir, file1))
            assert os.path.isfile(os.path.join(remote_dir, directory, file2))

            with open(os.path.join(remote_dir, file1), 'r') as remote_content:
                assert remote_content.read() == file_content_1

            with open(os.path.join(remote_dir, directory, file2),
                      'rb') as remote_content:
                assert remote_content.read() == file_content_2
Beispiel #2
0
def test_list_artifacts(sftp_mock):
    artifact_root_path = "/experiment_id/run_id/"
    repo = SFTPArtifactRepository("sftp://test_sftp"+artifact_root_path, sftp_mock)

    # mocked file structure
    #  |- file
    #  |- model
    #     |- model.pb

    file_path = "file"
    file_size = 678
    dir_path = "model"
    sftp_mock.isdir = MagicMock(side_effect=lambda path: {
            (artifact_root_path+file_path): False,
            (artifact_root_path+dir_path): True
        }[path])
    sftp_mock.listdir = MagicMock(return_value=[file_path, dir_path])

    file_stat = MagicMock()
    file_stat.configure_mock(st_size=file_size)
    sftp_mock.stat = MagicMock(return_value=file_stat)

    artifacts = repo.list_artifacts(path=None)

    sftp_mock.listdir.assert_called_once_with(artifact_root_path)
    sftp_mock.stat.assert_called_once_with(artifact_root_path + file_path)

    assert len(artifacts) == 2
    assert artifacts[0].path == file_path
    assert artifacts[0].is_dir is False
    assert artifacts[0].file_size == file_size
    assert artifacts[1].path == dir_path
    assert artifacts[1].is_dir is True
    assert artifacts[1].file_size is None
Beispiel #3
0
 def from_artifact_uri(artifact_uri, store):
     """
     Given an artifact URI for an Experiment Run (e.g., /local/file/path or s3://my/bucket),
     returns an ArtifactReposistory instance capable of logging and downloading artifacts
     on behalf of this URI.
     :param store: An instance of AbstractStore which the artifacts are registered in.
     """
     if artifact_uri.startswith("s3:/"):
         # Import these locally to avoid creating a circular import loop
         from mlflow.store.s3_artifact_repo import S3ArtifactRepository
         return S3ArtifactRepository(artifact_uri)
     elif artifact_uri.startswith("gs:/"):
         from mlflow.store.gcs_artifact_repo import GCSArtifactRepository
         return GCSArtifactRepository(artifact_uri)
     elif artifact_uri.startswith("wasbs:/"):
         from mlflow.store.azure_blob_artifact_repo import AzureBlobArtifactRepository
         return AzureBlobArtifactRepository(artifact_uri)
     elif artifact_uri.startswith("sftp:/"):
         from mlflow.store.sftp_artifact_repo import SFTPArtifactRepository
         return SFTPArtifactRepository(artifact_uri)
     elif artifact_uri.startswith("dbfs:/"):
         from mlflow.store.dbfs_artifact_repo import DbfsArtifactRepository
         if not isinstance(store, DatabricksStore):
             raise MlflowException(
                 '`store` must be an instance of DatabricksStore.')
         return DbfsArtifactRepository(artifact_uri,
                                       store.http_request_kwargs)
     else:
         from mlflow.store.local_artifact_repo import LocalArtifactRepository
         return LocalArtifactRepository(artifact_uri)
def test_log_artifact():
    for artifact_path in [None, "sub_dir", "very/nested/sub/dir"]:
        file_content = 'A simple test artifact\nThe artifact is located in: ' + str(
            artifact_path)
        with NamedTemporaryFile(mode="w") as local, TempDir() as remote:
            local.write(file_content)
            local.flush()

            sftp_path = "sftp://" + remote.path()
            store = SFTPArtifactRepository(sftp_path)
            store.log_artifact(local.name, artifact_path)

            remote_file = os.path.join(
                remote.path(), '.' if artifact_path is None else artifact_path,
                os.path.basename(local.name))
            assert os.path.isfile(remote_file)

            with open(remote_file, 'r') as remote_content:
                assert remote_content.read() == file_content
def test_list_artifacts_with_subdir(sftp_mock):
    artifact_root_path = "/experiment_id/run_id/"
    repo = SFTPArtifactRepository("sftp://test_sftp" + artifact_root_path,
                                  sftp_mock)

    # mocked file structure
    #  |- model
    #     |- model.pb
    #     |- variables
    dir_name = 'model'

    # list artifacts at sub directory level
    file_path = 'model.pb'
    file_size = 345
    subdir_name = 'variables'

    sftp_mock.listdir = MagicMock(return_value=[file_path, subdir_name])

    sftp_mock.isdir = MagicMock(
        side_effect=lambda path: {
            posixpath.join(artifact_root_path, dir_name): True,
            posixpath.join(artifact_root_path, dir_name, file_path): False,
            posixpath.join(artifact_root_path, dir_name, subdir_name): True,
        }[path])

    file_stat = MagicMock()
    file_stat.configure_mock(st_size=file_size)
    sftp_mock.stat = MagicMock(return_value=file_stat)

    artifacts = repo.list_artifacts(path=dir_name)

    sftp_mock.listdir.assert_called_once_with(artifact_root_path + dir_name)
    sftp_mock.stat.assert_called_once_with(artifact_root_path + dir_name +
                                           '/' + file_path)

    assert len(artifacts) == 2
    assert artifacts[0].path == posixpath.join(dir_name, file_path)
    assert artifacts[0].is_dir is False
    assert artifacts[0].file_size == file_size
    assert artifacts[1].path == posixpath.join(dir_name, subdir_name)
    assert artifacts[1].is_dir is True
    assert artifacts[1].file_size is None
def test_list_artifacts_empty(sftp_mock):
    repo = SFTPArtifactRepository("sftp://test_sftp/some/path", sftp_mock)
    sftp_mock.listdir = MagicMock(return_value=[])
    assert repo.list_artifacts() == []
    sftp_mock.listdir.assert_called_once_with("/some/path")