Example #1
0
def test_get_metrics(asyn_repo_w_mock_filesystem):
    """`get_metrics` returns each metric served by the mocked filesystem exactly once."""
    repo = asyn_repo_w_mock_filesystem
    experiment = _create_experiment_domain()

    # Three metric domains attached to the same experiment.
    written_metrics = []
    for _ in range(3):
        written_metrics.append(_create_metric_domain(experiment=experiment)[1])

    # Value the mocked `_ls` returns: one directory entry per written metric.
    metric_root = f"{slugify(experiment.project_name)}/experiments/{experiment.id}/metrics"
    listing = []
    for written in written_metrics:
        listing.append(
            {"name": f"{repo.root_dir}/{metric_root}/{written.id}", "StorageClass": "DIRECTORY"}
        )
    repo.filesystem._ls.return_value = listing

    # Successive `_cat_file` calls yield the serialized metrics in order.
    repo.filesystem._cat_file.side_effect = [json.dumps(written) for written in written_metrics]

    metrics = asyncio.run(repo.get_metrics(experiment.project_name, experiment.id))

    assert len(metrics) == 3

    # Removing each matched id guarantees no metric is counted twice.
    unmatched_ids = [written.id for written in written_metrics]
    for metric in metrics:
        assert metric.id in unmatched_ids
        unmatched_ids.remove(metric.id)
Example #2
0
def test_get_features(asyn_repo_w_mock_filesystem):
    """`get_features` returns each feature served by the mocked filesystem exactly once."""
    repo = asyn_repo_w_mock_filesystem
    experiment = _create_experiment_domain()

    # Three feature domains attached to the same experiment.
    written_features = []
    for _ in range(3):
        written_features.append(_create_feature_domain(experiment=experiment)[1])

    # Value the mocked `_ls` returns: one directory entry per written feature.
    feature_root = f"{slugify(experiment.project_name)}/experiments/{experiment.id}/features"
    feature_paths = [f"{repo.root_dir}/{feature_root}/{written.id}" for written in written_features]
    repo.filesystem._ls.return_value = [
        {"name": feature_path, "StorageClass": "DIRECTORY"} for feature_path in feature_paths
    ]

    # Successive `_cat_file` calls yield the serialized features in order.
    serialized_features = [json.dumps(written) for written in written_features]
    repo.filesystem._cat_file.side_effect = serialized_features

    features = asyncio.run(repo.get_features(experiment.project_name, experiment.id))

    assert len(features) == 3

    # Removing each matched id guarantees no feature is counted twice.
    unmatched_ids = [written.id for written in written_features]
    for feature in features:
        assert feature.id in unmatched_ids
        unmatched_ids.remove(feature.id)
Example #3
0
def test_can_serialize_datetime():
    """A dict holding a `datetime` value can be dumped by the `json` module in scope.

    The output must mention "datetime" and contain `str()` of the timestamp.
    """
    timestamp = datetime.utcnow()
    serialized = json.dumps({"date": timestamp, "other": None})

    for expected_fragment in ("datetime", str(timestamp)):
        assert expected_fragment in serialized
Example #4
0
def test_get_parameters(asyn_repo_w_mock_filesystem):
    """`get_parameters` returns each parameter served by the mocked filesystem exactly once."""
    repo = asyn_repo_w_mock_filesystem
    experiment = _create_experiment_domain()

    # Three parameter domains attached to the same experiment.
    written_parameters = []
    for _ in range(3):
        written_parameters.append(_create_parameter_domain(experiment=experiment)[1])

    # Value the mocked `_ls` returns: one directory entry per written parameter.
    parameter_root = f"{slugify(experiment.project_name)}/experiments/{experiment.id}/parameters"
    listing = []
    for written in written_parameters:
        listing.append(
            {
                "name": f"{repo.root_dir}/{parameter_root}/{written.id}",
                "StorageClass": "DIRECTORY",
            }
        )
    repo.filesystem._ls.return_value = listing

    # Successive `_cat_file` calls yield the serialized parameters in order.
    repo.filesystem._cat_file.side_effect = [
        json.dumps(written) for written in written_parameters
    ]

    parameters = asyncio.run(repo.get_parameters(experiment.project_name, experiment.id))

    assert len(parameters) == 3

    # Removing each matched id guarantees no parameter is counted twice.
    unmatched_ids = [written.id for written in written_parameters]
    for parameter in parameters:
        assert parameter.id in unmatched_ids
        unmatched_ids.remove(parameter.id)
Example #5
0
    def _persist_domain(self, domain, path):
        """Write the serialized Rubicon object `domain` to the local
        file at `path`, creating its parent directory if needed.
        """
        parent_dir = os.path.dirname(path)
        # `exist_ok=True` keeps repeated writes into the same directory from failing.
        self.filesystem.mkdirs(parent_dir, exist_ok=True)

        with self.filesystem.open(path, "w") as handle:
            serialized = json.dumps(domain)
            handle.write(serialized)
Example #6
0
    def _persist_domain(self, domain, path):
        """Serialize the Rubicon object `domain` and write it to the
        S3 location given by `path`.
        """
        # Serialize before opening, so a serialization error never opens `path`.
        payload = json.dumps(domain)

        with self.filesystem.open(path, "w") as handle:
            handle.write(payload)
Example #7
0
def test_get_project(asyn_repo_w_mock_filesystem):
    """`get_project` rebuilds the project whose JSON the mocked `_cat_file` returns."""
    repo = asyn_repo_w_mock_filesystem

    project_name = f"Test Project {uuid.uuid4()}"
    written_project = domain.Project(project_name)
    repo.filesystem._cat_file.return_value = json.dumps(written_project)

    lookup = repo.get_project(written_project.name)
    project = asyncio.run(lookup)

    assert project.id == written_project.id
    assert project.name == written_project.name
Example #8
0
def test_can_serialize_set():
    """A dict holding a `set` value can be dumped by the `json` module in scope.

    The output must contain the key name and every member of the set.
    """
    tag_names = ["tag-a", "tag-b"]
    payload = {"tags": set(tag_names), "other": None}

    serialized = json.dumps(payload)

    assert "tags" in serialized
    for tag_name in tag_names:
        assert tag_name in serialized
Example #9
0
def test_get_dataframe_metadata(asyn_repo_w_mock_filesystem):
    """`get_dataframe_metadata` rebuilds the dataframe served by the mocked filesystem.

    The mocked `_cat_file` returns the serialized `written_dataframe`; the
    object read back must carry the same id and the same parent id.
    """
    project, written_dataframe = _create_dataframe_domain()
    asyn_repo_w_mock_filesystem.filesystem._cat_file.return_value = json.dumps(written_dataframe)

    dataframe = asyncio.run(
        asyn_repo_w_mock_filesystem.get_dataframe_metadata(project.name, written_dataframe.id)
    )

    assert dataframe.id == written_dataframe.id
    # Fixture sanity check: the helper parents the written dataframe to `project`.
    assert project.id == written_dataframe.parent_id
    # Check the value under test as well; the assertion above never looks at
    # the retrieved object, so a lost parent id would otherwise go unnoticed.
    assert dataframe.parent_id == project.id
Example #10
0
def test_get_artifact_metadata(asyn_repo_w_mock_filesystem):
    """`get_artifact_metadata` rebuilds the artifact served by the mocked filesystem.

    The mocked `_cat_file` returns the serialized `written_artifact`; the
    object read back must carry the same id and the same parent id.
    """
    project, written_artifact = _create_artifact_domain()
    asyn_repo_w_mock_filesystem.filesystem._cat_file.return_value = json.dumps(written_artifact)

    artifact = asyncio.run(
        asyn_repo_w_mock_filesystem.get_artifact_metadata(project.name, written_artifact.id)
    )

    assert artifact.id == written_artifact.id
    # Fixture sanity check: the helper parents the written artifact to `project`.
    assert project.id == written_artifact.parent_id
    # Check the value under test as well; the assertion above never looks at
    # the retrieved object, so a lost parent id would otherwise go unnoticed.
    assert artifact.parent_id == project.id
Example #11
0
def test_persist_domain(asyn_s3_repo_w_mock_filesystem):
    """`_persist_domain` makes exactly one `_pipe_file` call with the path and serialized domain."""
    repo = asyn_s3_repo_w_mock_filesystem

    project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"s3://bucket/root/{slugify(project.name)}/metadata.json"

    asyncio.run(repo._persist_domain(project, metadata_path))

    # Comparing the full `mock_calls` list also rules out any extra filesystem calls.
    expected_calls = [call._pipe_file(metadata_path, json.dumps(project))]
    assert repo.filesystem.mock_calls == expected_calls
Example #12
0
def test_get_metric(asyn_repo_w_mock_filesystem):
    """`get_metric` rebuilds the metric whose JSON the mocked `_cat_file` returns."""
    repo = asyn_repo_w_mock_filesystem

    experiment, written_metric = _create_metric_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_metric)

    lookup = repo.get_metric(experiment.project_name, experiment.id, written_metric.name)
    metric = asyncio.run(lookup)

    assert metric.id == written_metric.id
    assert metric.name == written_metric.name
Example #13
0
def test_can_serialize_training_metadata():
    """A dict holding a `TrainingMetadata` value can be dumped by the `json` module in scope.

    The output must contain the key name and the (path, query) pairs rendered
    as a JSON list of two-element lists.
    """
    sources = [("test/path", "SELECT * FROM test"), ("test/other/path", "SELECT * FROM test")]
    payload = {"training_metadata": TrainingMetadata(sources)}

    serialized = json.dumps(payload)

    expected_fragment = (
        '[["test/path", "SELECT * FROM test"], ["test/other/path", "SELECT * FROM test"]]'
    )
    assert "training_metadata" in serialized
    assert expected_fragment in serialized
Example #14
0
def test_get_parameter(asyn_repo_w_mock_filesystem):
    """`get_parameter` rebuilds the parameter whose JSON the mocked `_cat_file` returns."""
    repo = asyn_repo_w_mock_filesystem

    experiment, written_parameter = _create_parameter_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_parameter)

    lookup = repo.get_parameter(experiment.project_name, experiment.id, written_parameter.name)
    parameter = asyncio.run(lookup)

    assert parameter.id == written_parameter.id
    assert parameter.name == written_parameter.name
Example #15
0
def test_get_feature(asyn_repo_w_mock_filesystem):
    """`get_feature` rebuilds the feature whose JSON the mocked `_cat_file` returns."""
    repo = asyn_repo_w_mock_filesystem

    experiment, written_feature = _create_feature_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_feature)

    lookup = repo.get_feature(experiment.project_name, experiment.id, written_feature.name)
    feature = asyncio.run(lookup)

    assert feature.id == written_feature.id
    assert feature.name == written_feature.name
Example #16
0
def test_get_experiment(asyn_repo_w_mock_filesystem):
    """`get_experiment` rebuilds the experiment whose JSON the mocked `_cat_file` returns."""
    repo = asyn_repo_w_mock_filesystem

    written_experiment = _create_experiment_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_experiment)

    lookup = repo.get_experiment(written_experiment.project_name, written_experiment.id)
    experiment = asyncio.run(lookup)

    assert experiment.id == written_experiment.id
    assert experiment.name == written_experiment.name
    assert experiment.project_name == written_experiment.project_name
Example #17
0
def test_get_projects(asyn_repo_w_mock_filesystem):
    """`get_projects` returns each project served by the mocked filesystem exactly once."""
    repo = asyn_repo_w_mock_filesystem

    written_projects = []
    for _ in range(3):
        written_projects.append(domain.Project(f"Test Project {uuid.uuid4()}"))

    # Value the mocked `_ls` returns: one directory entry per written project.
    listing = []
    for written in written_projects:
        listing.append(
            {"name": f"{repo.root_dir}/{slugify(written.name)}", "StorageClass": "DIRECTORY"}
        )
    repo.filesystem._ls.return_value = listing

    # Successive `_cat_file` calls yield the serialized projects in order.
    repo.filesystem._cat_file.side_effect = [json.dumps(written) for written in written_projects]

    projects = asyncio.run(repo.get_projects())

    assert len(projects) == 3

    # Removing each matched id guarantees no project is counted twice.
    unmatched_ids = [written.id for written in written_projects]
    for project in projects:
        assert project.id in unmatched_ids
        unmatched_ids.remove(project.id)
Example #18
0
def test_get_dataframes_metadata(asyn_repo_w_mock_filesystem):
    """`get_dataframes_metadata` returns each dataframe served by the mocked filesystem once."""
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")

    # Three dataframe domains that share the same parent project.
    written_dataframes = []
    for _ in range(3):
        written_dataframes.append(_create_dataframe_domain(project=project)[1])

    # Value the mocked `_ls` returns: one directory entry per written dataframe.
    listing = []
    for written in written_dataframes:
        listing.append(
            {
                "name": f"{repo.root_dir}/{slugify(project.name)}/dataframes/{written.id}",
                "StorageClass": "DIRECTORY",
            }
        )
    repo.filesystem._ls.return_value = listing

    # Successive `_cat_file` calls yield the serialized dataframes in order.
    repo.filesystem._cat_file.side_effect = [
        json.dumps(written) for written in written_dataframes
    ]

    dataframes = asyncio.run(repo.get_dataframes_metadata(project.name))

    assert len(dataframes) == 3

    # Removing each matched id guarantees no dataframe is counted twice.
    unmatched_ids = [written.id for written in written_dataframes]
    for dataframe in dataframes:
        assert dataframe.id in unmatched_ids
        unmatched_ids.remove(dataframe.id)
Example #19
0
def test_get_artifacts_metadata(asyn_repo_w_mock_filesystem):
    """`get_artifacts_metadata` returns each artifact served by the mocked filesystem once."""
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")

    # Three artifact domains that share the same parent project.
    written_artifacts = []
    for _ in range(3):
        written_artifacts.append(_create_artifact_domain(project=project)[1])

    # Value the mocked `_ls` returns: one directory entry per written artifact.
    artifact_paths = [
        f"{repo.root_dir}/{slugify(project.name)}/artifacts/{written.id}"
        for written in written_artifacts
    ]
    repo.filesystem._ls.return_value = [
        {"name": artifact_path, "StorageClass": "DIRECTORY"} for artifact_path in artifact_paths
    ]

    # Successive `_cat_file` calls yield the serialized artifacts in order.
    serialized_artifacts = [json.dumps(written) for written in written_artifacts]
    repo.filesystem._cat_file.side_effect = serialized_artifacts

    artifacts = asyncio.run(repo.get_artifacts_metadata(project.name))

    assert len(artifacts) == 3

    # Removing each matched id guarantees no artifact is counted twice.
    unmatched_ids = [written.id for written in written_artifacts]
    for artifact in artifacts:
        assert artifact.id in unmatched_ids
        unmatched_ids.remove(artifact.id)
Example #20
0
def test_get_experiments(asyn_repo_w_mock_filesystem):
    """`get_experiments` returns each experiment served by the mocked filesystem exactly once."""
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")

    # Three experiment domains created for the same project.
    written_experiments = []
    for _ in range(3):
        written_experiments.append(_create_experiment_domain(project=project))

    # Value the mocked `_ls` returns: one directory entry per written experiment.
    listing = []
    for written in written_experiments:
        listing.append(
            {
                "name": (
                    f"{repo.root_dir}/{slugify(written.project_name)}/experiments/{written.id}"
                ),
                "StorageClass": "DIRECTORY",
            }
        )
    repo.filesystem._ls.return_value = listing

    # Successive `_cat_file` calls yield the serialized experiments in order.
    repo.filesystem._cat_file.side_effect = [
        json.dumps(written) for written in written_experiments
    ]

    experiments = asyncio.run(repo.get_experiments(project.name))

    assert len(experiments) == 3

    # Removing each matched id guarantees no experiment is counted twice.
    unmatched_ids = [written.id for written in written_experiments]
    for experiment in experiments:
        assert experiment.id in unmatched_ids
        unmatched_ids.remove(experiment.id)
Example #21
0
File: s3.py Project: mbseid/rubicon
 async def _persist_domain(self, domain, path):
     """Asynchronously write the serialized Rubicon object
     `domain` to the S3 location given by `path`.
     """
     serialized = json.dumps(domain)
     # The whole payload goes out in one `_pipe_file` call on the filesystem.
     await self.filesystem._pipe_file(path, serialized)