def test_get_metrics(asyn_repo_w_mock_filesystem):
    """Check that `get_metrics` returns every metric written for an
    experiment.

    The filesystem's `_ls` is primed with one directory entry per metric
    and `_cat_file` serves each metric's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    experiment = _create_experiment_domain()
    written_metrics = [_create_metric_domain(experiment=experiment)[1] for _ in range(3)]

    metric_root = f"{slugify(experiment.project_name)}/experiments/{experiment.id}/metrics"
    listing = [
        {"name": f"{repo.root_dir}/{metric_root}/{written.id}", "StorageClass": "DIRECTORY"}
        for written in written_metrics
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [json.dumps(written) for written in written_metrics]

    metrics = asyncio.run(repo.get_metrics(experiment.project_name, experiment.id))

    assert len(metrics) == 3

    # Matched IDs are removed as we go so that one written metric cannot
    # satisfy two returned metrics.
    unmatched_ids = [written.id for written in written_metrics]
    for metric in metrics:
        assert metric.id in unmatched_ids
        unmatched_ids.remove(metric.id)
def test_get_features(asyn_repo_w_mock_filesystem):
    """Check that `get_features` returns every feature written for an
    experiment.

    The filesystem's `_ls` is primed with one directory entry per feature
    and `_cat_file` serves each feature's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    experiment = _create_experiment_domain()
    written_features = [_create_feature_domain(experiment=experiment)[1] for _ in range(3)]

    feature_root = f"{slugify(experiment.project_name)}/experiments/{experiment.id}/features"
    listing = [
        {"name": f"{repo.root_dir}/{feature_root}/{written.id}", "StorageClass": "DIRECTORY"}
        for written in written_features
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [json.dumps(written) for written in written_features]

    features = asyncio.run(repo.get_features(experiment.project_name, experiment.id))

    assert len(features) == 3

    # Matched IDs are removed as we go so that one written feature cannot
    # satisfy two returned features.
    unmatched_ids = [written.id for written in written_features]
    for feature in features:
        assert feature.id in unmatched_ids
        unmatched_ids.remove(feature.id)
def test_can_serialize_datetime():
    """Check that a dict holding a datetime can be serialized.

    The serialized text must contain the marker "datetime" as well as the
    string form of the timestamp itself.
    """
    timestamp = datetime.utcnow()
    payload = {"date": timestamp, "other": None}

    encoded = json.dumps(payload)

    assert "datetime" in encoded
    assert str(timestamp) in encoded
def test_get_parameters(asyn_repo_w_mock_filesystem):
    """Check that `get_parameters` returns every parameter written for an
    experiment.

    The filesystem's `_ls` is primed with one directory entry per parameter
    and `_cat_file` serves each parameter's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    experiment = _create_experiment_domain()
    written_parameters = [_create_parameter_domain(experiment=experiment)[1] for _ in range(3)]

    parameter_root = f"{slugify(experiment.project_name)}/experiments/{experiment.id}/parameters"
    listing = [
        {"name": f"{repo.root_dir}/{parameter_root}/{written.id}", "StorageClass": "DIRECTORY"}
        for written in written_parameters
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [
        json.dumps(written) for written in written_parameters
    ]

    parameters = asyncio.run(repo.get_parameters(experiment.project_name, experiment.id))

    assert len(parameters) == 3

    # Matched IDs are removed as we go so that one written parameter cannot
    # satisfy two returned parameters.
    unmatched_ids = [written.id for written in written_parameters]
    for parameter in parameters:
        assert parameter.id in unmatched_ids
        unmatched_ids.remove(parameter.id)
def _persist_domain(self, domain, path):
    """Writes the Rubicon object `domain`, serialized as JSON,
    to the local file at `path`.

    The directory containing `path` is created first if it
    does not already exist.
    """
    parent_dir = os.path.dirname(path)
    self.filesystem.mkdirs(parent_dir, exist_ok=True)

    with self.filesystem.open(path, "w") as out_file:
        serialized = json.dumps(domain)
        out_file.write(serialized)
def _persist_domain(self, domain, path):
    """Writes the Rubicon object `domain`, serialized as JSON,
    to the S3 bucket location given by `path`.
    """
    # Serialize before opening so a serialization failure never touches
    # the target.
    payload = json.dumps(domain)

    with self.filesystem.open(path, "w") as out_file:
        out_file.write(payload)
def test_get_project(asyn_repo_w_mock_filesystem):
    """Check that `get_project` returns the project whose serialized
    form the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    written_project = domain.Project(f"Test Project {uuid.uuid4()}")
    repo.filesystem._cat_file.return_value = json.dumps(written_project)

    fetch = repo.get_project(written_project.name)
    project = asyncio.run(fetch)

    assert project.id == written_project.id
    assert project.name == written_project.name
def test_can_serialize_set():
    """Check that a dict holding a set can be serialized and that the
    key and every member of the set appear in the serialized text.
    """
    tag_names = ["tag-a", "tag-b"]
    payload = {"tags": set(tag_names), "other": None}

    encoded = json.dumps(payload)

    assert "tags" in encoded
    assert "tag-a" in encoded
    assert "tag-b" in encoded
def test_get_dataframe_metadata(asyn_repo_w_mock_filesystem):
    """Check that `get_dataframe_metadata` returns the dataframe whose
    serialized form the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    project, written_dataframe = _create_dataframe_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_dataframe)

    fetch = repo.get_dataframe_metadata(project.name, written_dataframe.id)
    dataframe = asyncio.run(fetch)

    assert dataframe.id == written_dataframe.id
    # NOTE(review): this compares the fixture's own `parent_id`, not the
    # value read back through the repository - confirm whether
    # `dataframe.parent_id` was intended.
    assert project.id == written_dataframe.parent_id
def test_get_artifact_metadata(asyn_repo_w_mock_filesystem):
    """Check that `get_artifact_metadata` returns the artifact whose
    serialized form the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    project, written_artifact = _create_artifact_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_artifact)

    fetch = repo.get_artifact_metadata(project.name, written_artifact.id)
    artifact = asyncio.run(fetch)

    assert artifact.id == written_artifact.id
    # NOTE(review): this compares the fixture's own `parent_id`, not the
    # value read back through the repository - confirm whether
    # `artifact.parent_id` was intended.
    assert project.id == written_artifact.parent_id
def test_persist_domain(asyn_s3_repo_w_mock_filesystem):
    """Check that `_persist_domain` makes exactly one filesystem call:
    `_pipe_file` with the target path and the serialized project.
    """
    repo = asyn_s3_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"s3://bucket/root/{slugify(project.name)}/metadata.json"

    asyncio.run(repo._persist_domain(project, metadata_path))

    expected_calls = [call._pipe_file(metadata_path, json.dumps(project))]
    assert repo.filesystem.mock_calls == expected_calls
def test_get_metric(asyn_repo_w_mock_filesystem):
    """Check that `get_metric` returns the metric whose serialized form
    the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    experiment, written_metric = _create_metric_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_metric)

    fetch = repo.get_metric(experiment.project_name, experiment.id, written_metric.name)
    metric = asyncio.run(fetch)

    assert metric.id == written_metric.id
    assert metric.name == written_metric.name
def test_can_serialize_training_metadata():
    """Check that a dict holding a `TrainingMetadata` can be serialized
    and that its path/query pairs appear as nested JSON lists.
    """
    path_query_pairs = [
        ("test/path", "SELECT * FROM test"),
        ("test/other/path", "SELECT * FROM test"),
    ]
    training_metadata = TrainingMetadata(path_query_pairs)
    payload = {"training_metadata": training_metadata}

    encoded = json.dumps(payload)

    expected_fragment = (
        '[["test/path", "SELECT * FROM test"], ["test/other/path", "SELECT * FROM test"]]'
    )
    assert "training_metadata" in encoded
    assert expected_fragment in encoded
def test_get_parameter(asyn_repo_w_mock_filesystem):
    """Check that `get_parameter` returns the parameter whose serialized
    form the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    experiment, written_parameter = _create_parameter_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_parameter)

    fetch = repo.get_parameter(experiment.project_name, experiment.id, written_parameter.name)
    parameter = asyncio.run(fetch)

    assert parameter.id == written_parameter.id
    assert parameter.name == written_parameter.name
def test_get_feature(asyn_repo_w_mock_filesystem):
    """Check that `get_feature` returns the feature whose serialized
    form the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    experiment, written_feature = _create_feature_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_feature)

    fetch = repo.get_feature(experiment.project_name, experiment.id, written_feature.name)
    feature = asyncio.run(fetch)

    assert feature.id == written_feature.id
    assert feature.name == written_feature.name
def test_get_experiment(asyn_repo_w_mock_filesystem):
    """Check that `get_experiment` returns the experiment whose
    serialized form the filesystem's `_cat_file` hands back.
    """
    repo = asyn_repo_w_mock_filesystem
    written_experiment = _create_experiment_domain()
    repo.filesystem._cat_file.return_value = json.dumps(written_experiment)

    fetch = repo.get_experiment(written_experiment.project_name, written_experiment.id)
    experiment = asyncio.run(fetch)

    assert experiment.id == written_experiment.id
    assert experiment.name == written_experiment.name
    assert experiment.project_name == written_experiment.project_name
def test_get_projects(asyn_repo_w_mock_filesystem):
    """Check that `get_projects` returns every written project.

    The filesystem's `_ls` is primed with one directory entry per project
    and `_cat_file` serves each project's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    written_projects = [domain.Project(f"Test Project {uuid.uuid4()}") for _ in range(3)]

    listing = [
        {"name": f"{repo.root_dir}/{slugify(written.name)}", "StorageClass": "DIRECTORY"}
        for written in written_projects
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [json.dumps(written) for written in written_projects]

    projects = asyncio.run(repo.get_projects())

    assert len(projects) == 3

    # Matched IDs are removed as we go so that one written project cannot
    # satisfy two returned projects.
    unmatched_ids = [written.id for written in written_projects]
    for project in projects:
        assert project.id in unmatched_ids
        unmatched_ids.remove(project.id)
def test_get_dataframes_metadata(asyn_repo_w_mock_filesystem):
    """Check that `get_dataframes_metadata` returns every dataframe
    written for a project.

    The filesystem's `_ls` is primed with one directory entry per dataframe
    and `_cat_file` serves each dataframe's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    written_dataframes = [_create_dataframe_domain(project=project)[1] for _ in range(3)]

    listing = [
        {
            "name": f"{repo.root_dir}/{slugify(project.name)}/dataframes/{written.id}",
            "StorageClass": "DIRECTORY",
        }
        for written in written_dataframes
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [
        json.dumps(written) for written in written_dataframes
    ]

    dataframes = asyncio.run(repo.get_dataframes_metadata(project.name))

    assert len(dataframes) == 3

    # Matched IDs are removed as we go so that one written dataframe cannot
    # satisfy two returned dataframes.
    unmatched_ids = [written.id for written in written_dataframes]
    for dataframe in dataframes:
        assert dataframe.id in unmatched_ids
        unmatched_ids.remove(dataframe.id)
def test_get_artifacts_metadata(asyn_repo_w_mock_filesystem):
    """Check that `get_artifacts_metadata` returns every artifact
    written for a project.

    The filesystem's `_ls` is primed with one directory entry per artifact
    and `_cat_file` serves each artifact's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    written_artifacts = [_create_artifact_domain(project=project)[1] for _ in range(3)]

    listing = [
        {
            "name": f"{repo.root_dir}/{slugify(project.name)}/artifacts/{written.id}",
            "StorageClass": "DIRECTORY",
        }
        for written in written_artifacts
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [json.dumps(written) for written in written_artifacts]

    artifacts = asyncio.run(repo.get_artifacts_metadata(project.name))

    assert len(artifacts) == 3

    # Matched IDs are removed as we go so that one written artifact cannot
    # satisfy two returned artifacts.
    unmatched_ids = [written.id for written in written_artifacts]
    for artifact in artifacts:
        assert artifact.id in unmatched_ids
        unmatched_ids.remove(artifact.id)
def test_get_experiments(asyn_repo_w_mock_filesystem):
    """Check that `get_experiments` returns every experiment written
    for a project.

    The filesystem's `_ls` is primed with one directory entry per experiment
    and `_cat_file` serves each experiment's serialized form in turn.
    """
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    written_experiments = [_create_experiment_domain(project=project) for _ in range(3)]

    listing = [
        {
            "name": (
                f"{repo.root_dir}/{slugify(written.project_name)}/experiments/{written.id}"
            ),
            "StorageClass": "DIRECTORY",
        }
        for written in written_experiments
    ]
    repo.filesystem._ls.return_value = listing
    repo.filesystem._cat_file.side_effect = [
        json.dumps(written) for written in written_experiments
    ]

    experiments = asyncio.run(repo.get_experiments(project.name))

    assert len(experiments) == 3

    # Matched IDs are removed as we go so that one written experiment
    # cannot satisfy two returned experiments.
    unmatched_ids = [written.id for written in written_experiments]
    for experiment in experiments:
        assert experiment.id in unmatched_ids
        unmatched_ids.remove(experiment.id)
async def _persist_domain(self, domain, path):
    """Asynchronously writes the Rubicon object `domain`, serialized
    as JSON, to the S3 bucket location given by `path`.
    """
    pipe_file = self.filesystem._pipe_file
    payload = json.dumps(domain)
    await pipe_file(path, payload)