Ejemplo n.º 1
0
def test_create_project_throws_error_if_duplicate(asyn_repo_w_mock_filesystem):
    """Creating a project raises `RubiconException` when `_exists` reports True."""
    # The mocked filesystem answers every existence check with True.
    asyn_repo_w_mock_filesystem.filesystem._exists.return_value = True

    project = domain.Project(f"Test Project {uuid.uuid4()}")

    with pytest.raises(RubiconException) as e:
        asyncio.run(asyn_repo_w_mock_filesystem.create_project(domain.Project(project.name)))

    # Inspect the raised exception itself: `str(e)` would stringify pytest's
    # `ExceptionInfo` wrapper instead of the error message.
    assert f"'{project.name}' already exists" in str(e.value)
Ejemplo n.º 2
0
def test_get_dataframes_metadata_with_no_results(asyn_repo_w_mock_filesystem):
    """`get_dataframes_metadata` yields `[]` when `_ls` raises `FileNotFoundError`."""
    repo = asyn_repo_w_mock_filesystem
    repo.filesystem._ls.side_effect = FileNotFoundError()

    project_name = domain.Project(f"Test Project {uuid.uuid4()}").name
    found = asyncio.run(repo.get_dataframes_metadata(project_name))

    assert found == []
Ejemplo n.º 3
0
def _create_experiment_domain(project=None, tags=None):
    """Build an experiment domain object for use in tests.

    Parameters
    ----------
    project : domain.Project, optional
        Project whose name the experiment is attached to. A project with a
        random name is created when omitted.
    tags : list, optional
        Tags to set on the experiment. Defaults to no tags.

    Returns
    -------
    domain.Experiment
        A new experiment with a random name.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    # Copy into a fresh list so separate calls never share one tag list; the
    # previous version used a mutable default and dropped the argument.
    experiment_tags = [] if tags is None else list(tags)

    return domain.Experiment(
        name=f"Test Experiment {uuid.uuid4()}",
        project_name=project.name,
        tags=experiment_tags,
    )
Ejemplo n.º 4
0
def test_get_experiments_with_no_results(asyn_repo_w_mock_filesystem):
    """`get_experiments` yields `[]` when `_ls` raises `FileNotFoundError`."""
    repo = asyn_repo_w_mock_filesystem
    repo.filesystem._ls.side_effect = FileNotFoundError()

    project_name = domain.Project(f"Test Project {uuid.uuid4()}").name
    found = asyncio.run(repo.get_experiments(project_name))

    assert found == []
Ejemplo n.º 5
0
def test_create_project_throws_error_if_duplicate(memory_repository):
    """Creating a project with an already-used name raises `RubiconException`."""
    repository = memory_repository
    project = _create_project(repository)

    with pytest.raises(RubiconException) as e:
        repository.create_project(domain.Project(project.name))

    # Inspect the raised exception itself: `str(e)` would stringify pytest's
    # `ExceptionInfo` wrapper instead of the error message.
    assert f"'{project.name}' already exists" in str(e.value)
Ejemplo n.º 6
0
def test_get_project(asyn_repo_w_mock_filesystem):
    """`get_project` returns a project matching the JSON served by `_cat_file`."""
    repo = asyn_repo_w_mock_filesystem
    expected = domain.Project(f"Test Project {uuid.uuid4()}")
    repo.filesystem._cat_file.return_value = json.dumps(expected)

    actual = asyncio.run(repo.get_project(expected.name))

    assert actual.id == expected.id
    assert actual.name == expected.name
Ejemplo n.º 7
0
def test_persist_domain(mock_open):
    """`S3Repository._persist_domain` opens the metadata path once, in write mode."""
    test_project = domain.Project(f"Test Project {uuid.uuid4()}")
    slug = slugify(test_project.name)
    metadata_path = f"s3://bucket/root/{slug}/metadata.json"

    repository = S3Repository(root_dir="s3://bucket/root")
    repository._persist_domain(test_project, metadata_path)

    mock_open.assert_called_once_with(metadata_path, "w")
Ejemplo n.º 8
0
def test_sync_from_local(mock_get_project, mock_run):
    """Syncing a local project records an `aws s3 sync` command on the mocked runner."""
    rubicon = Rubicon(persistence="filesystem", root_dir="./local/path")
    project_name = "Sync Test Project"
    mock_get_project.return_value = client.Project(domain.Project(project_name))

    rubicon.sync(project_name, "s3://test/path")

    # Read the mock's public call record rather than its private backing
    # attribute (`_mock_call_args_list`), which is an implementation detail.
    assert "aws s3 sync ./local/path/sync-test-project s3://test/path" in str(
        mock_run.call_args_list
    )
Ejemplo n.º 9
0
def test_delete_artifact_throws_error_if_not_found(asyn_repo_w_mock_filesystem):
    """`delete_artifact` raises `RubiconException` when `rm` raises `FileNotFoundError`."""
    asyn_repo_w_mock_filesystem.filesystem.rm.side_effect = FileNotFoundError()

    project = domain.Project(f"Test Project {uuid.uuid4()}")
    missing_artifact_id = uuid.uuid4()

    with pytest.raises(RubiconException) as e:
        asyncio.run(asyn_repo_w_mock_filesystem.delete_artifact(project.name, missing_artifact_id))

    # Inspect the raised exception itself: `str(e)` would stringify pytest's
    # `ExceptionInfo` wrapper instead of the error message.
    assert f"No artifact with id `{missing_artifact_id}`" in str(e.value)
Ejemplo n.º 10
0
def test_persist_domain(mock_mkdirs, mock_open):
    """`LocalRepository._persist_domain` creates the parent directory and opens the file."""
    test_project = domain.Project(f"Test Project {uuid.uuid4()}")
    slug = slugify(test_project.name)
    metadata_path = f"/local/root/{slug}/metadata.json"

    repository = LocalRepository(root_dir="/local/root")
    repository._persist_domain(test_project, metadata_path)

    parent_dir = os.path.dirname(metadata_path)
    mock_mkdirs.assert_called_once_with(parent_dir, exist_ok=True)
    mock_open.assert_called_once_with(metadata_path, "w")
Ejemplo n.º 11
0
def test_persist_domain(asyn_s3_repo_w_mock_filesystem):
    """The async S3 `_persist_domain` makes a single `_pipe_file` call with the JSON."""
    repo = asyn_s3_repo_w_mock_filesystem
    test_project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"s3://bucket/root/{slugify(test_project.name)}/metadata.json"

    asyncio.run(repo._persist_domain(test_project, metadata_path))

    serialized = json.dumps(test_project)
    assert repo.filesystem.mock_calls == [call._pipe_file(metadata_path, serialized)]
Ejemplo n.º 12
0
def test_get_dataframe_metadata_throws_error_if_not_found(asyn_repo_w_mock_filesystem):
    """`get_dataframe_metadata` raises `RubiconException` when `_cat_file` finds no file."""
    asyn_repo_w_mock_filesystem.filesystem._cat_file.side_effect = FileNotFoundError()

    project = domain.Project(f"Test Project {uuid.uuid4()}")
    missing_dataframe_id = uuid.uuid4()

    with pytest.raises(RubiconException) as e:
        asyncio.run(
            asyn_repo_w_mock_filesystem.get_dataframe_metadata(project.name, missing_dataframe_id)
        )

    # Inspect the raised exception itself: `str(e)` would stringify pytest's
    # `ExceptionInfo` wrapper instead of the error message.
    assert f"No dataframe with id `{missing_dataframe_id}`" in str(e.value)
Ejemplo n.º 13
0
def test_persist_domain_throws_error(mock_mkdirs, mock_open):
    """A `TypeError` from `_persist_domain` leaves the mocked `mkdirs` and `open` uncalled."""
    # The `str` type object itself stands in for a value that cannot be serialized.
    unserializable = str
    bad_project = domain.Project(f"Test Project {uuid.uuid4()}", description=unserializable)
    metadata_path = f"/local/root/{slugify(bad_project.name)}/metadata.json"
    repository = LocalRepository(root_dir="/local/root")

    with pytest.raises(TypeError):
        repository._persist_domain(bad_project, metadata_path)

    mock_mkdirs.assert_not_called()
    mock_open.assert_not_called()
Ejemplo n.º 14
0
def test_sync_from_local_error(mock_get_project, mock_run):
    """A failing sync command surfaces its stderr text in a `RubiconException`."""
    rubicon = Rubicon(persistence="filesystem", root_dir="./local/path")
    project_name = "Sync Test Project"
    mock_get_project.return_value = client.Project(domain.Project(project_name))
    # Make the mocked runner fail the way a non-zero exit status would.
    mock_run.side_effect = subprocess.CalledProcessError(
        cmd="aws cli sync",
        stderr="Some error. I bet it was proxy tho.",
        returncode=1,
    )

    with pytest.raises(RubiconException) as e:
        rubicon.sync(project_name, "s3://test/path")

    # Inspect the raised exception itself: `str(e)` would stringify pytest's
    # `ExceptionInfo` wrapper instead of the error message.
    assert "Some error. I bet it was proxy tho." in str(e.value)
def test_get_or_create_project_get(asyn_client_w_mock_repo):
    """`get_or_create_project` only calls `get_project` when the repository returns one."""
    async_client = asyn_client_w_mock_repo
    requested_name = f"Test Project {uuid.uuid4()}"
    async_client.repository.get_project.return_value = domain.Project(name=requested_name)

    fetched = asyncio.run(async_client.get_or_create_project(requested_name))

    assert fetched.name == requested_name
    assert async_client.repository.mock_calls == [call.get_project(fetched.name)]
Ejemplo n.º 16
0
def test_create_project(asyn_repo_w_mock_filesystem):
    """`create_project` checks existence, invalidates the cache and persists the project."""
    repo = asyn_repo_w_mock_filesystem
    repo.filesystem._exists.return_value = False

    new_project = domain.Project(f"Test Project {uuid.uuid4()}")
    metadata_path = f"{repo.root_dir}/{slugify(new_project.name)}/metadata.json"

    asyncio.run(repo.create_project(new_project))

    assert repo.filesystem.mock_calls == [
        call._exists(metadata_path),
        call.invalidate_cache(),
    ]
    assert repo._persist_domain.mock_calls == [
        call._persist_domain(new_project, metadata_path),
    ]
Ejemplo n.º 17
0
    def _create_project_domain(self, name, description, github_url,
                               training_metadata):
        """Build a `domain.Project` from the given attributes.

        When auto-git is enabled in the config and no `github_url` was given,
        the URL is taken from `self._get_github_url()`. Training metadata that
        is not None is wrapped in `domain.utils.TrainingMetadata`.
        """
        needs_git_lookup = self.config.is_auto_git_enabled and github_url is None
        if needs_git_lookup:
            github_url = self._get_github_url()

        wrapped_metadata = (
            None
            if training_metadata is None
            else domain.utils.TrainingMetadata(training_metadata)
        )

        return domain.Project(
            name,
            description=description,
            github_url=github_url,
            training_metadata=wrapped_metadata,
        )
Ejemplo n.º 18
0
def test_properties():
    """The client `Project` exposes the attributes of the domain project it wraps."""
    metadata = domain.utils.TrainingMetadata([("test/path", "SELECT * FROM test")])
    domain_project = domain.Project(
        "Test Project",
        description="a test project",
        github_url="github.com",
        training_metadata=metadata,
    )

    project = Project(domain_project)

    assert project.name == "Test Project"
    assert project.description == "a test project"
    assert project.github_url == "github.com"
    # The client is compared against the first entry of the wrapped metadata.
    first_entry = domain_project.training_metadata.training_metadata[0]
    assert project.training_metadata == first_entry
    assert project.created_at == domain_project.created_at
    assert project.id == domain_project.id
Ejemplo n.º 19
0
    def get_projects(self):
        """Load every project found under the repository root.

        Returns
        -------
        list of rubicon.domain.Project
            One project per metadata document read from the filesystem, or
            an empty list if a `FileNotFoundError` is raised along the way.
        """
        try:
            metadata_paths = self._ls_directories_only(self.root_dir)
            raw_documents = self.filesystem.cat(metadata_paths).values()

            loaded = []
            for raw in raw_documents:
                loaded.append(domain.Project(**json.loads(raw)))
            return loaded
        except FileNotFoundError:
            return []
Ejemplo n.º 20
0
    def get_project(self, project_name):
        """Load a single project from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project to retrieve.

        Returns
        -------
        rubicon.domain.Project
            The project with name `project_name`.

        Raises
        ------
        RubiconException
            If reading the project's metadata raises `FileNotFoundError`.
        """
        metadata_path = self._get_project_metadata_path(project_name)

        try:
            raw_metadata = self.filesystem.cat(metadata_path)
            attributes = json.loads(raw_metadata)
        except FileNotFoundError:
            raise RubiconException(f"No project with name '{project_name}' found.")

        return domain.Project(**attributes)
def test_get_projects(asyn_client_w_mock_repo):
    """`projects()` returns one client project per domain project from the repository."""
    rubicon = asyn_client_w_mock_repo

    project_domains = [domain.Project(name=f"Test Project {uuid.uuid4()}") for _ in range(3)]
    rubicon.repository.get_projects.return_value = project_domains

    projects = asyncio.run(rubicon.projects())

    # Tick off each expected id; anything left over was not expected.
    remaining_ids = [p.id for p in projects]
    for project_domain in project_domains:
        assert project_domain.id in remaining_ids
        remaining_ids.remove(project_domain.id)

    assert not remaining_ids
    assert rubicon.repository.mock_calls == [call.get_projects()]
Ejemplo n.º 22
0
    async def get_projects(self):
        """Overrides `rubicon.repository.BaseRepository.get_projects` to
        asynchronously load every project found under the repository root.

        Returns
        -------
        list of rubicon.domain.Project
            One project per metadata document read from the filesystem, or
            an empty list if a `FileNotFoundError` is raised along the way.
        """
        try:
            metadata_paths = await self._ls_directories_only(self.root_dir)
            # Gather all reads together instead of awaiting them one by one.
            pending_reads = [self.filesystem._cat_file(path) for path in metadata_paths]
            raw_documents = await asyncio.gather(*pending_reads)
            return [domain.Project(**json.loads(raw)) for raw in raw_documents]
        except FileNotFoundError:
            return []
Ejemplo n.º 23
0
def test_get_projects(asyn_repo_w_mock_filesystem):
    """`get_projects` returns every project served by the mocked `_ls`/`_cat_file`."""
    repo = asyn_repo_w_mock_filesystem
    written_projects = [domain.Project(f"Test Project {uuid.uuid4()}") for _ in range(3)]

    repo.filesystem._ls.return_value = [
        {"name": f"{repo.root_dir}/{slugify(p.name)}", "StorageClass": "DIRECTORY"}
        for p in written_projects
    ]
    repo.filesystem._cat_file.side_effect = [json.dumps(p) for p in written_projects]

    projects = asyncio.run(repo.get_projects())

    assert len(projects) == 3

    # Each returned id must match a distinct written project.
    unmatched_ids = [p.id for p in written_projects]
    for returned in projects:
        assert returned.id in unmatched_ids
        unmatched_ids.remove(returned.id)
Ejemplo n.º 24
0
    async def get_project(self, project_name):
        """Overrides `rubicon.repository.BaseRepository.get_project` to
        asynchronously load a single project from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project to retrieve.

        Returns
        -------
        rubicon.domain.Project
            The project with name `project_name`.

        Raises
        ------
        RubiconException
            If reading the project's metadata raises `FileNotFoundError`.
        """
        metadata_path = self._get_project_metadata_path(project_name)

        try:
            raw_metadata = await self.filesystem._cat_file(metadata_path)
            attributes = json.loads(raw_metadata)
        except FileNotFoundError:
            raise RubiconException(f"No project with name '{project_name}' found.")

        return domain.Project(**attributes)
Ejemplo n.º 25
0
def test_get_experiments(asyn_repo_w_mock_filesystem):
    """`get_experiments` returns every experiment served by the mocked filesystem."""
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")
    written_experiments = [_create_experiment_domain(project=project) for _ in range(3)]

    repo.filesystem._ls.return_value = [
        {
            "name": f"{repo.root_dir}/{slugify(e.project_name)}/experiments/{e.id}",
            "StorageClass": "DIRECTORY",
        }
        for e in written_experiments
    ]
    repo.filesystem._cat_file.side_effect = [json.dumps(e) for e in written_experiments]

    experiments = asyncio.run(repo.get_experiments(project.name))

    assert len(experiments) == 3

    # Each returned id must match a distinct written experiment.
    unmatched_ids = [e.id for e in written_experiments]
    for returned in experiments:
        assert returned.id in unmatched_ids
        unmatched_ids.remove(returned.id)
def test_get_projects_as_dask_df(asyn_client_w_mock_repo):
    """`get_project_as_dask_df` returns a dask dataframe with one row per experiment."""
    rubicon = asyn_client_w_mock_repo

    project_name = f"Test Project {uuid.uuid4()}"
    project_domain = domain.Project(name=project_name)
    experiment_domains = [
        domain.Experiment(project_name=project_name, name=f"Test Experiment {uuid.uuid4()}")
        for _ in range(2)
    ]

    # Stub every repository lookup the dataframe build is given here.
    repository = rubicon.repository
    repository.get_project.return_value = project_domain
    repository.get_experiments.return_value = experiment_domains
    repository.get_tags.return_value = [{"added_tags": [], "removed_tags": []}]
    repository.get_parameters.return_value = []
    repository.get_metrics.return_value = []

    ddf = asyncio.run(rubicon.get_project_as_dask_df(project_name))

    assert isinstance(ddf, dd.core.DataFrame)
    assert len(ddf.compute()) == 2
Ejemplo n.º 27
0
def test_get_artifacts_metadata(asyn_repo_w_mock_filesystem):
    """`get_artifacts_metadata` returns every artifact served by the mocked filesystem."""
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")

    written_artifacts = []
    for _ in range(3):
        _, artifact = _create_artifact_domain(project=project)
        written_artifacts.append(artifact)

    repo.filesystem._ls.return_value = [
        {
            "name": f"{repo.root_dir}/{slugify(project.name)}/artifacts/{a.id}",
            "StorageClass": "DIRECTORY",
        }
        for a in written_artifacts
    ]
    repo.filesystem._cat_file.side_effect = [json.dumps(a) for a in written_artifacts]

    artifacts = asyncio.run(repo.get_artifacts_metadata(project.name))

    assert len(artifacts) == 3

    # Each returned id must match a distinct written artifact.
    unmatched_ids = [a.id for a in written_artifacts]
    for returned in artifacts:
        assert returned.id in unmatched_ids
        unmatched_ids.remove(returned.id)
Ejemplo n.º 28
0
def test_get_dataframes_metadata(asyn_repo_w_mock_filesystem):
    """`get_dataframes_metadata` returns every dataframe served by the mocked filesystem."""
    repo = asyn_repo_w_mock_filesystem
    project = domain.Project(f"Test Project {uuid.uuid4()}")

    written_dataframes = []
    for _ in range(3):
        _, dataframe_domain = _create_dataframe_domain(project=project)
        written_dataframes.append(dataframe_domain)

    repo.filesystem._ls.return_value = [
        {
            "name": f"{repo.root_dir}/{slugify(project.name)}/dataframes/{d.id}",
            "StorageClass": "DIRECTORY",
        }
        for d in written_dataframes
    ]
    repo.filesystem._cat_file.side_effect = [json.dumps(d) for d in written_dataframes]

    dataframes = asyncio.run(repo.get_dataframes_metadata(project.name))

    assert len(dataframes) == 3

    # Each returned id must match a distinct written dataframe.
    unmatched_ids = [d.id for d in written_dataframes]
    for returned in dataframes:
        assert returned.id in unmatched_ids
        unmatched_ids.remove(returned.id)
Ejemplo n.º 29
0
def _create_artifact_domain(project=None, tags=None):
    """Build an artifact domain object, and its parent project, for use in tests.

    Parameters
    ----------
    project : domain.Project, optional
        Project that becomes the artifact's parent. A project with a random
        name is created when omitted.
    tags : list, optional
        Accepted for signature parity with the other helpers; currently
        unused. Defaults to None rather than a shared mutable list.

    Returns
    -------
    tuple
        `(project, artifact)`, where the artifact's `parent_id` is the
        project's id.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    return project, domain.Artifact(name=f"Test Artifact {uuid.uuid4()}", parent_id=project.id)
Ejemplo n.º 30
0
def _create_dataframe_domain(project=None, tags=None):
    """Build a dataframe domain object, and its parent project, for use in tests.

    Parameters
    ----------
    project : domain.Project, optional
        Project that becomes the dataframe's parent. A project with a random
        name is created when omitted.
    tags : list, optional
        Accepted for signature parity with the other helpers; currently
        unused. Defaults to None rather than a shared mutable list.

    Returns
    -------
    tuple
        `(project, dataframe)`, where the dataframe's `parent_id` is the
        project's id.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    return project, domain.Dataframe(parent_id=project.id)