Example #1
0
    def get_tags(self, project_name, experiment_id=None, dataframe_id=None):
        """Load the tag records stored for an object on the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project that owns the object whose
            tags are being read.
        experiment_id : str, optional
            The ID of the experiment to read tags from.
        dataframe_id : str, optional
            The ID of the dataframe to read tags from.

        Returns
        -------
        list of dict
            One dictionary per tag file, each holding a single
            key, `added_tags` or `removed_tags`, whose value is
            the list of tag names added to or removed from the
            object. Entries follow the order produced by
            `_sort_tag_paths`. Empty when no tag files match.
        """
        root = self._get_tag_metadata_root(project_name, experiment_id, dataframe_id)

        matches = self.filesystem.glob(f"{root}/tags_*.json", detail=True)
        if not matches:
            return []

        ordered = self._sort_tag_paths(matches)

        # Read every file in one call, then emit the parsed contents in sorted order.
        contents = self.filesystem.cat([path for _, path in ordered])
        return [json.loads(contents[path]) for _, path in ordered]
Example #2
0
    def get_parameters(self, project_name, experiment_id):
        """Load every parameter logged to one experiment from the
        configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the experiment with ID
            `experiment_id` is logged to.
        experiment_id : str
            The ID of the experiment whose parameters are read.

        Returns
        -------
        list of rubicon.domain.Parameter
            The parameters logged to the experiment with ID
            `experiment_id`, or an empty list when the lookup
            raises `FileNotFoundError`.
        """
        root = self._get_parameter_metadata_root(project_name, experiment_id)

        try:
            paths = self._ls_directories_only(root)
            raw_by_path = self.filesystem.cat(paths)
            return [domain.Parameter(**json.loads(raw)) for raw in raw_by_path.values()]
        except FileNotFoundError:
            # A missing path is reported as "no parameters" rather than an error.
            return []
Example #3
0
    async def get_parameter(self, project_name, experiment_id, parameter_name):
        """Overrides `rubicon.repository.BaseRepository.get_parameter` to
        asynchronously retrieve a parameter from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project this parameter belongs to.
        experiment_id : str
            The ID of the experiment the parameter with name
            `parameter_name` is logged to.
        parameter_name : str
            The name of the parameter to retrieve.

        Returns
        -------
        rubicon.domain.Parameter
            The parameter with name `parameter_name`.

        Raises
        ------
        RubiconException
            If reading the parameter's metadata raises
            `FileNotFoundError`.
        """
        parameter_metadata_path = self._get_parameter_metadata_path(
            project_name, experiment_id, parameter_name)

        try:
            parameter = json.loads(
                await self.filesystem._cat_file(parameter_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(
                f"No parameter with name '{parameter_name}' found.") from err

        return domain.Parameter(**parameter)
Example #4
0
    async def get_experiments(self, project_name):
        """Overrides `rubicon.repository.BaseRepository.get_experiments` to
        asynchronously load every experiment logged to the project named
        `project_name` from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project whose experiments are read.

        Returns
        -------
        list of rubicon.domain.Experiment
            The experiments logged to the project with name
            `project_name`, or an empty list when the lookup
            raises `FileNotFoundError`.
        """
        root = self._get_experiment_metadata_root(project_name)

        try:
            paths = await self._ls_directories_only(root)
            # Start all reads together; gather returns results in path order.
            reads = [self.filesystem._cat_file(path) for path in paths]
            payloads = await asyncio.gather(*reads)
            return [domain.Experiment(**json.loads(payload)) for payload in payloads]
        except FileNotFoundError:
            return []
Example #5
0
    def get_experiments(self, project_name):
        """Load every experiment logged to the project named
        `project_name` from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project whose experiments are read.

        Returns
        -------
        list of rubicon.domain.Experiment
            The experiments logged to the project with name
            `project_name`, or an empty list when the lookup
            raises `FileNotFoundError`.
        """
        root = self._get_experiment_metadata_root(project_name)

        try:
            paths = self._ls_directories_only(root)
            raw_by_path = self.filesystem.cat(paths)
            return [domain.Experiment(**json.loads(raw)) for raw in raw_by_path.values()]
        except FileNotFoundError:
            # A missing path is reported as "no experiments" rather than an error.
            return []
Example #6
0
    async def get_metric(self, project_name, experiment_id, metric_name):
        """Overrides `rubicon.repository.BaseRepository.get_metric` to
        asynchronously retrieve a metric from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project this metric belongs to.
        experiment_id : str
            The ID of the experiment the metric with name
            `metric_name` is logged to.
        metric_name : str
            The name of the metric to retrieve.

        Returns
        -------
        rubicon.domain.Metric
            The metric with name `metric_name`.

        Raises
        ------
        RubiconException
            If reading the metric's metadata raises
            `FileNotFoundError`.
        """
        metric_metadata_path = self._get_metric_metadata_path(
            project_name, experiment_id, metric_name)

        try:
            metric = json.loads(
                await self.filesystem._cat_file(metric_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(
                f"No metric with name '{metric_name}' found.") from err

        return domain.Metric(**metric)
Example #7
0
    async def get_artifact_metadata(self, project_name, artifact_id, experiment_id=None):
        """Overrides `rubicon.repository.BaseRepository.get_artifact_metadata`
        to asynchronously retrieve an artifact's metadata from the configured
        filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the artifact with ID
            `artifact_id` is logged to.
        artifact_id : str
            The ID of the artifact to retrieve.
        experiment_id : str, optional
            The ID of the experiment the artifact with ID
            `artifact_id` is logged to. Artifacts do not
            need to belong to an experiment.

        Returns
        -------
        rubicon.domain.Artifact
            The artifact with ID `artifact_id`.

        Raises
        ------
        RubiconException
            If reading the artifact's metadata raises
            `FileNotFoundError`.
        """
        artifact_metadata_path = self._get_artifact_metadata_path(
            project_name, experiment_id, artifact_id
        )

        try:
            artifact = json.loads(await self.filesystem._cat_file(artifact_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(f"No artifact with id `{artifact_id}` found.") from err

        return domain.Artifact(**artifact)
Example #8
0
    async def get_artifacts_metadata(self, project_name, experiment_id=None):
        """Overrides `rubicon.repository.BaseRepository.get_artifacts_metadata`
        to asynchronously load the metadata of every artifact that belongs
        to the specified object from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project whose artifacts are read.
        experiment_id : str, optional
            The ID of the experiment whose artifacts are read.
            Artifacts do not need to belong to an experiment.

        Returns
        -------
        list of rubicon.domain.Artifact
            The artifacts logged to the specified object, or an
            empty list when the lookup raises `FileNotFoundError`.
        """
        root = self._get_artifact_metadata_root(project_name, experiment_id)

        try:
            paths = await self._ls_directories_only(root)
            # Start all reads together; gather returns results in path order.
            reads = [self.filesystem._cat_file(path) for path in paths]
            payloads = await asyncio.gather(*reads)
            return [domain.Artifact(**json.loads(payload)) for payload in payloads]
        except FileNotFoundError:
            return []
Example #9
0
    async def get_dataframe_metadata(self, project_name, dataframe_id, experiment_id=None):
        """Overrides `rubicon.repository.BaseRepository.get_dataframe_metadata`
        to asynchronously retrieve a dataframe's metadata from the configured
        filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the dataframe with ID
            `dataframe_id` is logged to.
        dataframe_id : str
            The ID of the dataframe to retrieve.
        experiment_id : str, optional
            The ID of the experiment the dataframe with ID
            `dataframe_id` is logged to. Dataframes do not
            need to belong to an experiment.

        Returns
        -------
        rubicon.domain.Dataframe
            The dataframe with ID `dataframe_id`.

        Raises
        ------
        RubiconException
            If reading the dataframe's metadata raises
            `FileNotFoundError`.
        """
        dataframe_metadata_path = self._get_dataframe_metadata_path(
            project_name, experiment_id, dataframe_id
        )

        try:
            dataframe = json.loads(await self.filesystem._cat_file(dataframe_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(f"No dataframe with id `{dataframe_id}` found.") from err

        return domain.Dataframe(**dataframe)
Example #10
0
    async def get_metrics(self, project_name, experiment_id):
        """Overrides `rubicon.repository.BaseRepository.get_metrics` to
        asynchronously load every metric logged to the experiment with
        ID `experiment_id` from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the experiment with ID
            `experiment_id` is logged to.
        experiment_id : str
            The ID of the experiment whose metrics are read.

        Returns
        -------
        list of rubicon.domain.Metric
            The metrics logged to the experiment with ID
            `experiment_id`, or an empty list when the lookup
            raises `FileNotFoundError`.
        """
        root = self._get_metric_metadata_root(project_name, experiment_id)

        try:
            paths = await self._ls_directories_only(root)
            # Start all reads together; gather returns results in path order.
            reads = [self.filesystem._cat_file(path) for path in paths]
            payloads = await asyncio.gather(*reads)
            return [domain.Metric(**json.loads(payload)) for payload in payloads]
        except FileNotFoundError:
            return []
Example #11
0
    def get_dataframes_metadata(self, project_name, experiment_id=None):
        """Load the metadata of every dataframe that belongs to the
        specified object from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project whose dataframes are read.
        experiment_id : str, optional
            The ID of the experiment whose dataframes are read.
            Dataframes do not need to belong to an experiment.

        Returns
        -------
        list of rubicon.domain.Dataframe
            The dataframes logged to the specified object, or an
            empty list when the lookup raises `FileNotFoundError`.
        """
        root = self._get_dataframe_metadata_root(project_name, experiment_id)

        try:
            paths = self._ls_directories_only(root)
            raw_by_path = self.filesystem.cat(paths)
            return [domain.Dataframe(**json.loads(raw)) for raw in raw_by_path.values()]
        except FileNotFoundError:
            # A missing path is reported as "no dataframes" rather than an error.
            return []
Example #12
0
    async def get_feature(self, project_name, experiment_id, feature_name):
        """Overrides `rubicon.repository.BaseRepository.get_feature` to
        asynchronously retrieve a feature from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the experiment with ID
            `experiment_id` is logged to.
        experiment_id : str
            The ID of the experiment the feature with name
            `feature_name` is logged to.
        feature_name : str
            The name of the feature to retrieve.

        Returns
        -------
        rubicon.domain.Feature
            The feature with name `feature_name`.

        Raises
        ------
        RubiconException
            If reading the feature's metadata raises
            `FileNotFoundError`.
        """
        feature_metadata_path = self._get_feature_metadata_path(
            project_name, experiment_id, feature_name)

        try:
            feature = json.loads(
                await self.filesystem._cat_file(feature_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(
                f"No feature with name '{feature_name}' found.") from err

        return domain.Feature(**feature)
Example #13
0
    async def get_experiment(self, project_name, experiment_id):
        """Overrides `rubicon.repository.BaseRepository.get_experiment` to
        asynchronously retrieve an experiment from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the experiment with ID
            `experiment_id` is logged to.
        experiment_id : str
            The ID of the experiment to retrieve.

        Returns
        -------
        rubicon.domain.Experiment
            The experiment with ID `experiment_id`.

        Raises
        ------
        RubiconException
            If reading the experiment's metadata raises
            `FileNotFoundError`.
        """
        experiment_metadata_path = self._get_experiment_metadata_path(
            project_name, experiment_id)

        try:
            experiment = json.loads(
                await self.filesystem._cat_file(experiment_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(
                f"No experiment with id `{experiment_id}` found.") from err

        return domain.Experiment(**experiment)
Example #14
0
def test_can_deserialize_training_metadata():
    """A `training_metadata`-typed JSON object decodes to `TrainingMetadata`."""
    # NOTE(review): `json` here is presumably a project decoder that understands
    # the `_type` envelope; the stdlib decoder would yield a plain dict.
    payload = (
        '{"training_metadata": {"_type": "training_metadata", "value": '
        '[["test/path", "SELECT * FROM test"], '
        '["test/other/path", "SELECT * FROM test"]]}}'
    )
    expected_pairs = [
        ("test/path", "SELECT * FROM test"),
        ("test/other/path", "SELECT * FROM test"),
    ]

    decoded = json.loads(payload)["training_metadata"]

    assert isinstance(decoded, TrainingMetadata)
    assert decoded.training_metadata == expected_pairs
Example #15
0
    def get_projects(self):
        """Load every project found under `root_dir` on the filesystem.

        Returns
        -------
        list of rubicon.domain.Project
            The projects read from the filesystem, or an empty
            list when the lookup raises `FileNotFoundError`.
        """
        try:
            paths = self._ls_directories_only(self.root_dir)
            raw_by_path = self.filesystem.cat(paths)
            return [domain.Project(**json.loads(raw)) for raw in raw_by_path.values()]
        except FileNotFoundError:
            # A missing root is reported as "no projects" rather than an error.
            return []
Example #16
0
    async def get_projects(self):
        """Overrides `rubicon.repository.BaseRepository.get_projects` to
        asynchronously load every project found under `root_dir` on the
        filesystem.

        Returns
        -------
        list of rubicon.domain.Project
            The projects read from the filesystem, or an empty
            list when the lookup raises `FileNotFoundError`.
        """
        try:
            paths = await self._ls_directories_only(self.root_dir)
            # Start all reads together; gather returns results in path order.
            reads = [self.filesystem._cat_file(path) for path in paths]
            payloads = await asyncio.gather(*reads)
            return [domain.Project(**json.loads(payload)) for payload in payloads]
        except FileNotFoundError:
            return []
Example #17
0
    def get_project(self, project_name):
        """Retrieve a project from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project to retrieve.

        Returns
        -------
        rubicon.domain.Project
            The project with name `project_name`.

        Raises
        ------
        RubiconException
            If reading the project's metadata raises
            `FileNotFoundError`.
        """
        project_metadata_path = self._get_project_metadata_path(project_name)

        try:
            project = json.loads(self.filesystem.cat(project_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(f"No project with name '{project_name}' found.") from err

        return domain.Project(**project)
Example #18
0
    async def get_tags(self, project_name, experiment_id=None, dataframe_id=None):
        """Overrides `rubicon.repository.BaseRepository.get_tags` to
        asynchronously load the tag records stored for an object on the
        configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project that owns the object whose
            tags are being read.
        experiment_id : str, optional
            The ID of the experiment to read tags from.
        dataframe_id : str, optional
            The ID of the dataframe to read tags from.

        Returns
        -------
        list of dict
            One dictionary per tag file, each holding a single
            key, `added_tags` or `removed_tags`, whose value is
            the list of tag names added to or removed from the
            object. Entries follow the order produced by
            `_sort_tag_paths`. Empty when no tag files are listed.
        """
        root = self._get_tag_metadata_root(project_name, experiment_id, dataframe_id)

        listing = await self.filesystem._lsdir(root)
        # Keep only the listing entries whose name contains a `/tags_` segment.
        tag_entries = [entry for entry in listing if "/tags_" in entry["name"]]
        if not tag_entries:
            return []

        ordered = self._sort_tag_paths(tag_entries)

        # Start all reads together; gather returns results in sorted order.
        reads = [self.filesystem._cat_file(path) for _, path in ordered]
        payloads = await asyncio.gather(*reads)
        return [json.loads(payload) for payload in payloads]
Example #19
0
    async def get_project(self, project_name):
        """Overrides `rubicon.repository.BaseRepository.get_project` to
        asynchronously retrieve a project from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project to retrieve.

        Returns
        -------
        rubicon.domain.Project
            The project with name `project_name`.

        Raises
        ------
        RubiconException
            If reading the project's metadata raises
            `FileNotFoundError`.
        """
        project_metadata_path = self._get_project_metadata_path(project_name)

        try:
            project = json.loads(await self.filesystem._cat_file(project_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as the cause.
            raise RubiconException(f"No project with name '{project_name}' found.") from err

        return domain.Project(**project)
Example #20
0
def test_can_deserialize_datetime():
    """A `datetime`-typed JSON object decodes back to the original datetime."""
    # Use a fixed naive timestamp with non-zero microseconds instead of the
    # clock: `str()` of a datetime omits the fractional part when microseconds
    # are 0, so a clock-derived value does not always serialize to the same
    # shape, and `datetime.utcnow()` is deprecated in recent Python versions.
    moment = datetime(2021, 3, 14, 15, 9, 26, 535897)
    to_deserialize = '{"date": {"_type": "datetime", "value": "' + str(moment) + '"}}'
    deserialized = json.loads(to_deserialize)

    assert deserialized["date"] == moment
Example #21
0
def test_can_deserialize_set():
    """A `set`-typed JSON object decodes to a Python set, ignoring list order."""
    payload = '{"tags": {"_type": "set", "value": ["tag-b", "tag-a"]}}'

    tags = json.loads(payload)["tags"]

    assert tags == {"tag-a", "tag-b"}