Beispiel #1
0
    def unified_dataset(self):
        """Fetch the unified dataset that belongs to this project.

        Issues a GET against ``<api_path>/unifiedDataset`` and builds a
        Dataset from the JSON body of the response. The same endpoint is
        passed to ``Dataset.from_json`` as its third argument.

        :return: Unified dataset for this project.
        :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
        """
        endpoint = self.api_path + "/unifiedDataset"
        response = self.client.get(endpoint)
        # NOTE(review): ``successful()`` presumably rejects error responses
        # before the body is parsed -- confirm against the client's response type.
        payload = response.successful().json()
        return Dataset.from_json(self.client, payload, endpoint)
Beispiel #2
0
    def create(self, creation_spec):
        """
        Create a new Dataset in Tamr.

        The specification is POSTed as the JSON body to this collection's
        ``api_path``; the JSON returned by the server is turned into a Dataset.

        :param creation_spec: Dataset creation specification, formatted as described in the `Public Docs for Creating a Dataset <https://docs.tamr.com/reference#create-a-dataset>`_.
        :type creation_spec: dict[str, str]
        :returns: The created Dataset
        :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
        """
        response = self.client.post(self.api_path, json=creation_spec)
        created_json = response.successful().json()
        return Dataset.from_json(self.client, created_json)
Beispiel #3
0
    def published_cluster_stats(self):
        """Look up the published cluster stats dataset of this project.

        The dataset is located by name (the unified dataset's name plus the
        ``_dedup_published_cluster_stats`` suffix) and then re-wrapped so that
        it is associated with ``<api_path>/publishedClusterStats``.

        :returns: The published cluster stats dataset.
        :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
        """
        # Workaround: the direct API cannot yet be used both to fetch this
        # dataset and to refresh the result. Until it can, derive the dataset
        # name from the project's Unified Dataset and look it up by that name.
        # Swap this for a direct API call once the API is fixed.
        stats_name = self.unified_dataset().name + "_dedup_published_cluster_stats"
        found = self.client.datasets.by_name(stats_name)

        stats_path = self.api_path + "/publishedClusterStats"
        # Reuse the raw JSON of the looked-up dataset under the project-scoped path.
        return Dataset.from_json(self.client, found._data, stats_path)
Beispiel #4
0
    def published_clusters(self):
        """Look up the published record clusters of this project as a dataset.

        The dataset is located by name (the unified dataset's name plus the
        ``_dedup_published_clusters`` suffix) and then re-wrapped so that it
        is associated with ``<api_path>/publishedClusters``.

        :returns: The published clusters represented as a dataset.
        :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
        """
        # Workaround: the direct API cannot yet be used both to fetch this
        # dataset and to refresh the result. Until it can, derive the dataset
        # name from the project's Unified Dataset and look it up by that name.
        # Swap this for a direct API call once the API is fixed.
        clusters_name = self.unified_dataset().name + "_dedup_published_clusters"
        found = self.client.datasets.by_name(clusters_name)

        # Reuse the raw JSON of the looked-up dataset under the project-scoped path.
        payload = found._data
        clusters_path = self.api_path + "/publishedClusters"
        return Dataset.from_json(self.client, payload, clusters_path)