def unified_dataset(self):
    """Fetch the unified dataset that belongs to this project.

    Issues a GET against this resource's API path with ``/unifiedDataset``
    appended, then wraps the returned JSON in a dataset resource that is
    addressed by that same path.

    :return: Unified dataset for this project.
    :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
    """
    endpoint = self.api_path + "/unifiedDataset"
    response = self.client.get(endpoint)
    # NOTE(review): ``successful()`` presumably raises on an error response -- confirm.
    payload = response.successful().json()
    return Dataset.from_json(self.client, payload, endpoint)
def create(self, creation_spec):
    """Create a Dataset in Tamr.

    The specification is POSTed as the JSON body to this collection's API
    path, and the JSON returned by the server is wrapped in a dataset
    resource.

    :param creation_spec: Dataset creation specification, formatted as
        described in the `Public Docs for Creating a Dataset
        <https://docs.tamr.com/reference#create-a-dataset>`_.
    :type creation_spec: dict[str, str]
    :returns: The created Dataset
    :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
    """
    response = self.client.post(self.api_path, json=creation_spec)
    # NOTE(review): ``successful()`` presumably raises on an error response -- confirm.
    created = response.successful().json()
    # No explicit API path is passed here; ``from_json`` is left to work it
    # out from the returned payload (presumably -- verify against Dataset).
    return Dataset.from_json(self.client, created)
def published_cluster_stats(self):
    """Look up the published cluster stats dataset of this project.

    :returns: The published cluster stats dataset.
    :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
    """
    # TODO: switch to a direct API call once the API is fixed. Two things
    # have to work first: fetching the dataset, and calling refresh on the
    # dataset that comes back. In the meantime the dataset is located by
    # name, and that name is derived from the Unified Dataset's name.
    stats_name = self.unified_dataset().name + "_dedup_published_cluster_stats"
    found = self.client.datasets.by_name(stats_name)
    alias = self.api_path + "/publishedClusterStats"
    # Re-wrap the raw JSON of the dataset found by name so the returned
    # resource is addressed through this project's path.
    return Dataset.from_json(self.client, found._data, alias)
def published_clusters(self):
    """Published record clusters generated by Tamr's pair-matching model.

    :returns: The published clusters represented as a dataset.
    :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
    """
    # TODO: switch to a direct API call once the API is fixed. Two things
    # have to work first: fetching the dataset, and calling refresh on the
    # dataset that comes back. In the meantime the dataset is located by
    # name, and that name is derived from the Unified Dataset's name.
    clusters_name = self.unified_dataset().name + "_dedup_published_clusters"
    payload = self.client.datasets.by_name(clusters_name)._data
    endpoint = self.api_path + "/publishedClusters"
    # Re-wrap the raw JSON of the dataset found by name so the returned
    # resource is addressed through this project's path.
    return Dataset.from_json(self.client, payload, endpoint)