def unified_dataset(self):
        """Fetch the unified dataset that belongs to this project.

        Issues a GET through ``self.client`` against this project's
        ``unifiedDataset`` sub-path and builds the dataset from the JSON body
        of the response.

        :return: Unified dataset for this project.
        :rtype: :class:`~tamr_unify_client.models.dataset.resource.Dataset`
        """
        endpoint = self.api_path + "/unifiedDataset"
        response = self.client.get(endpoint)
        # NOTE(review): successful() presumably rejects non-2xx responses
        # before the body is decoded -- confirm against the client library.
        payload = response.successful().json()
        return Dataset.from_json(self.client, payload, endpoint)
Ejemplo n.º 2
0
    def published_clusters(self):
        """Dataset of the published record clusters produced by Unify's
        pair-matching model.

        To republish clusters according to the latest clustering, call
        :func:`~tamr_unify_client.models.dataset.resource.Dataset.refresh` on
        the dataset returned here.

        :returns: The published clusters represented as a dataset.
        :rtype: :class:`~tamr_unify_client.models.dataset.resource.Dataset`
        """
        endpoint = self.api_path + "/publishedClusters"
        clusters = Dataset(self.client, None, endpoint)
        return clusters
Ejemplo n.º 3
0
    def pairs(self):
        """Dataset of the record pairs produced by Unify's binning model.

        These are the pairs shown on the "Pairs" page in the Unify UI.

        To regenerate pairs according to the latest binning model, call
        :func:`~tamr_unify_client.models.dataset.resource.Dataset.refresh` on
        the dataset returned here.

        :returns: The record pairs represented as a dataset.
        :rtype: :class:`~tamr_unify_client.models.dataset.resource.Dataset`
        """
        endpoint = self.api_path + "/recordPairs"
        record_pairs = Dataset(self.client, None, endpoint)
        return record_pairs
Ejemplo n.º 4
0
    def create(self, creation_spec):
        """
        Create a Dataset in Unify.

        POSTs ``creation_spec`` as the JSON body to ``self.api_path`` and
        builds a Dataset from the JSON returned by the server.

        :param creation_spec: Dataset creation specification should be formatted as specified in the `Public Docs for Creating a Dataset <https://docs.tamr.com/reference#create-a-dataset>`_.
        :type creation_spec: dict[str, str]
        :returns: The created Dataset
        :rtype: :class:`~tamr_unify_client.models.dataset.resource.Dataset`
        """
        response = self.client.post(self.api_path, json=creation_spec)
        data = response.successful().json()
        # Dataset.from_json takes the API client as its first argument.
        # Passing ``self`` would hand the new Dataset this object instead of
        # the client it needs for any later request.
        return Dataset.from_json(self.client, data)
Ejemplo n.º 5
0
    def record_clusters(self):
        """Dataset of record clusters.

        Unify clusters labeled pairs using the pairs model. These clusters
        populate the cluster review page and carry transient cluster ids
        rather than published cluster ids (i.e., "Permanent Ids").

        To generate clusters based on the latest pair-matching model, call
        :func:`~tamr_unify_client.models.dataset.resource.Dataset.refresh` on
        the dataset returned here.

        :returns: The record clusters represented as a dataset.
        :rtype: :class:`~tamr_unify_client.models.dataset.resource.Dataset`
        """
        endpoint = self.api_path + "/recordClusters"
        clusters = Dataset(self.client, None, endpoint)
        return clusters
Ejemplo n.º 6
0
    def high_impact_pairs(self):
        """Dataset of high-impact pairs.

        Unify marks a pair as "high-impact" when labeling it would help the
        model learn most quickly (i.e. "Active learning"). Such pairs are
        displayed with a ⚡ lightning bolt icon on the "Pairs" page in the
        Unify UI.

        To produce new high-impact pairs according to the latest
        pair-matching model, call
        :func:`~tamr_unify_client.models.dataset.resource.Dataset.refresh` on
        the dataset returned here.

        :returns: The high-impact pairs represented as a dataset.
        :rtype: :class:`~tamr_unify_client.models.dataset.resource.Dataset`
        """
        endpoint = self.api_path + "/highImpactPairs"
        impactful = Dataset(self.client, None, endpoint)
        return impactful
Ejemplo n.º 7
0
    def test_feature_to_record(self):
        """Check ``Dataset._feature_to_record`` against a table of features.

        Every row is ``(feature, key attribute names, expected record)``.
        The geometry attribute name passed to the converter is always
        ``"geo"``. Rows are checked in order, and the first mismatch fails
        the test.
        """

        class GeoInterfaceOnly:
            # Not a mapping: the feature is only reachable through the
            # ``__geo_interface__`` property.
            @property
            def __geo_interface__(self):
                return {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                }

        cases = [
            # No geometry at all: only the key is carried over.
            (
                {"type": "Feature", "id": "1"},
                ["pk"],
                {"pk": "1"},
            ),
            # One row per supported geometry type.
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                },
                ["pk"],
                {"pk": "1", "geo": {"point": [0, 0]}},
            ),
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "MultiPoint",
                        "coordinates": [[0, 0], [1, 1]],
                    },
                },
                ["pk"],
                {"pk": "1", "geo": {"multiPoint": [[0, 0], [1, 1]]}},
            ),
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "LineString",
                        "coordinates": [[0, 0], [1, 1]],
                    },
                },
                ["pk"],
                {"pk": "1", "geo": {"lineString": [[0, 0], [1, 1]]}},
            ),
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "MultiLineString",
                        "coordinates": [[[0, 0], [1, 1], [2, 2]]],
                    },
                },
                ["pk"],
                {
                    "pk": "1",
                    "geo": {"multiLineString": [[[0, 0], [1, 1], [2, 2]]]},
                },
            ),
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [[[0, 0], [1, 1], [2, 2]]],
                    },
                },
                ["pk"],
                {"pk": "1", "geo": {"polygon": [[[0, 0], [1, 1], [2, 2]]]}},
            ),
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "MultiPolygon",
                        "coordinates": [[[[0, 0], [1, 1], [2, 2]]]],
                    },
                },
                ["pk"],
                {
                    "pk": "1",
                    "geo": {"multiPolygon": [[[[0, 0], [1, 1], [2, 2]]]]},
                },
            ),
            # Explicit null geometry yields no geometry attribute.
            (
                {"type": "Feature", "id": "1", "geometry": None},
                ["pk"],
                {"pk": "1"},
            ),
            # Bounding box present.
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "bbox": [0, 0, 1, 1],
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                },
                ["pk"],
                {"pk": "1", "geo": {"point": [0, 0]}, "bbox": [0, 0, 1, 1]},
            ),
            # Null bounding box is dropped.
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "bbox": None,
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                },
                ["pk"],
                {"pk": "1", "geo": {"point": [0, 0]}},
            ),
            # Properties are flattened into the record.
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "bbox": [0, 0, 1, 1],
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                    "properties": {"prop1": "val1", "prop2": "val2"},
                },
                ["pk"],
                {
                    "pk": "1",
                    "geo": {"point": [0, 0]},
                    "bbox": [0, 0, 1, 1],
                    "prop1": "val1",
                    "prop2": "val2",
                },
            ),
            # Null properties contribute nothing.
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "bbox": [0, 0, 1, 1],
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                    "properties": None,
                },
                ["pk"],
                {"pk": "1", "geo": {"point": [0, 0]}, "bbox": [0, 0, 1, 1]},
            ),
            # Properties with names that conflict with
            # the props in the key or geometry
            # get ignored
            (
                {
                    "type": "Feature",
                    "id": "1",
                    "bbox": [0, 0, 1, 1],
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                    "properties": {"pk": "val1", "geo": "val2", "bbox": "val3"},
                },
                ["pk"],
                {"pk": "1", "geo": {"point": [0, 0]}, "bbox": [0, 0, 1, 1]},
            ),
            # Composite key: a list id is spread over the key attributes.
            (
                {
                    "type": "Feature",
                    "id": ["1", "2"],
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                },
                ["pk1", "pk2"],
                {"pk1": "1", "pk2": "2", "geo": {"point": [0, 0]}},
            ),
            # Object that only implements ``__geo_interface__``.
            (
                GeoInterfaceOnly(),
                ["pk"],
                {"pk": "1", "geo": {"point": [0, 0]}},
            ),
        ]

        for feature, key_attrs, expected in cases:
            actual = Dataset._feature_to_record(feature, key_attrs, "geo")
            self.assertEqual(expected, actual)
Ejemplo n.º 8
0
    def test_record_to_feature(self):
        """Check ``Dataset._record_to_feature`` against a table of records.

        Every row is ``(record, key extractor, key attribute names, geometry
        attribute name, expected feature)``. Rows are checked in order, and
        the first mismatch fails the test.
        """

        def single_key(rec):
            return rec["id"]

        def composite_key(rec):
            return [rec["id1"], rec["id2"]]

        def geo_struct(**populated):
            # Full six-slot geometry struct; slots not named are None.
            slots = (
                "point",
                "multiPoint",
                "lineString",
                "multiLineString",
                "polygon",
                "multiPolygon",
            )
            return {slot: populated.get(slot) for slot in slots}

        cases = [
            # Key only.
            (
                {"id": "1"},
                single_key,
                ["id"],
                "geom",
                {"type": "Feature", "id": "1"},
            ),
            # One row per supported geometry type.
            (
                {"id": "1", "geom": {"point": [1, 1]}},
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {"type": "Point", "coordinates": [1, 1]},
                },
            ),
            (
                {"id": "1", "geom": {"multiPoint": [[1, 1]]}},
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {"type": "MultiPoint", "coordinates": [[1, 1]]},
                },
            ),
            (
                {"id": "1", "geom": {"lineString": [[1, 1], [2, 2]]}},
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "LineString",
                        "coordinates": [[1, 1], [2, 2]],
                    },
                },
            ),
            (
                {"id": "1", "geom": {"multiLineString": [[[1, 1], [2, 2]]]}},
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "MultiLineString",
                        "coordinates": [[[1, 1], [2, 2]]],
                    },
                },
            ),
            (
                {"id": "1", "geom": {"polygon": [[[1, 1], [2, 2], [3, 3]]]}},
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [[[1, 1], [2, 2], [3, 3]]],
                    },
                },
            ),
            (
                {
                    "id": "1",
                    "geom": {"multiPolygon": [[[[1, 1], [2, 2], [3, 3]]]]},
                },
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "MultiPolygon",
                        "coordinates": [[[[1, 1], [2, 2], [3, 3]]]],
                    },
                },
            ),
            # Full struct with only one slot populated.
            (
                {
                    "id": "1",
                    "geom": geo_struct(
                        multiPolygon=[[[[1, 1], [2, 2], [3, 3]]]]
                    ),
                },
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "geometry": {
                        "type": "MultiPolygon",
                        "coordinates": [[[[1, 1], [2, 2], [3, 3]]]],
                    },
                },
            ),
            # Full struct with every slot null: geometry is an explicit None.
            (
                {"id": "1", "geom": geo_struct()},
                single_key,
                ["id"],
                "geom",
                {"geometry": None, "type": "Feature", "id": "1"},
            ),
            # Bounding box is passed through.
            (
                {"id": "1", "bbox": [[0, 0], [1, 1]]},
                single_key,
                ["id"],
                "geom",
                {"type": "Feature", "id": "1", "bbox": [[0, 0], [1, 1]]},
            ),
            # Remaining attributes become properties.
            (
                {"id": "1", "p1": "v1", "p2": "v2"},
                single_key,
                ["id"],
                "geom",
                {
                    "type": "Feature",
                    "id": "1",
                    "properties": {"p1": "v1", "p2": "v2"},
                },
            ),
            # Composite key yields a list id.
            (
                {"id1": "1", "id2": "2"},
                composite_key,
                ["id1", "id2"],
                "geom",
                {"type": "Feature", "id": ["1", "2"]},
            ),
            # Everything at once; a second geometry-shaped attribute stays
            # an ordinary property.
            (
                {
                    "id1": "1",
                    "id2": "2",
                    "bbox": [[0, 0], [1, 1]],
                    "name": "record with everything",
                    "geom": geo_struct(
                        polygon=[[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]]
                    ),
                    "alternate_geom": geo_struct(point=[1, 1]),
                },
                composite_key,
                ["id1", "id2"],
                "geom",
                {
                    "type": "Feature",
                    "id": ["1", "2"],
                    "bbox": [[0, 0], [1, 1]],
                    "properties": {
                        "name": "record with everything",
                        "alternate_geom": geo_struct(point=[1, 1]),
                    },
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [
                            [[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]
                        ],
                    },
                },
            ),
            # No geometry attribute name supplied.
            (
                {"id": "1", "prop1": "val1"},
                single_key,
                ["id"],
                None,
                {
                    "type": "Feature",
                    "id": "1",
                    "properties": {"prop1": "val1"},
                },
            ),
        ]

        for record, key_fn, key_attrs, geo_attr, expected in cases:
            actual = Dataset._record_to_feature(
                record, key_fn, key_attrs, geo_attr
            )
            self.assertEqual(expected, actual)