Beispiel #1
0
    def test_list_feature_sets(self, mocked_client, mocker):
        """Check that list_feature_sets exposes the feature sets Core returns.

        ListFeatureSets on the client's core stub is patched to return two
        feature sets; the test verifies how many come back and inspects the
        name, labels and fields of the first one.
        """
        mocker.patch.object(
            mocked_client,
            "_core_service_stub",
            return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
        )

        def build_feature_set(name, labels):
            # Both canned feature sets differ only by name and labels.
            return FeatureSetProto(
                spec=FeatureSetSpecProto(
                    project="test",
                    name=name,
                    max_age=Duration(seconds=3600),
                    labels=labels,
                    features=[
                        FeatureSpecProto(
                            name="feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        )
                    ],
                )
            )

        feature_set_1_proto = build_feature_set(
            "driver_car", {"key1": "val1", "key2": "val2"}
        )
        feature_set_2_proto = build_feature_set("driver_ride", {"key1": "val1"})

        canned_response = ListFeatureSetsResponse(
            feature_sets=[feature_set_1_proto, feature_set_2_proto]
        )
        mocker.patch.object(
            mocked_client._core_service_stub,
            "ListFeatureSets",
            return_value=canned_response,
        )

        listed = mocked_client.list_feature_sets(labels={"key1": "val1"})
        assert len(listed) == 2

        # Only the first returned feature set is inspected in detail.
        first = listed[0]
        assert first.name == "driver_car"
        assert "key1" in first.labels
        assert first.labels["key1"] == "val1"
        assert "key2" in first.labels
        assert first.labels["key2"] == "val2"
        assert first.fields["feature_1"].name == "feature_1"
        assert first.fields["feature_1"].dtype == ValueType.FLOAT
        assert len(first.features) == 1
Beispiel #2
0
    def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context):
        """Store the feature set carried by the request and report it created.

        When the incoming spec has a source of type INVALID, a fixed Kafka
        source (server.com / topic1) is written into it first. The feature
        set is then stored under its name with a READY status and a fixed
        creation timestamp.

        Args:
            request: ApplyFeatureSetRequest holding the feature set to apply.
            context: Unused by this implementation.

        Returns:
            ApplyFeatureSetResponse with the stored feature set and status
            CREATED.
        """
        spec = request.feature_set.spec

        # Fill in a default Kafka source directly on the request's spec.
        if spec.source.type == SourceTypeProto.INVALID:
            default_kafka = KafkaSourceConfigProto(
                bootstrap_servers="server.com", topic="topic1"
            )
            spec.source.kafka_source_config.CopyFrom(default_kafka)
            spec.source.type = SourceTypeProto.KAFKA

        stored = FeatureSetProto(
            spec=spec,
            meta=FeatureSetMeta(
                status=FeatureSetStatus.STATUS_READY,
                created_timestamp=Timestamp(seconds=10),
            ),
        )
        self._feature_sets[spec.name] = stored

        _logger.info(
            f"registered feature set {spec.name}"
            f" with {len(spec.entities)}"
            f" entities and {len(spec.features)}"
            f" features"
        )

        return ApplyFeatureSetResponse(
            feature_set=stored,
            status=ApplyFeatureSetResponse.Status.CREATED,
        )
Beispiel #3
0
    def to_proto(self) -> FeatureSetProto:
        """
        Builds the protobuf message that describes this feature set

        Returns:
            FeatureSetProto carrying the spec and meta of this feature set
        """

        meta = FeatureSetMetaProto(created_timestamp=self.created_timestamp,
                                   status=self.status)

        # A feature set without a source leaves the spec's source unset.
        source_proto = None
        if self.source is not None:
            source_proto = self.source.to_proto()

        # Fields are split by their exact type; other field types are skipped.
        all_fields = self._fields.values()
        feature_protos = [
            candidate.to_proto() for candidate in all_fields
            if type(candidate) == Feature
        ]
        entity_protos = [
            candidate.to_proto() for candidate in all_fields
            if type(candidate) == Entity
        ]

        spec = FeatureSetSpecProto(
            name=self.name,
            version=self.version,
            project=self.project,
            max_age=self.max_age,
            source=source_proto,
            features=feature_protos,
            entities=entity_protos,
        )

        return FeatureSetProto(spec=spec, meta=meta)
Beispiel #4
0
    def test_get_feature_set(self, mocked_client, mocker):
        """Check that get_feature_set converts the Core response into an object.

        GetFeatureSet on the core stub is patched to return one feature set
        with two features, one entity, two labels and a Kafka source; the
        test verifies the name, labels, fields and counts of the result.
        """
        from google.protobuf.duration_pb2 import Duration

        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel("")
        )

        float_features = [
            FeatureSpecProto(
                name=feature_name, value_type=ValueProto.ValueType.FLOAT,
            )
            for feature_name in ("my_feature_1", "my_feature_2")
        ]
        kafka_source = Source(
            type=SourceType.KAFKA,
            kafka_source_config=KafkaSourceConfig(
                bootstrap_servers="localhost:9092", topic="topic"
            ),
        )
        canned_spec = FeatureSetSpecProto(
            name="my_feature_set",
            max_age=Duration(seconds=3600),
            labels={"key1": "val1", "key2": "val2"},
            features=float_features,
            entities=[
                EntitySpecProto(
                    name="my_entity_1", value_type=ValueProto.ValueType.INT64,
                )
            ],
            source=kafka_source,
        )
        canned_response = GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=canned_spec, meta=FeatureSetMetaProto()
            )
        )

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=canned_response,
        )
        mocked_client.set_project("my_project")
        fetched = mocked_client.get_feature_set("my_feature_set")

        assert fetched.name == "my_feature_set"
        assert "key1" in fetched.labels
        assert fetched.labels["key1"] == "val1"
        assert "key2" in fetched.labels
        assert fetched.labels["key2"] == "val2"
        assert fetched.fields["my_feature_1"].name == "my_feature_1"
        assert fetched.fields["my_feature_1"].dtype == ValueType.FLOAT
        assert fetched.fields["my_entity_1"].name == "my_entity_1"
        assert fetched.fields["my_entity_1"].dtype == ValueType.INT64
        assert len(fetched.features) == 2
        assert len(fetched.entities) == 1
Beispiel #5
0
    def test_list_ingest_jobs(self, mocked_client, mocker):
        """Check that list_ingest_jobs exposes the jobs returned by Core.

        ListIngestionJobs on the core stub is patched to return a single
        running Kafka-to-redis job; the test lists jobs by feature set
        reference and verifies the attributes of the first one.
        """
        mocker.patch.object(
            mocked_client,
            "_core_service_stub",
            return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
        )

        feature_set_proto = FeatureSetProto(
            spec=FeatureSetSpecProto(
                project="test", name="driver", max_age=Duration(seconds=3600),
            )
        )

        kafka_source = Source(
            type=SourceType.KAFKA,
            kafka_source_config=KafkaSourceConfig(
                bootstrap_servers="localhost:9092", topic="topic"
            ),
        )
        running_job = IngestJobProto(
            id="kafka-to-redis",
            external_id="job-2222",
            status=IngestionJobStatus.RUNNING,
            feature_sets=[feature_set_proto],
            source=kafka_source,
            store=Store(name="redis"),
        )
        mocker.patch.object(
            mocked_client._core_service_stub,
            "ListIngestionJobs",
            return_value=ListIngestionJobsResponse(jobs=[running_job]),
        )

        # Look the jobs up through a reference to the target feature set.
        target_ref = FeatureSetRef.from_feature_set(
            FeatureSet.from_proto(feature_set_proto)
        )
        listed_jobs = mocked_client.list_ingest_jobs(feature_set_ref=target_ref)
        assert len(listed_jobs) >= 1

        first_job = listed_jobs[0]
        assert first_job.status == IngestionJobStatus.RUNNING
        assert first_job.id == "kafka-to-redis"
        assert first_job.external_id == "job-2222"
        assert first_job.feature_sets[0].name == "driver"
        assert first_job.source.source_type == "Kafka"
Beispiel #6
0
    def from_dict(cls, fs_dict):
        """
        Builds a feature set out of its dict representation

        Args:
            fs_dict: Dict describing a feature set; keys that do not map to a
                FeatureSetProto field are ignored while parsing

        Returns:
            The object produced by cls.from_proto for the parsed proto
        """

        # ParseDict fills the message it is handed and returns that same
        # message, so the populated instance can be used directly.
        parsed_proto = FeatureSetProto()
        json_format.ParseDict(fs_dict, parsed_proto, ignore_unknown_fields=True)
        return cls.from_proto(parsed_proto)
Beispiel #7
0
    def from_dict(cls, fs_dict):
        """
        Creates a feature set from a dict

        Args:
            fs_dict: A dict representation of a feature set. It must carry a
                "kind" entry whose value, ignoring surrounding whitespace,
                is "feature_set".

        Returns:
            Returns a FeatureSet object based on the feature set dict

        Raises:
            Exception: If "kind" is missing, is not a string, or names a
                resource other than a feature set.
        """

        # The two checks must be joined with "or": with "and", a missing key
        # raised KeyError and a wrong kind was never rejected at all.
        kind = fs_dict.get("kind")
        if not isinstance(kind, str) or kind.strip() != "feature_set":
            raise Exception(
                f"Resource kind is not a feature set {str(fs_dict)}")
        feature_set_proto = json_format.ParseDict(fs_dict,
                                                  FeatureSetProto(),
                                                  ignore_unknown_fields=True)
        return cls.from_proto(feature_set_proto)
Beispiel #8
0
    def test_get_batch_features(self, mocked_client, mocker):
        """Exercise get_batch_features against patched Core and Serving stubs.

        The expected result is written to a local Avro file, the Serving stub
        is patched to report a finished download job pointing at that file,
        and the test verifies that the returned job reports the canned id and
        status and yields the expected "driver_id" column.
        """

        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel(""))

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(name="driver",
                                        value_type=ValueProto.ValueType.INT64),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY),
            )),
        )

        expected_dataframe = pd.DataFrame({
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        })

        # tempfile.mktemp() is deprecated because the name it returns can be
        # claimed by another process before use; create the file atomically
        # and keep only its path.
        with tempfile.NamedTemporaryFile(delete=False) as results_file:
            final_results = results_file.name
        pandavro.to_avro(file_path_or_buffer=final_results,
                         df=expected_dataframe)

        # The same finished job is reported by both GetBatchFeatures and GetJob.
        finished_job = BatchRetrievalJob(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_DONE,
            file_uris=[f"file://{final_results}"],
            data_format=DataFormat.DATA_FORMAT_AVRO,
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=finished_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=finished_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")
        # TODO: Abstract away GCS client and GCP dependency
        # NOTE: Feast Serving does not allow for feature references
        # that specify the same feature in the same request.
        with patch("google.cloud.storage.Client"):
            # NOTE(review): the returned object is used through id, status
            # and to_dataframe(), so it appears to be a job wrapper rather
            # than the raw GetBatchFeaturesResponse - confirm.
            response = mocked_client.get_batch_features(
                entity_rows=pd.DataFrame({
                    # pd.datetime was a deprecated alias of datetime.datetime
                    # and is gone in pandas 2.0; use the class directly.
                    "datetime": [
                        datetime.now(tz=timezone("Asia/Singapore"))
                        for _ in range(3)
                    ],
                    "driver": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }),
                feature_refs=["driver:driver_id", "driver_id"],
                project="driver_project",
            )

        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[["driver_id"
                                 ]].equals(expected_dataframe[["driver_id"]])
Beispiel #9
0
    def test_get_batch_features(self, mocked_client, mocker):
        """Exercise get_batch_features against patched Core and Serving stubs.

        The expected result is written to a local Avro file, the Serving stub
        is patched to report a finished download job pointing at that file,
        and the test verifies that the returned job reports the canned id and
        status and yields the two expected customer feature columns.
        """

        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel(""))

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="customer_fs",
                    version=1,
                    project="my_project",
                    entities=[
                        EntitySpecProto(name="customer",
                                        value_type=ValueProto.ValueType.INT64),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="customer_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="customer_feature_2",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY),
            )),
        )

        feature_columns = [
            "my_project/customer_feature_1:1",
            "my_project/customer_feature_2:1",
        ]

        expected_dataframe = pd.DataFrame({
            "datetime": [datetime.utcnow() for _ in range(3)],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "my_project/customer_feature_1:1": [1001, 1002, 1003],
            "my_project/customer_feature_2:1": [1001, 1002, 1003],
        })

        # tempfile.mktemp() is deprecated because the name it returns can be
        # claimed by another process before use; create the file atomically
        # and keep only its path.
        with tempfile.NamedTemporaryFile(delete=False) as results_file:
            final_results = results_file.name
        to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

        # The same finished job is reported by both GetBatchFeatures and GetJob.
        finished_job = BatchFeaturesJob(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_DONE,
            file_uris=[f"file://{final_results}"],
            data_format=DataFormat.DATA_FORMAT_AVRO,
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=finished_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=finished_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")
        response = mocked_client.get_batch_features(
            entity_rows=pd.DataFrame({
                # pd.datetime was a deprecated alias of datetime.datetime and
                # is gone in pandas 2.0; use the class directly.
                "datetime": [
                    datetime.now(tz=timezone("Asia/Singapore"))
                    for _ in range(3)
                ],
                "customer": [1001, 1002, 1003],
                "transaction": [1001, 1002, 1003],
            }),
            feature_refs=[
                "my_project/customer_feature_1:1",
                "my_project/customer_feature_2:1",
            ],
        )  # type: Job

        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[feature_columns].equals(
            expected_dataframe[feature_columns])