Beispiel #1
0
    def test_get_feature_set(self, mocked_client, mocker):
        """Fetch a feature set through a patched Core stub and check its fields.

        ``GetFeatureSet`` on the Core stub is replaced with a canned response
        describing ``my_feature_set`` (two FLOAT features, one INT64 entity,
        two labels and a Kafka source). The test then checks that the object
        returned by ``get_feature_set`` exposes the same name, labels, fields
        and feature/entity counts.
        """
        from google.protobuf.duration_pb2 import Duration

        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel("")
        )

        # Assemble the canned response bottom-up so each piece has a name.
        feature_specs = [
            FeatureSpecProto(
                name=feature_name, value_type=ValueProto.ValueType.FLOAT,
            )
            for feature_name in ("my_feature_1", "my_feature_2")
        ]
        entity_specs = [
            EntitySpecProto(
                name="my_entity_1", value_type=ValueProto.ValueType.INT64,
            )
        ]
        kafka_source = Source(
            type=SourceType.KAFKA,
            kafka_source_config=KafkaSourceConfig(
                bootstrap_servers="localhost:9092", topic="topic"
            ),
        )
        spec_proto = FeatureSetSpecProto(
            name="my_feature_set",
            max_age=Duration(seconds=3600),
            labels={"key1": "val1", "key2": "val2"},
            features=feature_specs,
            entities=entity_specs,
            source=kafka_source,
        )
        canned_response = GetFeatureSetResponse(
            feature_set=FeatureSetProto(spec=spec_proto, meta=FeatureSetMetaProto())
        )
        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=canned_response,
        )

        mocked_client.set_project("my_project")
        feature_set = mocked_client.get_feature_set("my_feature_set")

        assert feature_set.name == "my_feature_set"

        # Labels: membership is checked before the value lookup, as before.
        for label_key, label_value in (("key1", "val1"), ("key2", "val2")):
            assert label_key in feature_set.labels
            assert feature_set.labels[label_key] == label_value

        # ``fields`` is indexed by both feature and entity names here.
        expected_fields = (
            ("my_feature_1", ValueType.FLOAT),
            ("my_entity_1", ValueType.INT64),
        )
        for field_name, field_dtype in expected_fields:
            assert feature_set.fields[field_name].name == field_name
            assert feature_set.fields[field_name].dtype == field_dtype

        assert len(feature_set.features) == 2
        assert len(feature_set.entities) == 1
Beispiel #2
0
    def test_list_feature_sets(self, mocked_client, mocker):
        """List feature sets through a patched Core stub and check the first one.

        ``ListFeatureSets`` on the Core stub is replaced with a canned response
        holding two feature sets (``driver_car`` and ``driver_ride``). The test
        checks that both come back from ``list_feature_sets`` and that the
        first carries the expected name, labels and single FLOAT feature.
        """
        # ``new=`` installs the stub object itself as the attribute. Passing
        # the stub as ``return_value=`` would instead install a MagicMock whose
        # *call result* is the stub, so the stub would never actually be used.
        mocker.patch.object(
            mocked_client,
            "_core_service_stub",
            new=Core.CoreServiceStub(grpc.insecure_channel("")),
        )

        def build_feature_set_proto(name, labels):
            # The two canned feature sets differ only in name and labels.
            return FeatureSetProto(
                spec=FeatureSetSpecProto(
                    project="test",
                    name=name,
                    max_age=Duration(seconds=3600),
                    labels=labels,
                    features=[
                        FeatureSpecProto(
                            name="feature_1", value_type=ValueProto.ValueType.FLOAT
                        )
                    ],
                )
            )

        feature_set_1_proto = build_feature_set_proto(
            "driver_car", {"key1": "val1", "key2": "val2"}
        )
        feature_set_2_proto = build_feature_set_proto("driver_ride", {"key1": "val1"})

        mocker.patch.object(
            mocked_client._core_service_stub,
            "ListFeatureSets",
            return_value=ListFeatureSetsResponse(
                feature_sets=[feature_set_1_proto, feature_set_2_proto]
            ),
        )

        feature_sets = mocked_client.list_feature_sets(labels={"key1": "val1"})
        assert len(feature_sets) == 2

        # Separate asserts so a failure points at the exact mismatching field.
        feature_set = feature_sets[0]
        assert feature_set.name == "driver_car"
        assert "key1" in feature_set.labels
        assert feature_set.labels["key1"] == "val1"
        assert "key2" in feature_set.labels
        assert feature_set.labels["key2"] == "val2"
        assert feature_set.fields["feature_1"].name == "feature_1"
        assert feature_set.fields["feature_1"].dtype == ValueType.FLOAT
        assert len(feature_set.features) == 1
Beispiel #3
0
    def test_list_features(self, mocked_client, mocker):
        """List features by reference through a patched Core stub.

        ``ListFeatures`` on the Core stub is replaced with a canned response
        mapping two ``driver_car:<feature>`` references to feature specs. The
        test checks that ``list_features_by_ref`` returns both entries with
        the expected reference strings, names and dtypes.
        """
        # ``new=`` installs the stub object itself as the attribute. Passing
        # the stub as ``return_value=`` would instead install a MagicMock whose
        # *call result* is the stub, so the stub would never actually be used.
        mocker.patch.object(
            mocked_client,
            "_core_service_stub",
            new=Core.CoreServiceStub(grpc.insecure_channel("")),
        )

        feature1_proto = FeatureSpecProto(
            name="feature_1", value_type=ValueProto.ValueType.FLOAT
        )
        feature2_proto = FeatureSpecProto(
            name="feature_2", value_type=ValueProto.ValueType.STRING
        )

        mocker.patch.object(
            mocked_client._core_service_stub,
            "ListFeatures",
            return_value=ListFeaturesResponse(
                features={
                    "driver_car:feature_1": feature1_proto,
                    "driver_car:feature_2": feature2_proto,
                }
            ),
        )

        features = mocked_client.list_features_by_ref(project="test")
        assert len(features) == 2

        # Compare as sets: the order of the returned mapping is not asserted.
        # Separate asserts so a failure names the attribute that mismatched.
        assert set(features) == {"driver_car:feature_1", "driver_car:feature_2"}
        assert {feature.name for feature in features.values()} == {
            "feature_1",
            "feature_2",
        }
        assert {feature.dtype for feature in features.values()} == {
            ValueType.FLOAT,
            ValueType.STRING,
        }
Beispiel #4
0
    def test_get_batch_features(self, mocked_client, mocker):
        """Run a batch retrieval against patched Core and Serving stubs.

        Setup:
          * Core ``GetFeatureSet`` returns a READY ``driver`` feature set.
          * Serving ``GetBatchFeatures`` and ``GetJob`` both return the same
            finished download job, whose single file URI points at a local
            Avro file written from ``expected_dataframe``.
          * Serving ``GetFeastServingInfo`` reports a batch-type deployment
            with a local ``file://`` staging directory.

        The test checks the job id/status exposed by the returned object and
        that ``to_dataframe()`` yields the ``driver_id`` column that was
        written to the Avro file.
        """
        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel(""))

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(name="driver",
                                        value_type=ValueProto.ValueType.INT64),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY),
            )),
        )

        expected_dataframe = pd.DataFrame({
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        })

        # NamedTemporaryFile(delete=False) creates the file atomically and
        # leaves it on disk after the handle closes. tempfile.mktemp() is
        # deprecated because it only invents a name, which another process
        # could claim before the file is written.
        with tempfile.NamedTemporaryFile(delete=False) as avro_file:
            final_results = avro_file.name
        pandavro.to_avro(file_path_or_buffer=final_results,
                         df=expected_dataframe)

        # Both Serving calls report the same finished job, so build it once.
        done_job = BatchRetrievalJob(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_DONE,
            file_uris=[f"file://{final_results}"],
            data_format=DataFormat.DATA_FORMAT_AVRO,
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=done_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=done_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")

        # ``pd.datetime`` was only an alias of ``datetime.datetime`` and has
        # been removed from pandas; call the standard class directly.
        entity_rows = pd.DataFrame({
            "datetime": [
                datetime.now(tz=timezone("Asia/Singapore")) for _ in range(3)
            ],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
        })

        # TODO: Abstract away GCS client and GCP dependency
        # NOTE: Feast Serving does not allow for feature references
        # that specify the same feature in the same request.
        with patch("google.cloud.storage.Client"):
            # The returned object is used as a job handle: only its ``id``,
            # ``status`` and ``to_dataframe()`` are relied on below.
            response = mocked_client.get_batch_features(
                entity_rows=entity_rows,
                feature_refs=["driver:driver_id", "driver_id"],
                project="driver_project",
            )

        # Separate asserts so a failure shows which attribute was wrong.
        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[["driver_id"
                                 ]].equals(expected_dataframe[["driver_id"]])
Beispiel #5
0
    def test_get_batch_features(self, mocked_client, mocker):
        """Run a batch retrieval with versioned feature references.

        Setup:
          * Core ``GetFeatureSet`` returns a READY ``customer_fs`` feature set
            (version 1, project ``my_project``).
          * Serving ``GetBatchFeatures`` and ``GetJob`` both return the same
            finished download job, whose single file URI points at a local
            Avro file written from ``expected_dataframe``.
          * Serving ``GetFeastServingInfo`` reports a batch-type deployment
            with a local ``file://`` staging directory.

        The test checks the job id/status exposed by the returned object and
        that ``to_dataframe()`` yields the two feature columns that were
        written to the Avro file.
        """
        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel(""))

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="customer_fs",
                    version=1,
                    project="my_project",
                    entities=[
                        EntitySpecProto(name="customer",
                                        value_type=ValueProto.ValueType.INT64),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="customer_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="customer_feature_2",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY),
            )),
        )

        # Column names double as the feature references passed to the client.
        feature_columns = [
            "my_project/customer_feature_1:1",
            "my_project/customer_feature_2:1",
        ]

        expected_dataframe = pd.DataFrame({
            "datetime": [datetime.utcnow() for _ in range(3)],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "my_project/customer_feature_1:1": [1001, 1002, 1003],
            "my_project/customer_feature_2:1": [1001, 1002, 1003],
        })

        # NamedTemporaryFile(delete=False) creates the file atomically and
        # leaves it on disk after the handle closes. tempfile.mktemp() is
        # deprecated because it only invents a name, which another process
        # could claim before the file is written.
        with tempfile.NamedTemporaryFile(delete=False) as avro_file:
            final_results = avro_file.name
        to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

        # Both Serving calls report the same finished job, so build it once.
        done_job = BatchFeaturesJob(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_DONE,
            file_uris=[f"file://{final_results}"],
            data_format=DataFormat.DATA_FORMAT_AVRO,
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=done_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=done_job),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")

        # ``pd.datetime`` was only an alias of ``datetime.datetime`` and has
        # been removed from pandas; call the standard class directly.
        entity_rows = pd.DataFrame({
            "datetime": [
                datetime.now(tz=timezone("Asia/Singapore")) for _ in range(3)
            ],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
        })

        response = mocked_client.get_batch_features(
            entity_rows=entity_rows,
            feature_refs=list(feature_columns),
        )  # type: Job

        # Separate asserts so a failure shows which attribute was wrong.
        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[feature_columns].equals(
            expected_dataframe[feature_columns])