def test_get_feature_set(self, mocked_client, mocker):
    """Check that ``get_feature_set`` exposes the data of a canned Core response.

    The Core stub's ``GetFeatureSet`` method is patched to return a fixed
    ``GetFeatureSetResponse``. The test then verifies the name, labels,
    field names/dtypes and the feature/entity counts of the object that
    ``mocked_client.get_feature_set`` returns.
    """
    from google.protobuf.duration_pb2 import Duration

    # Stub over an empty-target channel; the only method exercised here
    # (GetFeatureSet) is patched below.
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    feature_specs = [
        FeatureSpecProto(
            name="my_feature_1", value_type=ValueProto.ValueType.FLOAT,
        ),
        FeatureSpecProto(
            name="my_feature_2", value_type=ValueProto.ValueType.FLOAT,
        ),
    ]
    entity_specs = [
        EntitySpecProto(
            name="my_entity_1", value_type=ValueProto.ValueType.INT64,
        )
    ]
    kafka_source = Source(
        type=SourceType.KAFKA,
        kafka_source_config=KafkaSourceConfig(
            bootstrap_servers="localhost:9092", topic="topic"
        ),
    )
    canned_response = GetFeatureSetResponse(
        feature_set=FeatureSetProto(
            spec=FeatureSetSpecProto(
                name="my_feature_set",
                max_age=Duration(seconds=3600),
                labels={"key1": "val1", "key2": "val2"},
                features=feature_specs,
                entities=entity_specs,
                source=kafka_source,
            ),
            meta=FeatureSetMetaProto(),
        )
    )
    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=canned_response,
    )

    mocked_client.set_project("my_project")
    feature_set = mocked_client.get_feature_set("my_feature_set")

    # One assert per condition so a failure names the exact mismatch.
    assert feature_set.name == "my_feature_set"
    assert "key1" in feature_set.labels
    assert feature_set.labels["key1"] == "val1"
    assert "key2" in feature_set.labels
    assert feature_set.labels["key2"] == "val2"
    assert feature_set.fields["my_feature_1"].name == "my_feature_1"
    assert feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
    assert feature_set.fields["my_entity_1"].name == "my_entity_1"
    assert feature_set.fields["my_entity_1"].dtype == ValueType.INT64
    assert len(feature_set.features) == 2
    assert len(feature_set.entities) == 1
def test_list_feature_sets(self, mocked_client, mocker):
    """Check that ``list_feature_sets`` returns the feature sets Core reports.

    ``ListFeatureSets`` is patched to return two canned feature sets
    (``driver_car`` and ``driver_ride``). The test verifies that both come
    back and inspects the name, labels, field and feature count of the
    first one.
    """
    # NOTE(review): with ``return_value=`` this replaces the attribute by a
    # mock whose *call result* is the real stub, unlike the direct
    # assignment used in test_get_feature_set -- confirm this is intended.
    mocker.patch.object(
        mocked_client,
        "_core_service_stub",
        return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
    )

    def make_feature_set_proto(name, labels):
        # Both canned feature sets differ only in their name and labels.
        return FeatureSetProto(
            spec=FeatureSetSpecProto(
                project="test",
                name=name,
                max_age=Duration(seconds=3600),
                labels=labels,
                features=[
                    FeatureSpecProto(
                        name="feature_1",
                        value_type=ValueProto.ValueType.FLOAT,
                    )
                ],
            )
        )

    driver_car_proto = make_feature_set_proto(
        "driver_car", {"key1": "val1", "key2": "val2"}
    )
    driver_ride_proto = make_feature_set_proto("driver_ride", {"key1": "val1"})

    mocker.patch.object(
        mocked_client._core_service_stub,
        "ListFeatureSets",
        return_value=ListFeatureSetsResponse(
            feature_sets=[driver_car_proto, driver_ride_proto]
        ),
    )

    feature_sets = mocked_client.list_feature_sets(labels={"key1": "val1"})
    assert len(feature_sets) == 2

    first = feature_sets[0]
    assert first.name == "driver_car"
    assert "key1" in first.labels
    assert first.labels["key1"] == "val1"
    assert "key2" in first.labels
    assert first.labels["key2"] == "val2"
    assert first.fields["feature_1"].name == "feature_1"
    assert first.fields["feature_1"].dtype == ValueType.FLOAT
    assert len(first.features) == 1
def test_list_features(self, mocked_client, mocker):
    """Check that ``list_features_by_ref`` maps feature references to features.

    ``ListFeatures`` is patched to return two canned feature specs keyed by
    their reference string. The test verifies the returned mapping has both
    references and that the mapped objects carry the expected names and
    dtypes.
    """
    mocker.patch.object(
        mocked_client,
        "_core_service_stub",
        return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
    )

    canned_features = {
        "driver_car:feature_1": FeatureSpecProto(
            name="feature_1", value_type=ValueProto.ValueType.FLOAT
        ),
        "driver_car:feature_2": FeatureSpecProto(
            name="feature_2", value_type=ValueProto.ValueType.STRING
        ),
    }
    mocker.patch.object(
        mocked_client._core_service_stub,
        "ListFeatures",
        return_value=ListFeaturesResponse(features=canned_features),
    )

    features = mocked_client.list_features_by_ref(project="test")
    assert len(features) == 2

    # Order of the returned mapping is not asserted, so compare as sets.
    returned = list(features.items())
    seen_refs = {ref for ref, _ in returned}
    seen_names = {feature.name for _, feature in returned}
    seen_dtypes = {feature.dtype for _, feature in returned}

    assert seen_refs == {"driver_car:feature_1", "driver_car:feature_2"}
    assert seen_names == {"feature_1", "feature_2"}
    assert seen_dtypes == {ValueType.FLOAT, ValueType.STRING}
def test_get_batch_features(self, mocked_client, mocker):
    """Check that ``get_batch_features`` yields a finished job with loadable output.

    Core's ``GetFeatureSet`` and Serving's ``GetBatchFeatures``, ``GetJob``
    and ``GetFeastServingInfo`` are patched with canned responses. The canned
    job points at an Avro file written by this test, so the dataframe loaded
    through ``to_dataframe()`` can be compared with the one that was written.
    """
    # NOTE(review): another ``test_get_batch_features`` is defined later in
    # this file. If both sit in the same class body, the later definition
    # replaces this one and this test never runs -- confirm and rename or
    # remove one of them.
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(
                            name="driver",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY
                ),
            )
        ),
    )

    expected_dataframe = pd.DataFrame({
        "datetime": [datetime.utcnow() for _ in range(3)],
        "driver": [1001, 1002, 1003],
        "transaction": [1001, 1002, 1003],
        "driver_id": [1001, 1002, 1003],
    })

    # Private temporary directories replace the deprecated, race-prone
    # tempfile.mktemp() and are removed again when the block exits.
    with tempfile.TemporaryDirectory() as results_dir, \
            tempfile.TemporaryDirectory() as staging_dir:
        final_results = f"{results_dir}/final_results.avro"
        pandavro.to_avro(
            file_path_or_buffer=final_results, df=expected_dataframe
        )

        # The same finished job is reported by both GetBatchFeatures and
        # GetJob.
        finished_job = BatchRetrievalJob(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_DONE,
            file_uris=[f"file://{final_results}"],
            data_format=DataFormat.DATA_FORMAT_AVRO,
        )
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=finished_job),
        )
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=finished_job),
        )
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{staging_dir}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")

        # TODO: Abstract away GCS client and GCP dependency
        # NOTE: Feast Serving does not allow for feature references
        # that specify the same feature in the same request.
        with patch("google.cloud.storage.Client"):
            response = mocked_client.get_batch_features(
                entity_rows=pd.DataFrame({
                    # ``pd.datetime`` was only an alias of
                    # ``datetime.datetime`` and is gone in pandas 2.x; use
                    # the class directly.
                    "datetime": [
                        datetime.now(tz=timezone("Asia/Singapore"))
                        for _ in range(3)
                    ],
                    "driver": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }),
                feature_refs=["driver:driver_id", "driver_id"],
                project="driver_project",
            )

        # The returned object exposes the job id/status and can load the
        # job's output files as a dataframe.
        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()
        assert actual_dataframe[["driver_id"]].equals(
            expected_dataframe[["driver_id"]]
        )
def test_get_batch_features(self, mocked_client, mocker):
    """Check batch retrieval using project-qualified, versioned feature refs.

    Core's ``GetFeatureSet`` and Serving's ``GetBatchFeatures``, ``GetJob``
    and ``GetFeastServingInfo`` are patched with canned responses. The canned
    job points at an Avro file written by this test, so the dataframe loaded
    through ``to_dataframe()`` can be compared with the one that was written.
    """
    # NOTE(review): an earlier ``test_get_batch_features`` is defined above
    # in this file. If both sit in the same class body, this definition
    # replaces the earlier one, which then never runs -- confirm and rename
    # or remove one of them.
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="customer_fs",
                    version=1,
                    project="my_project",
                    entities=[
                        EntitySpecProto(
                            name="customer",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="customer_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="customer_feature_2",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY
                ),
            )
        ),
    )

    feature_columns = [
        "my_project/customer_feature_1:1",
        "my_project/customer_feature_2:1",
    ]
    expected_dataframe = pd.DataFrame({
        "datetime": [datetime.utcnow() for _ in range(3)],
        "customer": [1001, 1002, 1003],
        "transaction": [1001, 1002, 1003],
        "my_project/customer_feature_1:1": [1001, 1002, 1003],
        "my_project/customer_feature_2:1": [1001, 1002, 1003],
    })

    # Private temporary directories replace the deprecated, race-prone
    # tempfile.mktemp() and are removed again when the block exits.
    with tempfile.TemporaryDirectory() as results_dir, \
            tempfile.TemporaryDirectory() as staging_dir:
        final_results = f"{results_dir}/final_results.avro"
        to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

        # The same finished job is reported by both GetBatchFeatures and
        # GetJob.
        finished_job = BatchFeaturesJob(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_DONE,
            file_uris=[f"file://{final_results}"],
            data_format=DataFormat.DATA_FORMAT_AVRO,
        )
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=finished_job),
        )
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=finished_job),
        )
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{staging_dir}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")
        response = mocked_client.get_batch_features(
            entity_rows=pd.DataFrame({
                # ``pd.datetime`` was only an alias of ``datetime.datetime``
                # and is gone in pandas 2.x; use the class directly.
                "datetime": [
                    datetime.now(tz=timezone("Asia/Singapore"))
                    for _ in range(3)
                ],
                "customer": [1001, 1002, 1003],
                "transaction": [1001, 1002, 1003],
            }),
            feature_refs=[
                "my_project/customer_feature_1:1",
                "my_project/customer_feature_2:1",
            ],
        )  # type: Job

        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()
        assert actual_dataframe[feature_columns].equals(
            expected_dataframe[feature_columns]
        )