def to_proto(self) -> FeatureSetProto:
    """
    Build the protobuf message that represents this feature set.

    The meta part carries the creation timestamp and status; the spec part
    carries name, version, project, max age, the source (when one is set)
    and the feature/entity fields.

    Returns:
        FeatureSetProto made of the generated spec and meta
    """

    def _protos_of(kind):
        # Exact type match on purpose: only fields whose class is `kind`.
        return [
            member.to_proto()
            for member in self._fields.values()
            if type(member) == kind
        ]

    meta_proto = FeatureSetMetaProto(
        created_timestamp=self.created_timestamp, status=self.status
    )

    # A feature set without a source leaves the spec's source unset.
    source_proto = None
    if self.source is not None:
        source_proto = self.source.to_proto()

    feature_protos = _protos_of(Feature)
    entity_protos = _protos_of(Entity)

    spec_proto = FeatureSetSpecProto(
        name=self.name,
        version=self.version,
        project=self.project,
        max_age=self.max_age,
        source=source_proto,
        features=feature_protos,
        entities=entity_protos,
    )

    return FeatureSetProto(spec=spec_proto, meta=meta_proto)
def test_get_feature_set(self, mocked_client, mocker):
    """
    Check that ``get_feature_set`` exposes the content of a mocked Core
    ``GetFeatureSet`` response: name, labels, field names/dtypes and the
    number of features and entities.
    """
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    from google.protobuf.duration_pb2 import Duration

    # Assemble the canned response piece by piece.
    feature_specs = [
        FeatureSpecProto(
            name="my_feature_1", value_type=ValueProto.ValueType.FLOAT,
        ),
        FeatureSpecProto(
            name="my_feature_2", value_type=ValueProto.ValueType.FLOAT,
        ),
    ]
    entity_specs = [
        EntitySpecProto(
            name="my_entity_1", value_type=ValueProto.ValueType.INT64,
        )
    ]
    kafka_source = Source(
        type=SourceType.KAFKA,
        kafka_source_config=KafkaSourceConfig(
            bootstrap_servers="localhost:9092", topic="topic"
        ),
    )
    spec_proto = FeatureSetSpecProto(
        name="my_feature_set",
        max_age=Duration(seconds=3600),
        labels={"key1": "val1", "key2": "val2"},
        features=feature_specs,
        entities=entity_specs,
        source=kafka_source,
    )
    canned_response = GetFeatureSetResponse(
        feature_set=FeatureSetProto(spec=spec_proto, meta=FeatureSetMetaProto())
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=canned_response,
    )

    mocked_client.set_project("my_project")
    feature_set = mocked_client.get_feature_set("my_feature_set")

    # Same checks, in the same order, as one chained condition would run them.
    assert feature_set.name == "my_feature_set"
    for label_key, label_value in (("key1", "val1"), ("key2", "val2")):
        assert label_key in feature_set.labels
        assert feature_set.labels[label_key] == label_value
    assert feature_set.fields["my_feature_1"].name == "my_feature_1"
    assert feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
    assert feature_set.fields["my_entity_1"].name == "my_entity_1"
    assert feature_set.fields["my_entity_1"].dtype == ValueType.INT64
    assert len(feature_set.features) == 2
    assert len(feature_set.entities) == 1
def test_get_batch_features(self, mocked_client, mocker):
    """
    Exercise ``get_batch_features`` against mocked Core and Serving stubs.

    Core's ``GetFeatureSet`` is patched to return a "driver" feature set;
    Serving's ``GetBatchFeatures`` and ``GetJob`` are patched to return a
    finished download job whose single file URI points at a local Avro file
    written from ``expected_dataframe``. The test then checks the job id and
    status and that the ``driver_id`` column read back through
    ``to_dataframe`` equals the expected one.
    """
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(
                            name="driver",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY
                ),
            )
        ),
    )

    expected_dataframe = pd.DataFrame(
        {
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        }
    )

    # tempfile.mktemp() is deprecated and race-prone: it only returns a name,
    # so another process can claim the path first. Create the file atomically
    # instead and keep it on disk (delete=False) so it can be written to and
    # later read back through its file:// URI.
    with tempfile.NamedTemporaryFile(delete=False) as avro_file:
        final_results = avro_file.name
    pandavro.to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

    # The same finished job is served by both GetBatchFeatures and GetJob.
    finished_job = BatchRetrievalJob(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{final_results}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetBatchFeatures",
        return_value=GetBatchFeaturesResponse(job=finished_job),
    )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetJob",
        return_value=GetJobResponse(job=finished_job),
    )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetFeastServingInfo",
        return_value=GetFeastServingInfoResponse(
            job_staging_location=f"file://{tempfile.mkdtemp()}/",
            type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
        ),
    )

    mocked_client.set_project("project1")

    # TODO: Abstract away GCS client and GCP dependency
    # NOTE: Feast Serving does not allow for feature references
    # that specify the same feature in the same request.
    with patch("google.cloud.storage.Client"):
        response = mocked_client.get_batch_features(
            entity_rows=pd.DataFrame(
                {
                    "datetime": [
                        # `pd.datetime` was a deprecated alias of the stdlib
                        # class and no longer exists in pandas 2.x.
                        datetime.now(tz=timezone("Asia/Singapore"))
                        for _ in range(3)
                    ],
                    "driver": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }
            ),
            feature_refs=["driver:driver_id", "driver_id"],
            project="driver_project",
        )  # Type: GetBatchFeaturesResponse

    assert response.id == "123" and response.status == JobStatus.JOB_STATUS_DONE

    actual_dataframe = response.to_dataframe()

    assert actual_dataframe[["driver_id"]].equals(
        expected_dataframe[["driver_id"]]
    )
def test_get_batch_features(self, mocked_client, mocker):
    """
    Exercise ``get_batch_features`` against mocked Core and Serving stubs.

    Core's ``GetFeatureSet`` is patched to return a "customer_fs" feature
    set; Serving's ``GetBatchFeatures`` and ``GetJob`` are patched to return
    a finished download job whose single file URI points at a local Avro
    file written from ``expected_dataframe``. The test then checks the job
    id and status and that the two feature columns read back through
    ``to_dataframe`` equal the expected ones.
    """
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="customer_fs",
                    version=1,
                    project="my_project",
                    entities=[
                        EntitySpecProto(
                            name="customer",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="customer_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="customer_feature_2",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY
                ),
            )
        ),
    )

    expected_dataframe = pd.DataFrame(
        {
            "datetime": [datetime.utcnow() for _ in range(3)],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "my_project/customer_feature_1:1": [1001, 1002, 1003],
            "my_project/customer_feature_2:1": [1001, 1002, 1003],
        }
    )

    # tempfile.mktemp() is deprecated and race-prone: it only returns a name,
    # so another process can claim the path first. Create the file atomically
    # instead and keep it on disk (delete=False) so it can be written to and
    # later read back through its file:// URI.
    with tempfile.NamedTemporaryFile(delete=False) as avro_file:
        final_results = avro_file.name
    to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

    # The same finished job is served by both GetBatchFeatures and GetJob.
    finished_job = BatchFeaturesJob(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{final_results}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetBatchFeatures",
        return_value=GetBatchFeaturesResponse(job=finished_job),
    )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetJob",
        return_value=GetJobResponse(job=finished_job),
    )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetFeastServingInfo",
        return_value=GetFeastServingInfoResponse(
            job_staging_location=f"file://{tempfile.mkdtemp()}/",
            type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
        ),
    )

    mocked_client.set_project("project1")

    response = mocked_client.get_batch_features(
        entity_rows=pd.DataFrame(
            {
                "datetime": [
                    # `pd.datetime` was a deprecated alias of the stdlib
                    # class and no longer exists in pandas 2.x.
                    datetime.now(tz=timezone("Asia/Singapore"))
                    for _ in range(3)
                ],
                "customer": [1001, 1002, 1003],
                "transaction": [1001, 1002, 1003],
            }
        ),
        feature_refs=[
            "my_project/customer_feature_1:1",
            "my_project/customer_feature_2:1",
        ],
    )  # type: Job

    assert response.id == "123" and response.status == JobStatus.JOB_STATUS_DONE

    actual_dataframe = response.to_dataframe()

    feature_columns = [
        "my_project/customer_feature_1:1",
        "my_project/customer_feature_2:1",
    ]
    assert actual_dataframe[feature_columns].equals(
        expected_dataframe[feature_columns]
    )