def test_feature_set_ingest_fail_if_pending(
    self, dataframe, exception, test_client, mocker
):
    """Check that ingesting into a feature set reported as pending raises.

    A Kafka-backed feature set is applied through ``test_client``; the core
    stub's ``GetFeatureSet`` is then patched to return that feature set with
    ``STATUS_PENDING``. ``ingest`` is called with ``timeout=1`` and must raise
    ``exception``.

    Args:
        dataframe: Data passed to ``ingest`` (fixture / parametrized).
        exception: Exception type the ingest call is expected to raise.
        test_client: Client under test.
        mocker: pytest-mock fixture used to patch the core service stub.
    """
    test_client.set_project("project1")

    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(driver_fs)

    # Make core report the feature set as still pending.
    driver_fs = driver_fs.to_proto()
    driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs),
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer"):
        # Only the ingest call may raise; an error during the setup above
        # must fail the test instead of being mistaken for the expected one.
        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest a dataframe into a feature set that core reports as ready.

    The feature set is applied through ``client``; the core stub's
    ``GetFeatureSet`` is patched to return it with ``STATUS_READY``, and
    ``feast.client.get_producer`` is patched for the duration of the ingest.
    The test passes if ``ingest`` completes without raising.

    Args:
        dataframe: Data passed to ``ingest`` (fixture / parametrized).
        client: Client under test.
        mocker: pytest-mock fixture used to patch the core service stub.
    """
    client.set_project("project1")

    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    client.apply(driver_fs)

    driver_fs = driver_fs.to_proto()
    driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs),
    )

    # Need to create a mock producer. The mock object itself is never
    # inspected, so it is not bound to a name.
    with patch("feast.client.get_producer"):
        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe)
def test_feature_set_ingest_throws_exception_if_kafka_down(
    self, dataframe, test_client, exception, mocker
):
    """Expect ``ingest`` to raise ``exception`` for a ready feature set.

    The feature set points at the broker ``localhost:4412`` and no producer
    is patched in this test.
    NOTE(review): presumably nothing listens on that port, which is what
    makes the ingest fail -- confirm against the test parametrization.
    """
    test_client.set_project("project1")

    kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
    feature_set = FeatureSet("driver-feature-set", source=kafka_source)
    fields = (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    )
    for field in fields:
        feature_set.add(field)

    # Register with Feast core
    test_client.apply(feature_set)

    # Core reports the feature set as ready for ingestion.
    feature_set_proto = feature_set.to_proto()
    feature_set_proto.meta.status = FeatureSetStatusProto.STATUS_READY
    ready_response = GetFeatureSetResponse(feature_set=feature_set_proto)
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=ready_response,
    )

    with pytest.raises(exception):
        test_client.ingest("driver-feature-set", dataframe)
def test_feature_set_types_success(self, client, dataframe, mocker):
    """Ingest a dataframe into a feature set with one feature per value type.

    The ``all_types`` feature set declares a ``user_id`` entity plus scalar
    and list features. It is applied through ``client``, the core stub's
    ``GetFeatureSet`` is patched to return its proto, and the feature set
    object itself is passed to ``ingest``.
    """
    typed_features = (
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
        ("double_list_feature", ValueType.DOUBLE_LIST),
    )
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=feature_name, dtype=value_type)
            for feature_name, value_type in typed_features
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    client.apply(all_types_fs)

    core_response = GetFeatureSetResponse(feature_set=all_types_fs.to_proto())
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=core_response,
    )

    # Ingest data into Feast
    client.ingest(all_types_fs, dataframe=dataframe)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest a dataframe using a mocked message producer.

    The feature set gets a Kafka source, the client's ``_message_producer``
    is replaced with a ``MagicMock``, and the core stub's ``GetFeatureSet``
    is patched to return the feature set's proto. The test passes if
    ``ingest`` completes without raising.
    """
    feature_set = FeatureSet("driver-feature-set")
    for field in (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    ):
        feature_set.add(field)
    feature_set.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Stand-in producer with an explicitly mocked ``produce``.
    producer = MagicMock()
    producer.produce = MagicMock()
    client._message_producer = producer

    # Register with Feast core
    client.apply(feature_set)

    core_response = GetFeatureSetResponse(feature_set=feature_set.to_proto())
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=core_response,
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_get_feature_set(self, mocked_client, mocker):
    """Check that ``get_feature_set`` reflects the proto returned by core.

    The core stub's ``GetFeatureSet`` is patched to return a feature set
    proto with two FLOAT features, one INT64 entity, two labels and a Kafka
    source. The object returned by the client is checked for its name,
    labels, field names/dtypes and feature/entity counts.
    """
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    from google.protobuf.duration_pb2 import Duration

    feature_specs = [
        FeatureSpecProto(
            name=feature_name, value_type=ValueProto.ValueType.FLOAT,
        )
        for feature_name in ("my_feature_1", "my_feature_2")
    ]
    entity_specs = [
        EntitySpecProto(
            name="my_entity_1", value_type=ValueProto.ValueType.INT64,
        )
    ]
    kafka_source = Source(
        type=SourceType.KAFKA,
        kafka_source_config=KafkaSourceConfig(
            bootstrap_servers="localhost:9092", topic="topic"
        ),
    )
    feature_set_proto = FeatureSetProto(
        spec=FeatureSetSpecProto(
            name="my_feature_set",
            max_age=Duration(seconds=3600),
            labels={"key1": "val1", "key2": "val2"},
            features=feature_specs,
            entities=entity_specs,
            source=kafka_source,
        ),
        meta=FeatureSetMetaProto(),
    )
    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=feature_set_proto),
    )

    mocked_client.set_project("my_project")
    feature_set = mocked_client.get_feature_set("my_feature_set")

    # Same conditions as one chained ``and`` expression, checked in the
    # same order, one per assert.
    assert feature_set.name == "my_feature_set"
    assert "key1" in feature_set.labels
    assert feature_set.labels["key1"] == "val1"
    assert "key2" in feature_set.labels
    assert feature_set.labels["key2"] == "val2"
    assert feature_set.fields["my_feature_1"].name == "my_feature_1"
    assert feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
    assert feature_set.fields["my_entity_1"].name == "my_entity_1"
    assert feature_set.fields["my_entity_1"].dtype == ValueType.INT64
    assert len(feature_set.features) == 2
    assert len(feature_set.entities) == 1
def test_get_feature_set(self, mock_client, mocker):
    """Check that ``get_feature_set`` reflects the versioned spec from core.

    The core stub's ``GetFeatureSet`` is patched to return a spec with
    ``version=2``, two FLOAT features, one INT64 entity and a Kafka source.
    The object returned by ``get_feature_set("my_feature_set", version=2)``
    is checked for its name, version, field names/dtypes and counts.
    """
    mock_client._core_service_stub = Core.CoreServiceStub(grpc.insecure_channel(""))

    from google.protobuf.duration_pb2 import Duration

    feature_specs = [
        FeatureSpec(name=feature_name, value_type=ValueProto.ValueType.FLOAT)
        for feature_name in ("my_feature_1", "my_feature_2")
    ]
    entity_specs = [
        EntitySpec(name="my_entity_1", value_type=ValueProto.ValueType.INT64)
    ]
    kafka_source = Source(
        type=SourceType.KAFKA,
        kafka_source_config=KafkaSourceConfig(
            bootstrap_servers="localhost:9092", topic="topic"
        ),
    )
    feature_set_spec = FeatureSetSpec(
        name="my_feature_set",
        version=2,
        max_age=Duration(seconds=3600),
        features=feature_specs,
        entities=entity_specs,
        source=kafka_source,
    )
    mocker.patch.object(
        mock_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=feature_set_spec),
    )

    feature_set = mock_client.get_feature_set("my_feature_set", version=2)

    # Same conditions as one chained ``and`` expression, checked in the
    # same order, one per assert.
    assert feature_set.name == "my_feature_set"
    assert feature_set.version == 2
    assert feature_set.fields["my_feature_1"].name == "my_feature_1"
    assert feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
    assert feature_set.fields["my_entity_1"].name == "my_entity_1"
    assert feature_set.fields["my_entity_1"].dtype == ValueType.INT64
    assert len(feature_set.features) == 2
    assert len(feature_set.entities) == 1
def test_feature_set_types_success(self, test_client, dataframe, mocker):
    """Ingest a dataframe into a feature set covering the value types.

    The ``all_types`` feature set declares a ``user_id`` entity plus scalar
    and list features (BOOL_LIST is left out, see the TODO below). It is
    applied through ``test_client``, the core stub's ``GetFeatureSet`` is
    patched to return its proto, and ``feast.client.get_producer`` is patched
    while the feature set object is passed to ``ingest``.
    """
    test_client.set_project("project1")

    typed_features = (
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        # TODO: Add support for ("bool_list_feature", ValueType.BOOL_LIST)
        # again https://github.com/feast-dev/feast/issues/341
        ("double_list_feature", ValueType.DOUBLE_LIST),
    )
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=feature_name, dtype=value_type)
            for feature_name, value_type in typed_features
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    test_client.apply(all_types_fs)

    core_response = GetFeatureSetResponse(feature_set=all_types_fs.to_proto())
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=core_response,
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer"):
        # Ingest data into Feast
        test_client.ingest(all_types_fs, dataframe)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest a dataframe into a feature set that has no explicit source.

    The feature set is applied through ``client`` and the core stub's
    ``GetFeatureSet`` is patched to return its proto. The test passes if
    ``ingest`` completes without raising.
    """
    feature_set = FeatureSet("driver-feature-set")
    for field in (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    ):
        feature_set.add(field)

    # Register with Feast core
    client.apply(feature_set)

    core_response = GetFeatureSetResponse(feature_set=feature_set.to_proto())
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=core_response,
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_get_batch_features(self, mocked_client, mocker):
    """Check that ``get_batch_features`` returns a job whose Avro output loads.

    The core and serving stubs are patched so that:
      * ``GetFeatureSet`` returns a ready ``driver`` feature set,
      * ``GetBatchFeatures`` and ``GetJob`` return a finished download job
        pointing at a local Avro file written from ``expected_dataframe``,
      * ``GetFeastServingInfo`` reports a batch serving type with a local
        staging location.

    The test then asserts on the job id/status and that the ``driver_id``
    column of the loaded dataframe equals the expected one.
    """
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(
                            name="driver",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY
                ),
            )
        ),
    )

    expected_dataframe = pd.DataFrame(
        {
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        }
    )

    # Reserve a real file for the Avro output. tempfile.mktemp() only hands
    # out a name, is deprecated, and leaves a race before the file exists.
    with tempfile.NamedTemporaryFile(delete=False) as results_file:
        final_results = results_file.name
    pandavro.to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

    # Both serving calls report the same finished job.
    finished_job = dict(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{final_results}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )
    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetBatchFeatures",
        return_value=GetBatchFeaturesResponse(
            job=BatchRetrievalJob(**finished_job)
        ),
    )
    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetJob",
        return_value=GetJobResponse(job=BatchRetrievalJob(**finished_job)),
    )
    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetFeastServingInfo",
        return_value=GetFeastServingInfoResponse(
            job_staging_location=f"file://{tempfile.mkdtemp()}/",
            type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
        ),
    )

    mocked_client.set_project("project1")

    # TODO: Abstract away GCS client and GCP dependency
    # NOTE: Feast Serving does not allow for feature references
    # that specify the same feature in the same request.
    with patch("google.cloud.storage.Client"):
        response = mocked_client.get_batch_features(
            entity_rows=pd.DataFrame(
                {
                    # ``pd.datetime`` was only an alias of the stdlib class
                    # and no longer exists in pandas 2.x; use the class
                    # directly.
                    "datetime": [
                        datetime.now(tz=timezone("Asia/Singapore"))
                        for _ in range(3)
                    ],
                    "driver": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }
            ),
            feature_refs=["driver:driver_id", "driver_id"],
            project="driver_project",
        )

    assert response.id == "123"
    assert response.status == JobStatus.JOB_STATUS_DONE

    actual_dataframe = response.to_dataframe()
    assert actual_dataframe[["driver_id"]].equals(
        expected_dataframe[["driver_id"]]
    )
def test_get_batch_features(self, mock_client, mocker):
    """Check that ``get_batch_features`` returns a job whose Avro output loads.

    The core and serving stubs are patched so that:
      * ``GetFeatureSet`` returns the ``customer_fs`` spec (version 1),
      * ``GetBatchFeatures`` and ``GetJob`` return a finished download job
        pointing at a local Avro file written from ``expected_dataframe``,
      * ``GetFeastServingInfo`` reports a batch serving type with a local
        staging location.

    The test then asserts on the job id/status and that the two requested
    feature columns of the loaded dataframe equal the expected ones.
    """
    mock_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )
    mock_client._core_service_stub = Core.CoreServiceStub(grpc.insecure_channel(""))

    mocker.patch.object(
        mock_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetSpec(
                name="customer_fs",
                version=1,
                entities=[
                    EntitySpec(
                        name="customer", value_type=ValueProto.ValueType.INT64
                    ),
                    EntitySpec(
                        name="transaction", value_type=ValueProto.ValueType.INT64
                    ),
                ],
                features=[
                    FeatureSpec(
                        name="customer_feature_1",
                        value_type=ValueProto.ValueType.FLOAT,
                    ),
                    FeatureSpec(
                        name="customer_feature_2",
                        value_type=ValueProto.ValueType.STRING,
                    ),
                ],
            )
        ),
    )

    feature_columns = [
        "customer_fs:1:customer_feature_1",
        "customer_fs:1:customer_feature_2",
    ]
    expected_dataframe = pd.DataFrame(
        {
            "datetime": [datetime.utcnow() for _ in range(3)],
            "customer": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "customer_fs:1:customer_feature_1": [1001, 1002, 1003],
            "customer_fs:1:customer_feature_2": [1001, 1002, 1003],
        }
    )

    # Reserve a real file for the Avro output. tempfile.mktemp() only hands
    # out a name, is deprecated, and leaves a race before the file exists.
    with tempfile.NamedTemporaryFile(delete=False) as results_file:
        final_results = results_file.name
    to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

    # Both serving calls report the same finished job.
    finished_job = dict(
        id="123",
        type=JobType.JOB_TYPE_DOWNLOAD,
        status=JobStatus.JOB_STATUS_DONE,
        file_uris=[f"file://{final_results}"],
        data_format=DataFormat.DATA_FORMAT_AVRO,
    )
    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetBatchFeatures",
        return_value=GetBatchFeaturesResponse(
            job=BatchFeaturesJob(**finished_job)
        ),
    )
    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetJob",
        return_value=GetJobResponse(job=BatchFeaturesJob(**finished_job)),
    )
    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetFeastServingInfo",
        return_value=GetFeastServingInfoResponse(
            job_staging_location=f"file://{tempfile.mkdtemp()}/",
            type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
        ),
    )

    response = mock_client.get_batch_features(
        entity_rows=pd.DataFrame(
            {
                # ``pd.datetime`` was only an alias of the stdlib class and
                # no longer exists in pandas 2.x; use the class directly.
                "datetime": [
                    datetime.now(tz=timezone("Asia/Singapore"))
                    for _ in range(3)
                ],
                "customer": [1001, 1002, 1003],
                "transaction": [1001, 1002, 1003],
            }
        ),
        feature_ids=[
            "customer_fs:1:customer_feature_1",
            "customer_fs:1:customer_feature_2",
        ],
    )  # type: Job

    assert response.id == "123"
    assert response.status == JobStatus.JOB_STATUS_DONE

    actual_dataframe = response.to_dataframe()
    assert actual_dataframe[feature_columns].equals(
        expected_dataframe[feature_columns]
    )