def test_feature_set_ingest_success(self, dataframe, client):
    """Ingesting a dataframe into a registered feature set produces messages.

    A feature set with three features and one entity is built by hand,
    pointed at a Kafka source, and given a mocked message producer. After
    the feature set is applied through ``client`` and ``dataframe`` is
    ingested, the mocked producer's ``send`` must have been called.

    Args:
        dataframe: Data handed to ``FeatureSet.ingest``
            (presumably a pytest fixture -- confirm against the test class).
        client: Client used to register the feature set
            (presumably a pytest fixture -- confirm against the test class).
    """
    driver_fs = FeatureSet("driver-feature-set")

    # Add the fields in the same order as before: features first, entity last.
    fields = (
        Feature(name="feature_1", dtype=ValueType.FLOAT),
        Feature(name="feature_2", dtype=ValueType.STRING),
        Feature(name="feature_3", dtype=ValueType.INT64),
        Entity(name="entity_id", dtype=ValueType.INT64),
    )
    for field in fields:
        driver_fs.add(field)

    driver_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Replace the message producer with a mock so calls to send() are recorded.
    mock_producer = MagicMock()
    mock_producer.send = MagicMock()
    driver_fs._message_producer = mock_producer

    # Register with Feast core
    client.apply(driver_fs)

    # Ingest data into Feast
    driver_fs.ingest(dataframe=dataframe)

    # Make sure message producer is called
    driver_fs._message_producer.send.assert_called()
def test_feature_set_types_success(self, client, dataframe):
    """Ingesting into a feature set that declares every value type succeeds.

    The feature set declares one ``INT64`` entity and one feature for each
    scalar and list value type listed below. Its message producer is
    replaced with a mock; after applying the feature set through ``client``
    and ingesting ``dataframe``, the mock's ``send`` must have been called.

    Args:
        client: Client used to register the feature set
            (presumably a pytest fixture -- confirm against the test class).
        dataframe: Data handed to ``FeatureSet.ingest``
            (presumably a pytest fixture -- confirm against the test class).
    """
    # (feature name, value type) pairs, in declaration order.
    feature_specs = [
        ("float_feature", ValueType.FLOAT),
        ("int64_feature", ValueType.INT64),
        ("int32_feature", ValueType.INT32),
        ("string_feature", ValueType.STRING),
        ("bytes_feature", ValueType.BYTES),
        ("bool_feature", ValueType.BOOL),
        ("double_feature", ValueType.DOUBLE),
        ("float_list_feature", ValueType.FLOAT_LIST),
        ("int64_list_feature", ValueType.INT64_LIST),
        ("int32_list_feature", ValueType.INT32_LIST),
        ("string_list_feature", ValueType.STRING_LIST),
        ("bytes_list_feature", ValueType.BYTES_LIST),
        ("bool_list_feature", ValueType.BOOL_LIST),
        ("double_list_feature", ValueType.DOUBLE_LIST),
    ]

    entities = [Entity(name="user_id", dtype=ValueType.INT64)]
    features = [
        Feature(name=feature_name, dtype=value_type)
        for feature_name, value_type in feature_specs
    ]

    all_types_fs = FeatureSet(
        name="all_types",
        entities=entities,
        features=features,
        max_age=Duration(seconds=3600),
    )
    all_types_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Replace the message producer with a mock so calls to send() are recorded.
    mock_producer = MagicMock()
    mock_producer.send = MagicMock()
    all_types_fs._message_producer = mock_producer

    # Register with Feast core
    client.apply(all_types_fs)

    # Ingest data into Feast
    all_types_fs.ingest(dataframe=dataframe)

    # Make sure message producer is called
    all_types_fs._message_producer.send.assert_called()
def test_feature_set_ingest_failure(self, client, dataframe, exception):
    """Building, registering and ingesting a feature set raises ``exception``.

    The whole flow runs inside ``pytest.raises`` so the expected exception
    may come from any step: construction, ``update_from_dataset``,
    ``client.apply`` or ``ingest``.

    Args:
        client: Client used to register the feature set
            (presumably a pytest fixture -- confirm against the test class).
        dataframe: Data used both to infer the schema and to ingest
            (presumably supplied by fixture/parametrization -- confirm).
        exception: Exception type the flow is expected to raise
            (presumably supplied by parametrization -- confirm).
    """
    with pytest.raises(exception):
        # Create feature set
        failing_fs = FeatureSet("driver-feature-set")
        failing_fs.source = KafkaSource(
            topic="feature-topic", brokers="fake.broker.com"
        )

        # Replace the message producer with a mock.
        mock_producer = MagicMock()
        mock_producer.send = MagicMock()
        failing_fs._message_producer = mock_producer

        # Update based on dataset
        entity_mapping = {
            "entity_id": Entity(name="entity", dtype=ValueType.INT64),
        }
        failing_fs.update_from_dataset(dataframe, column_mapping=entity_mapping)

        # Register with Feast core
        client.apply(failing_fs)

        # Ingest data into Feast
        failing_fs.ingest(dataframe=dataframe)
def test_all_types(client):
    """Register, ingest and read back a feature set covering every value type.

    Steps:
      1. Fetch version 1 of the ``all_types`` feature set, registering it
         first if it does not exist yet.
      2. Ingest a three-row dataframe with one column per value type.
      3. Poll online serving for the first row until the first element of
         ``float_list_feature`` matches what was sent (within
         ``FLOAT_TOLERANCE``).

    Args:
        client: Client used to register the feature set and to query
            online features (presumably a pytest fixture -- confirm).

    Raises:
        Exception: If the feature set still cannot be retrieved after it
            was registered.
        TimeoutError: If serving does not return the ingested value before
            the polling deadline expires.
    """
    # Upper bound on the polling phase so a broken deployment fails the test
    # instead of hanging it forever. NOTE(review): 300s is a judgement call;
    # tune it to how long ingestion takes in the target environment.
    serving_timeout_seconds = 300
    row_count = 3

    all_types_fs = client.get_feature_set(name="all_types", version=1)

    if all_types_fs is None:
        # Register new feature set if it doesn't exist
        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register feature set
        client.apply(all_types_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(10)
        all_types_fs = client.get_feature_set(name="all_types", version=1)

        if all_types_fs is None:
            raise Exception(
                "Client cannot retrieve 'all_types_fs' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed."
            )

    all_types_df = pd.DataFrame(
        {
            # Timezone-aware "now" in UTC; equivalent to the deprecated
            # utcnow().replace(tzinfo=pytz.utc).
            "datetime": [datetime.now(tz=pytz.utc) for _ in range(row_count)],
            "user_id": [1001, 1002, 1003],
            "int32_feature": [np.int32(1), np.int32(2), np.int32(3)],
            "int64_feature": [np.int64(1), np.int64(2), np.int64(3)],
            # np.float was only an alias for the builtin float and has been
            # removed from NumPy, so plain float literals are used instead.
            "float_feature": [0.1, 0.2, 0.3],
            "double_feature": [np.float64(0.1), np.float64(0.2), np.float64(0.3)],
            "string_feature": ["one", "two", "three"],
            "bytes_feature": [b"one", b"two", b"three"],
            "bool_feature": [True, False, False],
            # Every row carries the same list value for the list features.
            "int32_list_feature": [
                np.array([1, 2, 3, 4], dtype=np.int32) for _ in range(row_count)
            ],
            "int64_list_feature": [
                np.array([1, 2, 3, 4], dtype=np.int64) for _ in range(row_count)
            ],
            "float_list_feature": [
                np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32)
                for _ in range(row_count)
            ],
            "double_list_feature": [
                np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64)
                for _ in range(row_count)
            ],
            "string_list_feature": [
                np.array(["one", "two", "three"]) for _ in range(row_count)
            ],
            "bytes_list_feature": [
                np.array([b"one", b"two", b"three"]) for _ in range(row_count)
            ],
            "bool_list_feature": [
                np.array([True, False, True]) for _ in range(row_count)
            ],
        }
    )

    # Ingest user embedding data
    all_types_fs.ingest(dataframe=all_types_df)
    time.sleep(3)

    sent_float_list = all_types_df.iloc[0]["float_list_feature"]

    # Poll serving for feature values until the correct values are returned
    # or the deadline passes.
    deadline = time.monotonic() + serving_timeout_seconds
    while time.monotonic() < deadline:
        time.sleep(1)

        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "user_id": Value(int64_val=all_types_df.iloc[0]["user_id"])
                    }
                )
            ],
            feature_ids=[
                "all_types:1:float_feature",
                "all_types:1:int64_feature",
                "all_types:1:int32_feature",
                "all_types:1:string_feature",
                "all_types:1:bytes_feature",
                "all_types:1:bool_feature",
                "all_types:1:double_feature",
                "all_types:1:float_list_feature",
                "all_types:1:int64_list_feature",
                "all_types:1:int32_list_feature",
                "all_types:1:string_list_feature",
                "all_types:1:bytes_list_feature",
                "all_types:1:bool_list_feature",
                "all_types:1:double_list_feature",
            ],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        returned_float_list = (
            response.field_values[0]
            .fields["all_types:1:float_list_feature"]
            .float_list_val.val
        )

        # An empty list means the value has not reached serving yet; keep
        # polling rather than failing on the index below.
        if len(returned_float_list) == 0:
            continue

        # TODO: Add tests for each value and type
        if math.isclose(
            returned_float_list[0], sent_float_list[0], abs_tol=FLOAT_TOLERANCE
        ):
            break

        # Wait for values to appear in Serving
        time.sleep(1)
    else:
        # The loop ended because the deadline passed, not because of a match.
        raise TimeoutError(
            "Serving did not return the ingested 'all_types' feature values "
            f"within {serving_timeout_seconds} seconds."
        )