def test_feature_set_ingest_throws_exception_if_kafka_down(
    self, dataframe, test_client, exception, mocker
):
    """Ingestion must surface an error when the Kafka broker is unreachable."""
    test_client.set_project("project1")

    # Point the source at a broker that is not running.
    feature_set = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="localhost:4412", topic="test"),
    )
    for field_name, field_dtype in (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    ):
        feature_set.add(Feature(name=field_name, dtype=field_dtype))
    feature_set.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(feature_set)

    # Report the feature set as READY so ingest proceeds to the broker.
    ready_proto = feature_set.to_proto()
    ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=ready_proto),
    )

    with pytest.raises(exception):
        test_client.ingest("driver-feature-set", dataframe)
def test_apply_feature_set_success(self, client):
    """Registering two feature sets with Core makes both listable afterwards."""
    # First feature set: two scalar features plus an entity.
    fs1 = FeatureSet("my-feature-set-1")
    fs1.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64))
    fs1.add(Feature(name="fs1-my-feature-2", dtype=ValueType.STRING))
    fs1.add(Entity(name="fs1-my-entity-1", dtype=ValueType.INT64))

    # Second feature set: two list-typed features plus an entity.
    fs2 = FeatureSet("my-feature-set-2")
    fs2.add(Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST))
    fs2.add(Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST))
    fs2.add(Entity(name="fs2-my-entity-1", dtype=ValueType.INT64))

    # Register both with Core, then list them back.
    client.apply(fs1)
    client.apply(fs2)
    feature_sets = client.list_feature_sets()

    assert len(feature_sets) == 2
    assert feature_sets[0].name == "my-feature-set-1"
    assert feature_sets[0].features[0].name == "fs1-my-feature-1"
    assert feature_sets[0].features[0].dtype == ValueType.INT64
    assert feature_sets[1].features[1].dtype == ValueType.BYTES_LIST
def test_feature_set_ingest_fail_if_pending(
    self, dataframe, exception, test_client, mocker
):
    """Ingest into a feature set stuck in STATUS_PENDING must raise within the timeout.

    Fix: the original wrapped ALL of the setup (registration, proto mutation,
    stub mocking) inside ``pytest.raises``, so a setup failure of the expected
    exception type would make the test pass spuriously. Only the ingest call
    belongs inside the raises block.
    """
    test_client.set_project("project1")
    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
    driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
    driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    test_client.apply(driver_fs)

    # Report the feature set as still PENDING so ingest cannot proceed.
    driver_fs_proto = driver_fs.to_proto()
    driver_fs_proto.meta.status = FeatureSetStatusProto.STATUS_PENDING
    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs_proto),
    )

    # Need to create a mock producer
    with patch("feast.client.get_producer"):
        # Only the ingest call itself may raise the expected exception.
        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe, timeout=1)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """A READY feature set accepts a dataframe ingest without raising."""
    client.set_project("project1")

    driver_fs = FeatureSet(
        "driver-feature-set",
        source=KafkaSource(brokers="kafka:9092", topic="test"),
    )
    for field_name, field_dtype in (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    ):
        driver_fs.add(Feature(name=field_name, dtype=field_dtype))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    client.apply(driver_fs)

    # Report the feature set as READY so ingestion is allowed to start.
    ready_proto = driver_fs.to_proto()
    ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY
    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=ready_proto),
    )

    # Mock out the Kafka producer, then ingest.
    with patch("feast.client.get_producer") as mocked_queue:
        client.ingest("driver-feature-set", dataframe)
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Ingest succeeds when the client's message producer is stubbed out."""
    driver_fs = FeatureSet("driver-feature-set")
    for field_name, field_dtype in (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    ):
        driver_fs.add(Feature(name=field_name, dtype=field_dtype))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))
    driver_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Replace the Kafka producer so no real broker is needed.
    client._message_producer = MagicMock()
    client._message_producer.produce = MagicMock()

    # Register with Feast core
    client.apply(driver_fs)

    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_unequal_feature_based_on_labels():
    """Feature equality must take the labels mapping into account."""
    labeled = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"})
    same_labels = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"})
    assert labeled == same_labels

    # Missing labels -> unequal.
    no_labels = Feature(name="feature-1", dtype=ValueType.INT64)
    assert labeled != no_labels

    # Different label value -> unequal.
    other_value = Feature(
        name="feature-1", dtype=ValueType.INT64, labels={"k1": "notv1"}
    )
    assert labeled != other_value
def test_import_tfx_schema(self):
    """Importing a TFX schema populates presence/shape/domain info on all fields."""
    tests_folder = pathlib.Path(__file__).parent
    schema_path = (
        tests_folder / "data" / "tensorflow_metadata" / "bikeshare_schema.json"
    )
    # Fix: Path.read_text() opens AND closes the file; the previous
    # open(...).read() leaked the file handle.
    test_input_schema = schema_pb2.Schema()
    json_format.Parse(schema_path.read_text(), test_input_schema)

    feature_set = FeatureSet(
        name="bikeshare",
        entities=[Entity(name="station_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="name", dtype=ValueType.STRING),
            Feature(name="status", dtype=ValueType.STRING),
            Feature(name="latitude", dtype=ValueType.FLOAT),
            Feature(name="longitude", dtype=ValueType.FLOAT),
            Feature(name="location", dtype=ValueType.STRING),
        ],
    )

    # Before update: no TFX metadata should be present yet.
    for entity in feature_set.entities:
        assert entity.presence is None
        assert entity.shape is None
    for feature in feature_set.features:
        assert feature.presence is None
        assert feature.shape is None
        assert feature.string_domain is None
        assert feature.float_domain is None
        assert feature.int_domain is None

    feature_set.import_tfx_schema(test_input_schema)

    # After update: every field carries presence/shape, and the domain kind
    # matches the field's value type.
    for entity in feature_set.entities:
        assert entity.presence is not None
        assert entity.shape is not None
    for feature in feature_set.features:
        assert feature.presence is not None
        assert feature.shape is not None
        if feature.name in ["location", "name", "status"]:
            assert feature.string_domain is not None
        elif feature.name in ["latitude", "longitude"]:
            assert feature.float_domain is not None
        elif feature.name in ["station_id"]:
            assert feature.int_domain is not None
def test_feature_set_ingest_success(self, dataframe, client, mocker):
    """Dataframe ingestion into a registered feature set completes without error."""
    driver_fs = FeatureSet("driver-feature-set")
    for field_name, field_dtype in (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    ):
        driver_fs.add(Feature(name=field_name, dtype=field_dtype))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

    # Register with Feast core
    client.apply(driver_fs)

    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest("driver-feature-set", dataframe=dataframe)
def test_feature_class_contains_labels():
    """Labels passed to a Feature survive adding it to a feature set."""
    fs = FeatureSet("my-feature-set", labels={"key1": "val1", "key2": "val2"})
    labeled_feature = Feature(
        name="my-feature-1",
        dtype=ValueType.INT64,
        labels={"feature_key1": "feature_val1"},
    )
    fs.add(labeled_feature)

    stored_labels = fs.features[0].labels
    assert "feature_key1" in stored_labels.keys()
    assert stored_labels["feature_key1"] == "feature_val1"
def test_feature_set_import_export_yaml(self):
    """A feature set survives a YAML round trip with equality preserved."""
    original = FeatureSet(
        name="bikeshare",
        entities=[Entity(name="station_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="name", dtype=ValueType.STRING),
            Feature(name="longitude", dtype=ValueType.FLOAT),
            Feature(name="location", dtype=ValueType.STRING),
        ],
    )

    # Serialize to a YAML string, then rebuild a feature set from it.
    yaml_string = original.to_yaml()
    round_tripped = FeatureSet.from_yaml(yaml_string)

    # The round trip must not lose or alter any information.
    assert original == round_tripped
def test_feature_set_ingest_success(self, dataframe, client):
    """Ingesting directly through the feature set publishes via its producer."""
    driver_fs = FeatureSet("driver-feature-set")
    for field_name, field_dtype in (
        ("feature_1", ValueType.FLOAT),
        ("feature_2", ValueType.STRING),
        ("feature_3", ValueType.INT64),
    ):
        driver_fs.add(Feature(name=field_name, dtype=field_dtype))
    driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))
    driver_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Stub the producer so no real Kafka broker is required.
    driver_fs._message_producer = MagicMock()
    driver_fs._message_producer.send = MagicMock()

    # Register with Feast core
    client.apply(driver_fs)

    # Ingest data into Feast
    driver_fs.ingest(dataframe=dataframe)

    # The stubbed producer must have been used to publish rows.
    driver_fs._message_producer.send.assert_called()
def test_add_features_from_df_success(
    self,
    dataframe,
    feature_count,
    entity_count,
    discard_unused_fields,
    features,
    entities,
):
    """Field inference from a dataframe yields the expected field counts."""
    # Start from a feature set that already has one dummy feature and entity.
    feature_set = FeatureSet(
        name="my_feature_set",
        features=[Feature(name="dummy_f1", dtype=ValueType.INT64)],
        entities=[Entity(name="dummy_entity_1", dtype=ValueType.INT64)],
    )

    feature_set.infer_fields_from_df(
        dataframe,
        discard_unused_fields=discard_unused_fields,
        features=features,
        entities=entities,
    )

    assert len(feature_set.features) == feature_count
    assert len(feature_set.entities) == entity_count
def test_feature_set_types_success(self, test_client, dataframe, mocker):
    """Every supported value type can be registered and ingested via the client."""
    test_client.set_project("project1")

    # One feature per supported value type (dict order fixes feature order).
    dtypes_by_name = {
        "float_feature": ValueType.FLOAT,
        "int64_feature": ValueType.INT64,
        "int32_feature": ValueType.INT32,
        "string_feature": ValueType.STRING,
        "bytes_feature": ValueType.BYTES,
        "bool_feature": ValueType.BOOL,
        "double_feature": ValueType.DOUBLE,
        "float_list_feature": ValueType.FLOAT_LIST,
        "int64_list_feature": ValueType.INT64_LIST,
        "int32_list_feature": ValueType.INT32_LIST,
        "string_list_feature": ValueType.STRING_LIST,
        "bytes_list_feature": ValueType.BYTES_LIST,
        # "bool_list_feature": ValueType.BOOL_LIST,
        # TODO: Add support for this type again
        # https://github.com/feast-dev/feast/issues/341
        "double_list_feature": ValueType.DOUBLE_LIST,
    }
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=name, dtype=dtype)
            for name, dtype in dtypes_by_name.items()
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    test_client.apply(all_types_fs)

    mocker.patch.object(
        test_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
    )

    # Mock out the Kafka producer, then ingest.
    with patch("feast.client.get_producer"):
        test_client.ingest(all_types_fs, dataframe)
class TestFeatureSet:
    """FeatureSet unit tests backed by an in-process mock Core gRPC server."""

    @pytest.fixture(scope="function")
    def server(self):
        """Start a throwaway gRPC Core server for the duration of one test."""
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        Core.add_CoreServiceServicer_to_server(CoreServicer(), server)
        server.add_insecure_port("[::]:50051")
        server.start()
        yield server
        server.stop(0)

    @pytest.fixture
    def client(self, server):
        """Feast client wired to the mock Core server fixture."""
        return Client(core_url="localhost:50051")

    def test_add_remove_features_success(self):
        """Dropping one of two features leaves exactly the other behind."""
        feature_set = FeatureSet("my-feature-set")
        feature_set.add(Feature(name="my-feature-1", dtype=ValueType.INT64))
        feature_set.add(Feature(name="my-feature-2", dtype=ValueType.INT64))
        feature_set.drop(name="my-feature-1")
        remaining = feature_set.features
        assert len(remaining) == 1 and remaining[0].name == "my-feature-2"

    def test_remove_feature_failure(self):
        """Dropping a feature that was never added must raise ValueError."""
        with pytest.raises(ValueError):
            feature_set = FeatureSet("my-feature-set")
            feature_set.drop(name="my-feature-1")

    def test_update_from_source_failure(self):
        """Inferring fields from an empty dataframe must raise."""
        with pytest.raises(Exception):
            empty_df = pd.DataFrame()
            feature_set = FeatureSet("driver-feature-set")
            feature_set.infer_fields_from_df(empty_df)

    @pytest.mark.parametrize(
        "dataframe,feature_count,entity_count,discard_unused_fields,features,entities",
        [
            (
                dataframes.GOOD,
                3,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                5,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                6,
                1,
                True,
                [Feature(name="feature_6", dtype=ValueType.INT64)],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                5,
                2,
                True,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                6,
                3,
                False,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.NO_FEATURES,
                0,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                pd.DataFrame(
                    {
                        "datetime": [
                            datetime.utcnow().replace(tzinfo=pytz.utc)
                            for _ in range(3)
                        ]
                    }
                ),
                0,
                0,
                True,
                [],
                [],
            ),
        ],
        ids=[
            "Test small dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity and feature",
            "Test larger dataframe update with two hardcoded entities and discarding of existing fields",
            "Test larger dataframe update with two hardcoded entities and retention of existing fields",
            "Test dataframe with no featuresdataframe",
            "Test empty dataframe",
        ],
    )
    def test_add_features_from_df_success(
        self,
        dataframe,
        feature_count,
        entity_count,
        discard_unused_fields,
        features,
        entities,
    ):
        """Field inference from a dataframe yields the expected field counts."""
        # Start from a feature set that already has one dummy feature/entity.
        feature_set = FeatureSet(
            name="my_feature_set",
            features=[Feature(name="dummy_f1", dtype=ValueType.INT64)],
            entities=[Entity(name="dummy_entity_1", dtype=ValueType.INT64)],
        )
        feature_set.infer_fields_from_df(
            dataframe,
            discard_unused_fields=discard_unused_fields,
            features=features,
            entities=entities,
        )
        assert len(feature_set.features) == feature_count
        assert len(feature_set.entities) == entity_count
def test_unequal_feature_other_has_no_labels():
    """A labelled feature never compares equal to an otherwise-identical unlabelled one."""
    labelled = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"})
    unlabelled = Feature(name="feature-1", dtype=ValueType.INT64)
    assert labelled != unlabelled
def test_feature_set_types_success(self, client, dataframe):
    """All value types register and ingest through the feature set's own producer."""
    # One feature per supported value type (dict order fixes feature order).
    dtypes_by_name = {
        "float_feature": ValueType.FLOAT,
        "int64_feature": ValueType.INT64,
        "int32_feature": ValueType.INT32,
        "string_feature": ValueType.STRING,
        "bytes_feature": ValueType.BYTES,
        "bool_feature": ValueType.BOOL,
        "double_feature": ValueType.DOUBLE,
        "float_list_feature": ValueType.FLOAT_LIST,
        "int64_list_feature": ValueType.INT64_LIST,
        "int32_list_feature": ValueType.INT32_LIST,
        "string_list_feature": ValueType.STRING_LIST,
        "bytes_list_feature": ValueType.BYTES_LIST,
        "bool_list_feature": ValueType.BOOL_LIST,
        "double_list_feature": ValueType.DOUBLE_LIST,
    }
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=name, dtype=dtype)
            for name, dtype in dtypes_by_name.items()
        ],
        max_age=Duration(seconds=3600),
    )
    all_types_fs.source = KafkaSource(topic="feature-topic", brokers="127.0.0.1")

    # Stub the producer so no real Kafka broker is required.
    all_types_fs._message_producer = MagicMock()
    all_types_fs._message_producer.send = MagicMock()

    # Register with Feast core
    client.apply(all_types_fs)

    # Ingest data into Feast
    all_types_fs.ingest(dataframe=dataframe)

    # The stubbed producer must have been used to publish rows.
    all_types_fs._message_producer.send.assert_called()
def test_feature_set_types_success(self, client, dataframe, mocker):
    """All value types register with Core and ingest through the client."""
    # One feature per supported value type (dict order fixes feature order).
    dtypes_by_name = {
        "float_feature": ValueType.FLOAT,
        "int64_feature": ValueType.INT64,
        "int32_feature": ValueType.INT32,
        "string_feature": ValueType.STRING,
        "bytes_feature": ValueType.BYTES,
        "bool_feature": ValueType.BOOL,
        "double_feature": ValueType.DOUBLE,
        "float_list_feature": ValueType.FLOAT_LIST,
        "int64_list_feature": ValueType.INT64_LIST,
        "int32_list_feature": ValueType.INT32_LIST,
        "string_list_feature": ValueType.STRING_LIST,
        "bytes_list_feature": ValueType.BYTES_LIST,
        "bool_list_feature": ValueType.BOOL_LIST,
        "double_list_feature": ValueType.DOUBLE_LIST,
    }
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name=name, dtype=dtype)
            for name, dtype in dtypes_by_name.items()
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    client.apply(all_types_fs)

    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest(all_types_fs, dataframe=dataframe)
class TestFeatureSet:
    """FeatureSet unit tests: field management, dataframe inference, and TFX
    schema import/export, backed by an in-process mock Core gRPC server.

    Fix applied in this revision: ``test_import_tfx_schema`` and
    ``test_export_tfx_schema`` previously used ``open(...).read()``, leaking
    the file handle; both now use ``pathlib.Path.read_text()`` which closes
    the file deterministically.
    """

    @pytest.fixture(scope="function")
    def server(self):
        """Start a throwaway gRPC Core server for the duration of one test."""
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        Core.add_CoreServiceServicer_to_server(CoreServicer(), server)
        server.add_insecure_port("[::]:50051")
        server.start()
        yield server
        server.stop(0)

    @pytest.fixture
    def client(self, server):
        """Feast client wired to the mock Core server fixture."""
        return Client(core_url="localhost:50051")

    def test_add_remove_features_success(self):
        """Dropping one of two features leaves exactly the other behind."""
        fs = FeatureSet("my-feature-set")
        fs.add(Feature(name="my-feature-1", dtype=ValueType.INT64))
        fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64))
        fs.drop(name="my-feature-1")
        assert len(fs.features) == 1 and fs.features[0].name == "my-feature-2"

    def test_remove_feature_failure(self):
        """Dropping a feature that was never added must raise ValueError."""
        with pytest.raises(ValueError):
            fs = FeatureSet("my-feature-set")
            fs.drop(name="my-feature-1")

    def test_update_from_source_failure(self):
        """Inferring fields from an empty dataframe must raise."""
        with pytest.raises(Exception):
            df = pd.DataFrame()
            fs = FeatureSet("driver-feature-set")
            fs.infer_fields_from_df(df)

    @pytest.mark.parametrize(
        "dataframe,feature_count,entity_count,discard_unused_fields,features,entities",
        [
            (
                dataframes.GOOD,
                3,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                5,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES,
                6,
                1,
                True,
                [Feature(name="feature_6", dtype=ValueType.INT64)],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                5,
                2,
                True,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES,
                6,
                3,
                False,
                [],
                [
                    Entity(name="entity_1_id", dtype=ValueType.INT64),
                    Entity(name="entity_2_id", dtype=ValueType.INT64),
                ],
            ),
            (
                dataframes.NO_FEATURES,
                0,
                1,
                True,
                [],
                [Entity(name="entity_id", dtype=ValueType.INT64)],
            ),
            (
                pd.DataFrame(
                    {
                        "datetime": [
                            datetime.utcnow().replace(tzinfo=pytz.utc)
                            for _ in range(3)
                        ]
                    }
                ),
                0,
                0,
                True,
                [],
                [],
            ),
        ],
        ids=[
            "Test small dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity",
            "Test larger dataframe update with hardcoded entity and feature",
            "Test larger dataframe update with two hardcoded entities and discarding of existing fields",
            "Test larger dataframe update with two hardcoded entities and retention of existing fields",
            "Test dataframe with no featuresdataframe",
            "Test empty dataframe",
        ],
    )
    def test_add_features_from_df_success(
        self,
        dataframe,
        feature_count,
        entity_count,
        discard_unused_fields,
        features,
        entities,
    ):
        """Field inference from a dataframe yields the expected field counts."""
        my_feature_set = FeatureSet(
            name="my_feature_set",
            features=[Feature(name="dummy_f1", dtype=ValueType.INT64)],
            entities=[Entity(name="dummy_entity_1", dtype=ValueType.INT64)],
        )
        my_feature_set.infer_fields_from_df(
            dataframe,
            discard_unused_fields=discard_unused_fields,
            features=features,
            entities=entities,
        )
        assert len(my_feature_set.features) == feature_count
        assert len(my_feature_set.entities) == entity_count

    def test_import_tfx_schema(self):
        """Importing a TFX schema populates presence/shape/domain info on features."""
        tests_folder = pathlib.Path(__file__).parent
        schema_path = (
            tests_folder / "data" / "tensorflow_metadata" / "bikeshare_schema.json"
        )
        # Fix: read_text() closes the file; open(...).read() leaked the handle.
        test_input_schema = schema_pb2.Schema()
        json_format.Parse(schema_path.read_text(), test_input_schema)

        feature_set = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="name", dtype=ValueType.STRING),
                Feature(name="status", dtype=ValueType.STRING),
                Feature(name="latitude", dtype=ValueType.FLOAT),
                Feature(name="longitude", dtype=ValueType.FLOAT),
                Feature(name="location", dtype=ValueType.STRING),
            ],
        )

        # Before update: no TFX metadata should be present yet.
        for entity in feature_set.entities:
            assert entity.presence is None
            assert entity.shape is None
        for feature in feature_set.features:
            assert feature.presence is None
            assert feature.shape is None
            assert feature.string_domain is None
            assert feature.float_domain is None
            assert feature.int_domain is None

        feature_set.import_tfx_schema(test_input_schema)

        # After update: features carry presence/shape and a dtype-matching domain.
        for feature in feature_set.features:
            assert feature.presence is not None
            assert feature.shape is not None
            if feature.name in ["location", "name", "status"]:
                assert feature.string_domain is not None
            elif feature.name in ["latitude", "longitude"]:
                assert feature.float_domain is not None
            elif feature.name in ["station_id"]:
                assert feature.int_domain is not None

    def test_export_tfx_schema(self):
        """Exporting TFX schema from a feature set reproduces the golden schema."""
        tests_folder = pathlib.Path(__file__).parent
        test_input_feature_set = FeatureSet.from_yaml(
            str(
                tests_folder
                / "data"
                / "tensorflow_metadata"
                / "bikeshare_feature_set.yaml"
            )
        )

        expected_schema_path = (
            tests_folder / "data" / "tensorflow_metadata" / "bikeshare_schema.json"
        )
        # Fix: read_text() closes the file; open(...).read() leaked the handle.
        expected_schema = schema_pb2.Schema()
        json_format.Parse(expected_schema_path.read_text(), expected_schema)
        _make_tfx_schema_domain_info_inline(expected_schema)

        actual_schema = test_input_feature_set.export_tfx_schema()

        # Compare feature-by-feature via serialized bytes for an exact match.
        assert len(actual_schema.feature) == len(expected_schema.feature)
        for actual, expected in zip(actual_schema.feature, expected_schema.feature):
            assert actual.SerializeToString() == expected.SerializeToString()

    def test_feature_set_import_export_yaml(self):
        """A feature set survives a YAML round trip with equality preserved."""
        test_feature_set = FeatureSet(
            name="bikeshare",
            entities=[Entity(name="station_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="name", dtype=ValueType.STRING),
                Feature(name="longitude", dtype=ValueType.FLOAT),
                Feature(name="location", dtype=ValueType.STRING),
            ],
        )

        # Create a string YAML representation of the feature set
        string_yaml = test_feature_set.to_yaml()

        # Create a new feature set object from the YAML string
        actual_feature_set_from_string = FeatureSet.from_yaml(string_yaml)

        # Ensure equality is upheld to original feature set
        assert test_feature_set == actual_feature_set_from_string
def test_add_remove_features_success(self):
    """Dropping one of two features leaves exactly the other behind."""
    feature_set = FeatureSet("my-feature-set")
    feature_set.add(Feature(name="my-feature-1", dtype=ValueType.INT64))
    feature_set.add(Feature(name="my-feature-2", dtype=ValueType.INT64))
    feature_set.drop(name="my-feature-1")
    remaining = feature_set.features
    assert len(remaining) == 1 and remaining[0].name == "my-feature-2"
def test_feature_without_labels_empty_dict():
    """A feature created without labels defaults to an empty OrderedDict."""
    feature = Feature("my feature", dtype=ValueType.INT64)
    assert feature.labels == OrderedDict()
    assert len(feature.labels) == 0