def create_feature_view(name, feature_dtype, feature_is_list, has_empty_list, data_source):
    """Build a driver feature view whose feature type is chosen by name.

    Args:
        name: Forwarded to ``driver_feature_view`` as ``name``.
        feature_dtype: One of ``"int32"``, ``"int64"``, ``"float"``,
            ``"bool"`` or ``"datetime"``.
        feature_is_list: When this is exactly ``True`` the scalar type is
            wrapped in ``Array``; any other value selects the scalar type.
        has_empty_list: Not read by this function; kept in the signature so
            existing callers keep working.
        data_source: Forwarded to ``driver_feature_view`` as its first
            positional argument.

    Returns:
        The value returned by ``driver_feature_view``.

    Raises:
        ValueError: If ``feature_dtype`` is not one of the supported names.
    """
    scalar_types = {
        "int32": Int32,
        "int64": Int64,
        "float": Float32,
        "bool": Bool,
        "datetime": UnixTimestamp,
    }
    try:
        scalar_type = scalar_types[feature_dtype]
    except (KeyError, TypeError):
        # Previously an unknown name left ``dtype`` unbound and surfaced as an
        # UnboundLocalError further down; fail early with a clear message.
        raise ValueError(
            f"Unsupported feature_dtype {feature_dtype!r}; "
            f"expected one of {sorted(scalar_types)}"
        ) from None

    # The identity comparison is deliberate: only the literal ``True`` selects
    # the list variant, exactly as before.
    dtype = Array(scalar_type) if feature_is_list is True else scalar_type

    return driver_feature_view(
        data_source,
        name=name,
        dtype=dtype,
    )
def test_apply_object_and_read(test_feature_store):
    """Apply two entities and two feature views, then read one of each back.

    The objects fetched from the store must compare equal to the ones that
    were applied under the same name and unequal to their siblings.
    """
    assert isinstance(test_feature_store, FeatureStore)

    # Shared batch source for both feature views.
    source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    def build_entity(entity_name):
        return Entity(
            name=entity_name,
            value_type=ValueType.STRING,
            description="something",
        )

    def build_view(view_name):
        # Each call produces fresh Field objects and lists for the view.
        return FeatureView(
            name=view_name,
            schema=[
                Field(name="fs1_my_feature_1", dtype=Int64),
                Field(name="fs1_my_feature_2", dtype=String),
                Field(name="fs1_my_feature_3", dtype=Array(String)),
                Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
            ],
            entities=["fs1_my_entity_1"],
            tags={"team": "matchmaking"},
            batch_source=source,
            ttl=timedelta(minutes=5),
        )

    first_entity = build_entity("fs1_my_entity_1")
    second_entity = build_entity("fs1_my_entity_2")
    first_view = build_view("my_feature_view_1")
    second_view = build_view("my_feature_view_2")

    # Register everything in one call.
    test_feature_store.apply([first_view, first_entity, second_view, second_entity])

    stored_view = test_feature_store.get_feature_view("my_feature_view_1")
    stored_entity = test_feature_store.get_entity("fs1_my_entity_1")

    assert first_view == stored_view
    assert first_entity == stored_entity
    assert second_view != stored_view
    assert second_entity != stored_entity

    test_feature_store.teardown()
def test_apply_data_source(test_registry: Registry):
    """Check that a data source and its feature view are stored and updated together.

    The source and view are applied, read back and compared; the source's
    timestamp field is then changed and the same round trip is repeated.
    """
    source = FileSource(
        name="test_source",
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    view = FeatureView(
        name="my_feature_view_1",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=source,
        ttl=timedelta(minutes=5),
    )

    project = "project"

    def apply_and_list():
        # Stage the data source without committing, then commit with the view.
        test_registry.apply_data_source(source, project, commit=False)
        test_registry.apply_feature_view(view, project, commit=True)
        views = test_registry.list_feature_views(project)
        sources = test_registry.list_data_sources(project)
        return views, sources

    # Initial registration.
    stored_views, stored_sources = apply_and_list()
    assert len(stored_views) == 1
    assert len(stored_sources) == 1
    assert stored_views[0].batch_source == source
    assert stored_sources[0] == source

    # Check that change to batch source propagates.
    source.timestamp_field = "new_ts_col"
    stored_views, stored_sources = apply_and_list()
    assert len(stored_views) == 1
    assert len(stored_sources) == 1
    assert stored_views[0].batch_source == source
    relisted_source = test_registry.list_data_sources(project)[0]
    assert relisted_source == source

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
def create_item_embeddings_feature_view(source, infer_features: bool = False):
    """Return the ``item_embeddings`` feature view backed by ``source``.

    Args:
        source: Used as the view's ``batch_source``.
        infer_features: When truthy the view is created with ``schema=None``;
            otherwise the two embedding fields are declared explicitly.
    """
    if infer_features:
        schema = None
    else:
        schema = [
            Field(name="embedding_double", dtype=Array(Float64)),
            Field(name="embedding_float", dtype=Array(Float32)),
        ]

    return FeatureView(
        name="item_embeddings",
        entities=["item"],
        schema=schema,
        batch_source=source,
        ttl=timedelta(hours=2),
    )
def test_apply_feature_view_success(test_feature_store):
    """Apply one feature view and verify it is listed with the same fields."""
    source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    view = FeatureView(
        name="my_feature_view_1",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=source,
        ttl=timedelta(minutes=5),
    )

    # Register the view, then list everything the store knows about.
    test_feature_store.apply([view])
    listed = test_feature_store.list_feature_views()

    # Expected (name, dtype) of each feature, in schema order.
    expected_fields = [
        ("fs1_my_feature_1", Int64),
        ("fs1_my_feature_2", String),
        ("fs1_my_feature_3", Array(String)),
        ("fs1_my_feature_4", Array(Bytes)),
    ]

    assert len(listed) == 1
    assert listed[0].name == "my_feature_view_1"
    for position, (field_name, field_dtype) in enumerate(expected_fields):
        assert listed[0].features[position].name == field_name
        assert listed[0].features[position].dtype == field_dtype
    assert listed[0].entities[0] == "fs1_my_entity_1"

    test_feature_store.teardown()
def test_apply_feature_view_integration(test_registry):
    """Apply, list, fetch and delete a feature view through the registry."""
    source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    view = FeatureView(
        name="my_feature_view_1",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=source,
        ttl=timedelta(minutes=5),
    )

    project = "project"

    # Expected (name, dtype) of each feature, in schema order.
    expected_fields = [
        ("fs1_my_feature_1", Int64),
        ("fs1_my_feature_2", String),
        ("fs1_my_feature_3", Array(String)),
        ("fs1_my_feature_4", Array(Bytes)),
    ]

    def check_view(candidate):
        # Same checks, in the same order, for the listed and the fetched view.
        assert candidate.name == "my_feature_view_1"
        for position, (field_name, field_dtype) in enumerate(expected_fields):
            assert candidate.features[position].name == field_name
            assert candidate.features[position].dtype == field_dtype
        assert candidate.entities[0] == "fs1_my_entity_1"

    # Register the view and confirm it shows up in the listing.
    test_registry.apply_feature_view(view, project)
    listed = test_registry.list_feature_views(project)
    assert len(listed) == 1
    check_view(listed[0])

    # Fetch the same view by name.
    fetched = test_registry.get_feature_view("my_feature_view_1", project)
    check_view(fetched)

    # Delete it and confirm the listing is empty again.
    test_registry.delete_feature_view("my_feature_view_1", project)
    listed = test_registry.list_feature_views(project)
    assert len(listed) == 0

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
pa.bool_(): Value_pb2.BoolList, pa.string(): Value_pb2.StringList, pa.binary(): Value_pb2.BytesList, PA_TIMESTAMP_TYPE: Value_pb2.Int64List, } FEAST_TYPE_TO_ARROW_TYPE = { PrimitiveFeastType.INT32: pa.int32(), PrimitiveFeastType.INT64: pa.int64(), PrimitiveFeastType.FLOAT32: pa.float32(), PrimitiveFeastType.FLOAT64: pa.float64(), PrimitiveFeastType.STRING: pa.string(), PrimitiveFeastType.BYTES: pa.binary(), PrimitiveFeastType.BOOL: pa.bool_(), PrimitiveFeastType.UNIX_TIMESTAMP: pa.timestamp("s"), Array(PrimitiveFeastType.INT32): pa.list_(pa.int32()), Array(PrimitiveFeastType.INT64): pa.list_(pa.int64()), Array(PrimitiveFeastType.FLOAT32): pa.list_(pa.float32()), Array(PrimitiveFeastType.FLOAT64): pa.list_(pa.float64()), Array(PrimitiveFeastType.STRING): pa.list_(pa.string()), Array(PrimitiveFeastType.BYTES): pa.list_(pa.binary()), Array(PrimitiveFeastType.BOOL): pa.list_(pa.bool_()), Array(PrimitiveFeastType.UNIX_TIMESTAMP): pa.list_(pa.timestamp("s")), } def arrow_array_to_array_of_proto( arrow_type: pa.DataType, arrow_array: pa.Array) -> List[Value_pb2.Value]: values = [] if isinstance(arrow_type, pa.ListType):
def test_array_feast_type():
    """Check Array <-> ValueType round trips and that invalid element types are rejected."""
    round_trips = (
        (String, ValueType.STRING_LIST),
        (Float32, ValueType.FLOAT_LIST),
    )
    for element_type, expected_value_type in round_trips:
        array_type = Array(element_type)
        assert array_type.to_value_type() == expected_value_type
        assert from_value_type(array_type.to_value_type()) == array_type

    # Passing the Array class itself as the element type must fail.
    with pytest.raises(ValueError):
        _ = Array(Array)

    # Nesting an Array inside an Array must fail as well.
    with pytest.raises(ValueError):
        _ = Array(Array(String))