def test_create_stream_feature_view():
    stream_source = KafkaSource(
        name="kafka",
        event_timestamp_column="",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        topic="topic",
        batch_source=FileSource(path="some path"),
    )

    # A valid stream feature view requires a stream source.
    StreamFeatureView(
        name="test stream feature view",
        entities=[],
        ttl=timedelta(days=30),
        source=stream_source,
    )

    # Omitting the source entirely should fail validation.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test stream feature view", entities=[], ttl=timedelta(days=30)
        )

    # A batch-only source is not a valid stream source either.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test stream feature view",
            entities=[],
            ttl=timedelta(days=30),
            source=FileSource(path="some path"),
        )

def test_feature_table_import_export_yaml(self, batch_source):
    stream_source = KafkaSource(
        field_mapping={
            "ride_distance": "ride_distance",
            "ride_duration": "ride_duration",
        },
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat(class_path="class.path"),
        topic="test_topic",
        event_timestamp_column="ts_col",
    )

    test_feature_table = FeatureTable(
        name="car_driver",
        features=[
            Feature(name="ride_distance", dtype=ValueType.FLOAT),
            Feature(name="ride_duration", dtype=ValueType.STRING),
        ],
        entities=["car_driver_entity"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    # Create a string YAML representation of the feature table
    string_yaml = test_feature_table.to_yaml()

    # Create a new feature table object from the YAML string
    actual_feature_table_from_string = FeatureTable.from_yaml(string_yaml)

    # Ensure equality is upheld to original feature table
    assert test_feature_table == actual_feature_table_from_string

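# A minimal sketch of the `batch_source` fixture the test above assumes; the
# real fixture is defined elsewhere in the suite, so the path and column names
# here are illustrative only.
@pytest.fixture
def batch_source():
    return FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
    )
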
def test_apply_feature_table_success(self, test_client):
    test_client.set_project("project1")

    # Create Feature Tables
    batch_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    stream_source = KafkaSource(
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("random/path/to/class"),
        topic="test_topic",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
    )

    ft1 = FeatureTable(
        name="my-feature-table-1",
        features=[
            Feature(name="fs1-my-feature-1", dtype=ValueType.INT64),
            Feature(name="fs1-my-feature-2", dtype=ValueType.STRING),
            Feature(name="fs1-my-feature-3", dtype=ValueType.STRING_LIST),
            Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST),
        ],
        entities=["fs1-my-entity-1"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    # Register Feature Table with Core
    test_client.apply_feature_table(ft1)

    # List Feature Tables
    feature_tables = test_client.list_feature_tables()

    assert (
        len(feature_tables) == 1
        and feature_tables[0].name == "my-feature-table-1"
        and feature_tables[0].features[0].name == "fs1-my-feature-1"
        and feature_tables[0].features[0].dtype == ValueType.INT64
        and feature_tables[0].features[1].name == "fs1-my-feature-2"
        and feature_tables[0].features[1].dtype == ValueType.STRING
        and feature_tables[0].features[2].name == "fs1-my-feature-3"
        and feature_tables[0].features[2].dtype == ValueType.STRING_LIST
        and feature_tables[0].features[3].name == "fs1-my-feature-4"
        and feature_tables[0].features[3].dtype == ValueType.BYTES_LIST
        and feature_tables[0].entities[0] == "fs1-my-entity-1"
    )

def feature_table():
    return FeatureTable(
        name="ft",
        entities=[],
        features=[],
        stream_source=KafkaSource(
            topic="t",
            bootstrap_servers="",
            message_format=AvroFormat(""),
            event_timestamp_column="",
        ),
    )

def basic_featuretable():
    batch_source = FileSource(
        field_mapping={
            "dev_entity": "dev_entity_field",
            "dev_feature_float": "dev_feature_float_field",
            "dev_feature_string": "dev_feature_string_field",
        },
        file_format=ParquetFormat(),
        file_url="gs://example/feast/*",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
        date_partition_column="datetime",
    )
    stream_source = KafkaSource(
        field_mapping={
            "dev_entity": "dev_entity_field",
            "dev_feature_float": "dev_feature_float_field",
            "dev_feature_string": "dev_feature_string_field",
        },
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("random/path/to/class"),
        topic="test_topic",
        event_timestamp_column="datetime_col",
        created_timestamp_column="timestamp",
    )
    return FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=batch_source,
        stream_source=stream_source,
        labels={"key1": "val1", "key2": "val2"},
    )

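# A sketch of how a fixture like the one above is typically consumed:
# round-trip through the protobuf representation and check equality. The test
# name is illustrative, not taken from the original suite.
def test_basic_featuretable_proto_roundtrip(basic_featuretable):
    proto = basic_featuretable.to_proto()
    assert FeatureTable.from_proto(proto) == basic_featuretable
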
def _create_ft(self, client: Client, features) -> None:
    entity = Entity(
        name="driver_car_id",
        description="Car driver id",
        value_type=ValueType.STRING,
        labels={"team": "matchmaking"},
    )

    # Register Entity with Core
    client.apply_entity(entity)

    # Create Feature Tables
    batch_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    stream_source = KafkaSource(
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("class.path"),
        topic="test_topic",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
    )

    ft1 = FeatureTable(
        name=self.table_name,
        features=features,
        entities=["driver_car_id"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    # Register Feature Table with Core
    client.apply_feature_table(ft1)

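# A hypothetical call site for the helper above, assuming a test class that
# defines `self.table_name`; the test name and feature list are illustrative.
def test_create_and_fetch_ft(self, client: Client):
    self._create_ft(client, [Feature(name="ride_distance", dtype=ValueType.FLOAT)])
    assert client.get_feature_table(self.table_name).name == self.table_name
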
def test_apply_feature_table_integration(self, test_client):
    # Create Feature Tables
    batch_source = FileSource(
        file_format=ParquetFormat(),
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    stream_source = KafkaSource(
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("class.path"),
        topic="test_topic",
        event_timestamp_column="ts_col",
    )

    ft1 = FeatureTable(
        name="my-feature-table-1",
        features=[
            Feature(name="fs1-my-feature-1", dtype=ValueType.INT64),
            Feature(name="fs1-my-feature-2", dtype=ValueType.STRING),
            Feature(name="fs1-my-feature-3", dtype=ValueType.STRING_LIST),
            Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST),
        ],
        entities=["fs1-my-entity-1"],
        labels={"team": "matchmaking"},
        batch_source=batch_source,
        stream_source=stream_source,
    )

    # Register Feature Table with Core
    test_client.apply(ft1)

    # List Feature Tables
    feature_tables = test_client.list_feature_tables()
    assert (
        len(feature_tables) == 1
        and feature_tables[0].name == "my-feature-table-1"
        and feature_tables[0].features[0].name == "fs1-my-feature-1"
        and feature_tables[0].features[0].dtype == ValueType.INT64
        and feature_tables[0].features[1].name == "fs1-my-feature-2"
        and feature_tables[0].features[1].dtype == ValueType.STRING
        and feature_tables[0].features[2].name == "fs1-my-feature-3"
        and feature_tables[0].features[2].dtype == ValueType.STRING_LIST
        and feature_tables[0].features[3].name == "fs1-my-feature-4"
        and feature_tables[0].features[3].dtype == ValueType.BYTES_LIST
        and feature_tables[0].entities[0] == "fs1-my-entity-1"
    )

    # Fetch the Feature Table back by name
    feature_table = test_client.get_feature_table("my-feature-table-1")
    assert (
        feature_table.name == "my-feature-table-1"
        and feature_table.features[0].name == "fs1-my-feature-1"
        and feature_table.features[0].dtype == ValueType.INT64
        and feature_table.features[1].name == "fs1-my-feature-2"
        and feature_table.features[1].dtype == ValueType.STRING
        and feature_table.features[2].name == "fs1-my-feature-3"
        and feature_table.features[2].dtype == ValueType.STRING_LIST
        and feature_table.features[3].name == "fs1-my-feature-4"
        and feature_table.features[3].dtype == ValueType.BYTES_LIST
        and feature_table.entities[0] == "fs1-my-entity-1"
    )

    # Delete the Feature Table and confirm it is gone
    test_client.delete_feature_table("my-feature-table-1")
    feature_tables = test_client.list_feature_tables()
    assert len(feature_tables) == 0

def test_default_data_source_kw_arg_warning():
    # Positional construction of a KafkaSource is deprecated.
    with pytest.warns(DeprecationWarning):
        source = KafkaSource(
            "name", "column", "bootstrap_servers", ProtoFormat("class_path"), "topic"
        )
    assert source.name == "name"
    assert source.timestamp_field == "column"
    assert source.kafka_options.bootstrap_servers == "bootstrap_servers"
    assert source.kafka_options.topic == "topic"
    with pytest.raises(ValueError):
        KafkaSource("name", "column", "bootstrap_servers", topic="topic")

    # Positional construction of a KinesisSource is deprecated.
    with pytest.warns(DeprecationWarning):
        source = KinesisSource(
            "name",
            "column",
            "c_column",
            ProtoFormat("class_path"),
            "region",
            "stream_name",
        )
    assert source.name == "name"
    assert source.timestamp_field == "column"
    assert source.created_timestamp_column == "c_column"
    assert source.kinesis_options.region == "region"
    assert source.kinesis_options.stream_name == "stream_name"
    with pytest.raises(ValueError):
        KinesisSource(
            "name", "column", "c_column", region="region", stream_name="stream_name"
        )

    # Positional construction of a RequestSource is deprecated.
    with pytest.warns(DeprecationWarning):
        source = RequestSource(
            "name", [Field(name="val_to_add", dtype=Int64)], description="description"
        )
    assert source.name == "name"
    assert source.description == "description"
    with pytest.raises(ValueError):
        RequestSource("name")

    # Positional construction of a PushSource is deprecated.
    with pytest.warns(DeprecationWarning):
        source = PushSource(
            "name",
            BigQuerySource(name="bigquery_source", table="table"),
            description="description",
        )
    assert source.name == "name"
    assert source.description == "description"
    assert source.batch_source.name == "bigquery_source"
    with pytest.raises(ValueError):
        PushSource("name")

    # Constructing a DataSource without a name should raise a UserWarning.
    with pytest.warns(UserWarning):
        source = KafkaSource(
            event_timestamp_column="column",
            bootstrap_servers="bootstrap_servers",
            message_format=ProtoFormat("class_path"),
            topic="topic",
        )

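# For contrast, a minimal sketch of a keyword-only construction that avoids
# the deprecation paths exercised above; the test name and values are
# illustrative rather than taken from the original suite.
def test_kafka_source_kw_args_no_warning():
    source = KafkaSource(
        name="kafka",
        timestamp_field="ts_col",
        bootstrap_servers="localhost:9094",
        message_format=ProtoFormat("class.path"),
        topic="topic",
    )
    assert source.name == "kafka"
    assert source.timestamp_field == "ts_col"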