def test_apply_object_and_read(test_feature_store):
    assert isinstance(test_feature_store, FeatureStore)

    # Create Feature Views
    batch_source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    e1 = Entity(name="fs1_my_entity_1", value_type=ValueType.STRING, description="something")
    e2 = Entity(name="fs1_my_entity_2", value_type=ValueType.STRING, description="something")

    fv1 = FeatureView(
        name="my_feature_view_1",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=batch_source,
        ttl=timedelta(minutes=5),
    )

    fv2 = FeatureView(
        name="my_feature_view_2",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=batch_source,
        ttl=timedelta(minutes=5),
    )

    # Register Feature Views
    test_feature_store.apply([fv1, e1, fv2, e2])

    fv1_actual = test_feature_store.get_feature_view("my_feature_view_1")
    e1_actual = test_feature_store.get_entity("fs1_my_entity_1")

    assert fv1 == fv1_actual
    assert e1 == e1_actual
    assert fv2 != fv1_actual
    assert e2 != e1_actual

    test_feature_store.teardown()
def test_update_feature_views_with_inferred_features():
    file_source = FileSource(name="test", path="test path")
    entity1 = Entity(name="test1", join_keys=["test_column_1"])
    entity2 = Entity(name="test2", join_keys=["test_column_2"])
    feature_view_1 = FeatureView(
        name="test1",
        entities=[entity1],
        schema=[
            Field(name="feature", dtype=Float32),
            Field(name="test_column_1", dtype=String),
        ],
        source=file_source,
    )
    feature_view_2 = FeatureView(
        name="test2",
        entities=[entity1, entity2],
        schema=[
            Field(name="feature", dtype=Float32),
            Field(name="test_column_1", dtype=String),
            Field(name="test_column_2", dtype=String),
        ],
        source=file_source,
    )

    assert len(feature_view_1.schema) == 2
    assert len(feature_view_1.features) == 2

    # The entity field should be deleted from the schema and features of the feature view.
    update_feature_views_with_inferred_features(
        [feature_view_1], [entity1], RepoConfig(provider="local", project="test")
    )
    assert len(feature_view_1.schema) == 1
    assert len(feature_view_1.features) == 1

    assert len(feature_view_2.schema) == 3
    assert len(feature_view_2.features) == 3

    # The entity fields should be deleted from the schema and features of the feature view.
    update_feature_views_with_inferred_features(
        [feature_view_2],
        [entity1, entity2],
        RepoConfig(provider="local", project="test"),
    )
    assert len(feature_view_2.schema) == 1
    assert len(feature_view_2.features) == 1
def test_apply_feature_view_success(test_feature_store):
    # Create Feature Views
    batch_source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )

    fv1 = FeatureView(
        name="my_feature_view_1",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=batch_source,
        ttl=timedelta(minutes=5),
    )

    # Register Feature View
    test_feature_store.apply([fv1])

    feature_views = test_feature_store.list_feature_views()

    # List Feature Views
    assert (
        len(feature_views) == 1
        and feature_views[0].name == "my_feature_view_1"
        and feature_views[0].features[0].name == "fs1_my_feature_1"
        and feature_views[0].features[0].dtype == Int64
        and feature_views[0].features[1].name == "fs1_my_feature_2"
        and feature_views[0].features[1].dtype == String
        and feature_views[0].features[2].name == "fs1_my_feature_3"
        and feature_views[0].features[2].dtype == Array(String)
        and feature_views[0].features[3].name == "fs1_my_feature_4"
        and feature_views[0].features[3].dtype == Array(Bytes)
        and feature_views[0].entities[0] == "fs1_my_entity_1"
    )

    test_feature_store.teardown()
def __init__(
    self,
    *args,
    name: Optional[str] = None,
    schema: Optional[Union[Dict[str, ValueType], List[Field]]] = None,
    description: Optional[str] = "",
    tags: Optional[Dict[str, str]] = None,
    owner: Optional[str] = "",
):
    """Creates a RequestSource object."""
    positional_attributes = ["name", "schema"]
    _name = name
    _schema = schema
    if args:
        warnings.warn(
            (
                "Request source parameters should be specified as keyword arguments "
                "instead of positional arguments. Feast 0.23+ will not support "
                "positional arguments to construct request sources."
            ),
            DeprecationWarning,
        )
        if len(args) > len(positional_attributes):
            raise ValueError(
                f"Only {', '.join(positional_attributes)} are allowed as positional "
                f"args when defining feature views, for backwards compatibility."
            )
        if len(args) >= 1:
            _name = args[0]
        if len(args) >= 2:
            _schema = args[1]

    super().__init__(name=_name, description=description, tags=tags, owner=owner)

    if not _schema:
        raise ValueError("Schema needs to be provided for Request Source")
    if isinstance(_schema, Dict):
        warnings.warn(
            "Schema in RequestSource is changing type. The schema data type "
            "Dict[str, ValueType] is being deprecated in Feast 0.23. "
            "Please use List[Field] instead for the schema.",
            DeprecationWarning,
        )
        schema_list = []
        for key, value_type in _schema.items():
            schema_list.append(
                Field(name=key, dtype=VALUE_TYPES_TO_FEAST_TYPES[value_type])
            )
        self.schema = schema_list
    elif isinstance(_schema, List):
        self.schema = _schema
    else:
        raise Exception(
            "Schema type must be either dictionary or list, not " + str(type(_schema))
        )
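# Usage sketch for the constructor above (illustrative, hypothetical names like
# "vals_to_add"): the keyword form with a List[Field] schema is the supported
# style; a Dict[str, ValueType] schema or positional arguments still work but
# emit a DeprecationWarning, per the branches above.
from feast import Field, RequestSource, ValueType
from feast.types import Int64

# Preferred: keyword arguments with a List[Field] schema.
val_source = RequestSource(
    name="vals_to_add",
    schema=[Field(name="val_to_add", dtype=Int64)],
)

# Deprecated: dict schema, normalized to List[Field] internally.
legacy_source = RequestSource(
    name="vals_to_add_legacy",
    schema={"val_to_add": ValueType.INT64},
)
assert all(isinstance(f, Field) for f in legacy_source.schema)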
def test_hash():
    file_source = FileSource(name="my-file-source", path="test.parquet")
    feature_view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    feature_service_1 = FeatureService(
        name="my-feature-service", features=[feature_view[["feature1", "feature2"]]]
    )
    feature_service_2 = FeatureService(
        name="my-feature-service", features=[feature_view[["feature1", "feature2"]]]
    )
    feature_service_3 = FeatureService(
        name="my-feature-service", features=[feature_view[["feature1"]]]
    )
    feature_service_4 = FeatureService(
        name="my-feature-service",
        features=[feature_view[["feature1"]]],
        description="test",
    )

    s1 = {feature_service_1, feature_service_2}
    assert len(s1) == 1

    s2 = {feature_service_1, feature_service_3}
    assert len(s2) == 2

    s3 = {feature_service_3, feature_service_4}
    assert len(s3) == 2

    s4 = {feature_service_1, feature_service_2, feature_service_3, feature_service_4}
    assert len(s4) == 3
def infer_features(self):
    """
    Infers the set of features associated with this feature view from the input source.

    Raises:
        RegistryInferenceFailure: The set of features could not be inferred.
    """
    df = pd.DataFrame()
    for feature_view_projection in self.source_feature_view_projections.values():
        for feature in feature_view_projection.features:
            dtype = feast_value_type_to_pandas_type(feature.dtype.to_value_type())
            df[f"{feature_view_projection.name}__{feature.name}"] = pd.Series(dtype=dtype)
            df[f"{feature.name}"] = pd.Series(dtype=dtype)
    for request_data in self.source_request_sources.values():
        for field in request_data.schema:
            dtype = feast_value_type_to_pandas_type(field.dtype.to_value_type())
            df[f"{field.name}"] = pd.Series(dtype=dtype)

    output_df: pd.DataFrame = self.udf(df)
    inferred_features = [
        Field(
            name=f,
            dtype=from_value_type(python_type_to_feast_value_type(f, type_name=str(dt))),
        )
        for f, dt in zip(output_df.columns, output_df.dtypes)
    ]

    if self.features:
        missing_features = [
            specified_feature
            for specified_feature in self.features
            if specified_feature not in inferred_features
        ]
        if missing_features:
            raise SpecifiedFeaturesNotPresentError(
                [f.name for f in missing_features], self.name
            )
    else:
        self.features = inferred_features

    if not self.features:
        raise RegistryInferenceFailure(
            "OnDemandFeatureView",
            f"Could not infer Features for the feature view '{self.name}'.",
        )
def __init__(
    self,
    name: str,
    request_data_source: RequestSource,
    description: str = "",
    tags: Optional[Dict[str, str]] = None,
    owner: str = "",
):
    """
    Creates a RequestFeatureView object.

    Args:
        name: The unique name of the request feature view.
        request_data_source: The request data source that specifies the schema and
            features of the request feature view.
        description (optional): A human-readable description.
        tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
        owner (optional): The owner of the request feature view, typically the email
            of the primary maintainer.
    """
    warnings.warn(
        "Request feature view is deprecated. Please use request data source instead.",
        DeprecationWarning,
    )

    if isinstance(request_data_source.schema, Dict):
        new_features = [
            Field(name=name, dtype=dtype)
            for name, dtype in request_data_source.schema.items()
        ]
    else:
        new_features = request_data_source.schema

    super().__init__(
        name=name,
        features=new_features,
        description=description,
        tags=tags,
        owner=owner,
    )
    self.request_source = request_data_source
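# Minimal sketch of the deprecated wrapper above (illustrative names only):
# constructing a RequestFeatureView from a RequestSource emits a
# DeprecationWarning and copies the source schema into the view's features.
import pytest
from feast import Field, RequestFeatureView, RequestSource
from feast.types import Int64

source = RequestSource(name="vals_to_add", schema=[Field(name="val_to_add", dtype=Int64)])
with pytest.warns(DeprecationWarning):
    rfv = RequestFeatureView(name="vals_view", request_data_source=source)
assert [f.name for f in rfv.features] == ["val_to_add"]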
def to_proto(self) -> DataSourceProto:
    schema_pb = []
    if isinstance(self.schema, Dict):
        # Defensive branch: __init__ normalizes dict schemas to List[Field].
        for key, value in self.schema.items():
            schema_pb.append(
                Field(name=key, dtype=VALUE_TYPES_TO_FEAST_TYPES[value.value]).to_proto()
            )
    else:
        for field in self.schema:
            schema_pb.append(field.to_proto())

    data_source_proto = DataSourceProto(
        name=self.name,
        type=DataSourceProto.REQUEST_SOURCE,
        description=self.description,
        tags=self.tags,
        owner=self.owner,
    )
    data_source_proto.request_data_options.schema.extend(schema_pb)

    return data_source_proto
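# Hedged round-trip sketch for the method above: since __init__ normalizes
# dict schemas to List[Field], to_proto() normally takes the list branch, and
# RequestSource.from_proto() (used in OnDemandFeatureView.from_proto below)
# should reconstruct an equal object. Names are illustrative only.
from feast import Field, RequestSource
from feast.types import Int64

source = RequestSource(name="vals_to_add", schema=[Field(name="val_to_add", dtype=Int64)])
assert RequestSource.from_proto(source.to_proto()) == source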
def update_feature_views_with_inferred_features(
    fvs: List[FeatureView], entities: List[Entity], config: RepoConfig
) -> None:
    """
    Infers the set of features associated with each FeatureView and updates the
    FeatureView with those features. Inference treats each column of the underlying
    data source as a feature, except for columns that match the data source's
    timestamp columns or the FeatureView's entity join keys.

    Args:
        fvs: The feature views to be updated.
        entities: A list containing entities associated with the feature views.
        config: The config for the current feature store.
    """
    entity_name_to_join_key_map = {entity.name: entity.join_key for entity in entities}
    join_keys = entity_name_to_join_key_map.values()

    for fv in fvs:
        # First drop all entity fields. Then infer features if necessary.
        fv.schema = [field for field in fv.schema if field.name not in join_keys]
        fv.features = [field for field in fv.features if field.name not in join_keys]

        if not fv.features:
            columns_to_exclude = {
                fv.batch_source.timestamp_field,
                fv.batch_source.created_timestamp_column,
            } | {
                entity_name_to_join_key_map[entity_name] for entity_name in fv.entities
            }

            if fv.batch_source.timestamp_field in fv.batch_source.field_mapping:
                columns_to_exclude.add(
                    fv.batch_source.field_mapping[fv.batch_source.timestamp_field]
                )
            if fv.batch_source.created_timestamp_column in fv.batch_source.field_mapping:
                columns_to_exclude.add(
                    fv.batch_source.field_mapping[fv.batch_source.created_timestamp_column]
                )

            for (
                col_name,
                col_datatype,
            ) in fv.batch_source.get_table_column_names_and_types(config):
                # Skip columns with a leading or trailing double underscore, which
                # often signals an internal-use column.
                if col_name not in columns_to_exclude and not re.match(
                    "^__|__$", col_name
                ):
                    feature_name = (
                        fv.batch_source.field_mapping[col_name]
                        if col_name in fv.batch_source.field_mapping
                        else col_name
                    )
                    field = Field(
                        name=feature_name,
                        dtype=from_value_type(
                            fv.batch_source.source_datatype_to_feast_value_type()(
                                col_datatype
                            )
                        ),
                    )
                    # Note that schema and features are two different attributes of a
                    # FeatureView, and that features should be present in both.
                    fv.schema.append(field)
                    fv.features.append(field)

            if not fv.features:
                raise RegistryInferenceFailure(
                    "FeatureView",
                    f"Could not infer Features for the FeatureView named {fv.name}.",
                )
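# Standalone, illustrative walk-through of the exclusion rule above, with
# hypothetical column names: timestamp columns and entity join keys never
# become features, and dunder-prefixed columns are skipped by the regex check.
import re

source_columns = ["driver_id", "event_timestamp", "created", "conv_rate", "__log_col"]
columns_to_exclude = {"event_timestamp", "created", "driver_id"}  # timestamps + join key
inferred = [
    col
    for col in source_columns
    if col not in columns_to_exclude and not re.match("^__|__$", col)
]
assert inferred == ["conv_rate"]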
def test_default_data_source_kw_arg_warning():
    # source_class = request.param
    with pytest.warns(DeprecationWarning):
        source = KafkaSource(
            "name", "column", "bootstrap_servers", ProtoFormat("class_path"), "topic"
        )
    assert source.name == "name"
    assert source.timestamp_field == "column"
    assert source.kafka_options.bootstrap_servers == "bootstrap_servers"
    assert source.kafka_options.topic == "topic"
    with pytest.raises(ValueError):
        KafkaSource("name", "column", "bootstrap_servers", topic="topic")

    with pytest.warns(DeprecationWarning):
        source = KinesisSource(
            "name",
            "column",
            "c_column",
            ProtoFormat("class_path"),
            "region",
            "stream_name",
        )
    assert source.name == "name"
    assert source.timestamp_field == "column"
    assert source.created_timestamp_column == "c_column"
    assert source.kinesis_options.region == "region"
    assert source.kinesis_options.stream_name == "stream_name"
    with pytest.raises(ValueError):
        KinesisSource(
            "name", "column", "c_column", region="region", stream_name="stream_name"
        )

    with pytest.warns(DeprecationWarning):
        source = RequestSource(
            "name", [Field(name="val_to_add", dtype=Int64)], description="description"
        )
    assert source.name == "name"
    assert source.description == "description"
    with pytest.raises(ValueError):
        RequestSource("name")

    with pytest.warns(DeprecationWarning):
        source = PushSource(
            "name",
            BigQuerySource(name="bigquery_source", table="table"),
            description="description",
        )
    assert source.name == "name"
    assert source.description == "description"
    assert source.batch_source.name == "bigquery_source"
    with pytest.raises(ValueError):
        PushSource("name")

    # No name warning for DataSource
    with pytest.warns(UserWarning):
        source = KafkaSource(
            event_timestamp_column="column",
            bootstrap_servers="bootstrap_servers",
            message_format=ProtoFormat("class_path"),
            topic="topic",
        )
def test_hash():
    file_source = FileSource(name="my-file-source", path="test.parquet")
    feature_view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    sources = [feature_view]
    on_demand_feature_view_1 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf1,
    )
    on_demand_feature_view_2 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf1,
    )
    on_demand_feature_view_3 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf2,
    )
    on_demand_feature_view_4 = OnDemandFeatureView(
        name="my-on-demand-feature-view",
        sources=sources,
        schema=[
            Field(name="output1", dtype=Float32),
            Field(name="output2", dtype=Float32),
        ],
        udf=udf2,
        description="test",
    )

    s1 = {on_demand_feature_view_1, on_demand_feature_view_2}
    assert len(s1) == 1

    s2 = {on_demand_feature_view_1, on_demand_feature_view_3}
    assert len(s2) == 2

    s3 = {on_demand_feature_view_3, on_demand_feature_view_4}
    assert len(s3) == 2

    s4 = {
        on_demand_feature_view_1,
        on_demand_feature_view_2,
        on_demand_feature_view_3,
        on_demand_feature_view_4,
    }
    assert len(s4) == 3
def test_inputs_parameter_deprecation_in_odfv():
    date_request = RequestSource(
        name="date_request",
        schema=[Field(name="some_date", dtype=UnixTimestamp)],
    )
    with pytest.warns(DeprecationWarning):

        @on_demand_feature_view(
            inputs={"date_request": date_request},
            schema=[
                Field(name="output", dtype=UnixTimestamp),
                Field(name="string_output", dtype=String),
            ],
        )
        def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
            data = pd.DataFrame()
            data["output"] = features_df["some_date"]
            data["string_output"] = features_df["some_date"].astype(pd.StringDtype())
            return data

    odfv = test_view
    assert odfv.name == "test_view"
    assert len(odfv.source_request_sources) == 1
    assert odfv.source_request_sources["date_request"].name == "date_request"
    assert odfv.source_request_sources["date_request"].schema == date_request.schema

    with pytest.raises(ValueError):

        @on_demand_feature_view(
            inputs={"date_request": date_request},
            sources=[date_request],
            schema=[
                Field(name="output", dtype=UnixTimestamp),
                Field(name="string_output", dtype=String),
            ],
        )
        def incorrect_testview(features_df: pd.DataFrame) -> pd.DataFrame:
            data = pd.DataFrame()
            data["output"] = features_df["some_date"]
            data["string_output"] = features_df["some_date"].astype(pd.StringDtype())
            return data

    @on_demand_feature_view(
        inputs={"odfv": date_request},
        schema=[
            Field(name="output", dtype=UnixTimestamp),
            Field(name="string_output", dtype=String),
        ],
    )
    def test_correct_view(features_df: pd.DataFrame) -> pd.DataFrame:
        data = pd.DataFrame()
        data["output"] = features_df["some_date"]
        data["string_output"] = features_df["some_date"].astype(pd.StringDtype())
        return data

    odfv = test_correct_view
    assert odfv.name == "test_correct_view"
    assert odfv.source_request_sources["date_request"].schema == date_request.schema
def test_modify_feature_views_success(test_registry, request_source_schema):
    # Create Feature Views
    batch_source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    request_source = RequestSource(
        name="request_source",
        schema=request_source_schema,
    )

    fv1 = FeatureView(
        name="my_feature_view_1",
        schema=[Field(name="fs1_my_feature_1", dtype=Int64)],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=batch_source,
        ttl=timedelta(minutes=5),
    )

    @on_demand_feature_view(
        features=[
            Feature(name="odfv1_my_feature_1", dtype=ValueType.STRING),
            Feature(name="odfv1_my_feature_2", dtype=ValueType.INT32),
        ],
        sources=[request_source],
    )
    def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame:
        data = pd.DataFrame()
        data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("category")
        data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32")
        return data

    project = "project"

    # Register Feature Views
    test_registry.apply_feature_view(odfv1, project)
    test_registry.apply_feature_view(fv1, project)

    # Modify odfv by changing a single feature dtype
    @on_demand_feature_view(
        features=[
            Feature(name="odfv1_my_feature_1", dtype=ValueType.FLOAT),
            Feature(name="odfv1_my_feature_2", dtype=ValueType.INT32),
        ],
        sources=[request_source],
    )
    def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame:
        data = pd.DataFrame()
        data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("float")
        data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32")
        return data

    # Apply the modified odfv
    test_registry.apply_feature_view(odfv1, project)

    # Check odfv
    on_demand_feature_views = test_registry.list_on_demand_feature_views(project)
    assert (
        len(on_demand_feature_views) == 1
        and on_demand_feature_views[0].name == "odfv1"
        and on_demand_feature_views[0].features[0].name == "odfv1_my_feature_1"
        and on_demand_feature_views[0].features[0].dtype == Float32
        and on_demand_feature_views[0].features[1].name == "odfv1_my_feature_2"
        and on_demand_feature_views[0].features[1].dtype == Int32
    )
    request_schema = on_demand_feature_views[0].get_request_data_schema()
    assert (
        list(request_schema.keys())[0] == "my_input_1"
        and list(request_schema.values())[0] == ValueType.INT32
    )

    feature_view = test_registry.get_on_demand_feature_view("odfv1", project)
    assert (
        feature_view.name == "odfv1"
        and feature_view.features[0].name == "odfv1_my_feature_1"
        and feature_view.features[0].dtype == Float32
        and feature_view.features[1].name == "odfv1_my_feature_2"
        and feature_view.features[1].dtype == Int32
    )
    request_schema = feature_view.get_request_data_schema()
    assert (
        list(request_schema.keys())[0] == "my_input_1"
        and list(request_schema.values())[0] == ValueType.INT32
    )

    # Make sure fv1 is untouched
    feature_views = test_registry.list_feature_views(project)

    # List Feature Views
    assert (
        len(feature_views) == 1
        and feature_views[0].name == "my_feature_view_1"
        and feature_views[0].features[0].name == "fs1_my_feature_1"
        and feature_views[0].features[0].dtype == Int64
        and feature_views[0].entities[0] == "fs1_my_entity_1"
    )

    feature_view = test_registry.get_feature_view("my_feature_view_1", project)
    assert (
        feature_view.name == "my_feature_view_1"
        and feature_view.features[0].name == "fs1_my_feature_1"
        and feature_view.features[0].dtype == Int64
        and feature_view.entities[0] == "fs1_my_entity_1"
    )

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
    )
    def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame:
        data = pd.DataFrame()
        data["output"] = features_df["some_date"]
        return data

    with pytest.raises(SpecifiedFeaturesNotPresentError):
        test_view_with_missing_feature.infer_features()


# TODO(kevjumba): remove this in feast 0.23 when deprecating
@pytest.mark.parametrize(
    "request_source_schema",
    [
        [Field(name="some_date", dtype=UnixTimestamp)],
        {"some_date": ValueType.UNIX_TIMESTAMP},
    ],
)
def test_datasource_inference(request_source_schema):
    # Create Feature Views
    date_request = RequestSource(
        name="date_request",
        schema=request_source_schema,
    )

    @on_demand_feature_view(
        # Note: we deliberately use positional arguments here to test that they work
        # correctly, even though positional arguments are deprecated in favor of
        # keyword arguments.
# fetch features.
driver = Entity(
    name="driver_id",
    value_type=ValueType.INT64,
    description="driver id",
)

# Our parquet files contain sample data that includes a driver_id column, timestamps,
# and three feature columns. Here we define a Feature View that will allow us to serve
# this data to our model online.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=Duration(seconds=86400 * 7),
    schema=[
        Field(name="conv_rate", dtype=Float64),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
    ],
    online=True,
    batch_source=driver_hourly_stats,
    tags={},
)

input_request = RequestSource(
    name="vals_to_add",
    schema=[
        Field(name="val_to_add", dtype=Int64),
        Field(name="val_to_add_2", dtype=Int64),
        Field(name="avg_daily_trips", dtype=Int64),
    ],
def test_historical_features_from_bigquery_sources_containing_backfills(environment):
    store = environment.feature_store

    now = datetime.now().replace(microsecond=0, second=0, minute=0)
    tomorrow = now + timedelta(days=1)
    day_after_tomorrow = now + timedelta(days=2)

    entity_df = pd.DataFrame(
        data=[
            {"driver_id": 1001, "event_timestamp": day_after_tomorrow},
            {"driver_id": 1002, "event_timestamp": day_after_tomorrow},
        ]
    )

    driver_stats_df = pd.DataFrame(
        data=[
            # Duplicated rows, simple case
            {
                "driver_id": 1001,
                "avg_daily_trips": 10,
                "event_timestamp": now,
                "created": now,
            },
            {
                "driver_id": 1001,
                "avg_daily_trips": 20,
                "event_timestamp": now,
                "created": tomorrow,
            },
            # Duplicated rows after a backfill
            {
                "driver_id": 1002,
                "avg_daily_trips": 30,
                "event_timestamp": now,
                "created": tomorrow,
            },
            {
                "driver_id": 1002,
                "avg_daily_trips": 40,
                "event_timestamp": tomorrow,
                "created": now,
            },
        ]
    )

    expected_df = pd.DataFrame(
        data=[
            {
                "driver_id": 1001,
                "event_timestamp": day_after_tomorrow,
                "avg_daily_trips": 20,
            },
            {
                "driver_id": 1002,
                "event_timestamp": day_after_tomorrow,
                "avg_daily_trips": 40,
            },
        ]
    )

    driver_stats_data_source = environment.data_source_creator.create_data_source(
        df=driver_stats_df,
        destination_name=f"test_driver_stats_{int(time.time_ns())}_{random.randint(1000, 9999)}",
        timestamp_field="event_timestamp",
        created_timestamp_column="created",
    )

    driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64)
    driver_fv = FeatureView(
        name="driver_stats",
        entities=["driver"],
        schema=[Field(name="avg_daily_trips", dtype=Int32)],
        batch_source=driver_stats_data_source,
        ttl=None,
    )

    store.apply([driver, driver_fv])

    offline_job = store.get_historical_features(
        entity_df=entity_df,
        features=["driver_stats:avg_daily_trips"],
        full_feature_names=False,
    )

    start_time = datetime.utcnow()
    actual_df = offline_job.to_df()
    print(f"actual_df shape: {actual_df.shape}")
    end_time = datetime.utcnow()
    print(f"Time to execute job_from_df.to_df() = '{end_time - start_time}'\n")

    assert sorted(expected_df.columns) == sorted(actual_df.columns)
    assert_frame_equal(expected_df, actual_df, keys=["driver_id"])
    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()


@pytest.mark.parametrize(
    "test_registry",
    [lazy_fixture("local_registry")],
)
# TODO(kevjumba): remove this in feast 0.23 when deprecating
@pytest.mark.parametrize(
    "request_source_schema",
    [
        [Field(name="my_input_1", dtype=Int32)],
        {"my_input_1": ValueType.INT32},
    ],
)
def test_modify_feature_views_success(test_registry, request_source_schema):
    # Create Feature Views
    batch_source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    request_source = RequestSource(
        name="request_source",
        schema=request_source_schema,
def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto):
    """
    Creates an on demand feature view from a protobuf representation.

    Args:
        on_demand_feature_view_proto: A protobuf representation of an on-demand feature view.

    Returns:
        An OnDemandFeatureView object based on the on-demand feature view protobuf.
    """
    sources = []
    for (
        _,
        on_demand_source,
    ) in on_demand_feature_view_proto.spec.sources.items():
        if on_demand_source.WhichOneof("source") == "feature_view":
            sources.append(
                FeatureView.from_proto(on_demand_source.feature_view).projection
            )
        elif on_demand_source.WhichOneof("source") == "feature_view_projection":
            sources.append(
                FeatureViewProjection.from_proto(
                    on_demand_source.feature_view_projection
                )
            )
        else:
            sources.append(
                RequestSource.from_proto(on_demand_source.request_data_source)
            )
    on_demand_feature_view_obj = cls(
        name=on_demand_feature_view_proto.spec.name,
        schema=[
            Field(
                name=feature.name,
                dtype=from_value_type(ValueType(feature.value_type)),
            )
            for feature in on_demand_feature_view_proto.spec.features
        ],
        sources=sources,
        udf=dill.loads(on_demand_feature_view_proto.spec.user_defined_function.body),
        description=on_demand_feature_view_proto.spec.description,
        tags=dict(on_demand_feature_view_proto.spec.tags),
        owner=on_demand_feature_view_proto.spec.owner,
    )

    # FeatureViewProjections are not saved in the OnDemandFeatureView proto.
    # Create the default projection.
    on_demand_feature_view_obj.projection = FeatureViewProjection.from_definition(
        on_demand_feature_view_obj
    )

    if on_demand_feature_view_proto.meta.HasField("created_timestamp"):
        on_demand_feature_view_obj.created_timestamp = (
            on_demand_feature_view_proto.meta.created_timestamp.ToDatetime()
        )
    if on_demand_feature_view_proto.meta.HasField("last_updated_timestamp"):
        on_demand_feature_view_obj.last_updated_timestamp = (
            on_demand_feature_view_proto.meta.last_updated_timestamp.ToDatetime()
        )

    return on_demand_feature_view_obj
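# Hedged round-trip sketch for the method above: from_proto() should invert
# to_proto() for a view built with the @on_demand_feature_view decorator, as the
# registry tests above exercise. The request source and view names here are
# illustrative only.
import pandas as pd
from feast import Field, RequestSource
from feast.on_demand_feature_view import OnDemandFeatureView, on_demand_feature_view
from feast.types import Int64

req = RequestSource(name="vals_to_add", schema=[Field(name="val_to_add", dtype=Int64)])

@on_demand_feature_view(
    sources=[req],
    schema=[Field(name="val_plus_one", dtype=Int64)],
)
def plus_one_view(features_df: pd.DataFrame) -> pd.DataFrame:
    df = pd.DataFrame()
    df["val_plus_one"] = features_df["val_to_add"] + 1
    return df

reloaded = OnDemandFeatureView.from_proto(plus_one_view.to_proto())
assert reloaded.name == "plus_one_view"
assert [f.name for f in reloaded.features] == ["val_plus_one"]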
def test_apply_feature_view_integration(test_registry):
    # Create Feature Views
    batch_source = FileSource(
        file_format=ParquetFormat(),
        path="file://feast/*",
        timestamp_field="ts_col",
        created_timestamp_column="timestamp",
    )

    fv1 = FeatureView(
        name="my_feature_view_1",
        schema=[
            Field(name="fs1_my_feature_1", dtype=Int64),
            Field(name="fs1_my_feature_2", dtype=String),
            Field(name="fs1_my_feature_3", dtype=Array(String)),
            Field(name="fs1_my_feature_4", dtype=Array(Bytes)),
        ],
        entities=["fs1_my_entity_1"],
        tags={"team": "matchmaking"},
        batch_source=batch_source,
        ttl=timedelta(minutes=5),
    )

    project = "project"

    # Register Feature View
    test_registry.apply_feature_view(fv1, project)

    feature_views = test_registry.list_feature_views(project)

    # List Feature Views
    assert (
        len(feature_views) == 1
        and feature_views[0].name == "my_feature_view_1"
        and feature_views[0].features[0].name == "fs1_my_feature_1"
        and feature_views[0].features[0].dtype == Int64
        and feature_views[0].features[1].name == "fs1_my_feature_2"
        and feature_views[0].features[1].dtype == String
        and feature_views[0].features[2].name == "fs1_my_feature_3"
        and feature_views[0].features[2].dtype == Array(String)
        and feature_views[0].features[3].name == "fs1_my_feature_4"
        and feature_views[0].features[3].dtype == Array(Bytes)
        and feature_views[0].entities[0] == "fs1_my_entity_1"
    )

    feature_view = test_registry.get_feature_view("my_feature_view_1", project)
    assert (
        feature_view.name == "my_feature_view_1"
        and feature_view.features[0].name == "fs1_my_feature_1"
        and feature_view.features[0].dtype == Int64
        and feature_view.features[1].name == "fs1_my_feature_2"
        and feature_view.features[1].dtype == String
        and feature_view.features[2].name == "fs1_my_feature_3"
        and feature_view.features[2].dtype == Array(String)
        and feature_view.features[3].name == "fs1_my_feature_4"
        and feature_view.features[3].dtype == Array(Bytes)
        and feature_view.entities[0] == "fs1_my_entity_1"
    )

    test_registry.delete_feature_view("my_feature_view_1", project)
    feature_views = test_registry.list_feature_views(project)
    assert len(feature_views) == 0

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
driver_hourly_stats = FileSource(
    path="data/driver_stats_with_string.parquet",
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

driver = Entity(
    name="driver_id",
    value_type=ValueType.INT64,
    description="driver id",
)

driver_hourly_stats_view = BatchFeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=Duration(seconds=86400000),
    schema=[
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
        Field(name="string_feature", dtype=String),
    ],
    online=True,
    batch_source=driver_hourly_stats,
    tags={},
)

# Define a request data source which encodes features / information only
# available at request time (e.g. part of the user initiated HTTP request)
input_request = RequestSource(
    name="vals_to_add",
    schema=[
        Field(name="val_to_add", dtype=Int64),