def run_offline_online_store_consistency_test(fs: FeatureStore, fv: FeatureView) -> None:
    """Materialize a feature view and verify the offline and online stores agree.

    Runs ``materialize()`` over a fixed window, then ``materialize_incremental()``
    up to now, asserting expected per-driver feature values after each step via
    ``check_offline_and_online_features``.
    """
    now = datetime.now()
    full_feature_names = True
    check_offline_store: bool = True

    def _check(driver_id, event_timestamp, expected_value):
        # Thin wrapper so each expectation below reads as a single line;
        # forwards the shared fs/fv/flag arguments unchanged.
        check_offline_and_online_features(
            fs=fs,
            fv=fv,
            driver_id=driver_id,
            event_timestamp=event_timestamp,
            expected_value=expected_value,
            full_feature_names=full_feature_names,
            check_offline_store=check_offline_store,
        )

    # Run materialize()
    # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
    start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
    end_date = now - timedelta(hours=2)
    fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date)

    # check result of materialize()
    _check(driver_id=1, event_timestamp=end_date, expected_value=0.3)
    _check(driver_id=2, event_timestamp=end_date, expected_value=None)

    # check prior value for materialize_incremental()
    _check(driver_id=3, event_timestamp=end_date, expected_value=4)

    # run materialize_incremental()
    fs.materialize_incremental(feature_views=[fv.name], end_date=now)

    # check result of materialize_incremental()
    _check(driver_id=3, event_timestamp=now, expected_value=5)
def main():
    """Apply the example repo, materialize features to AWS, and read them back online.

    Side effects: deploys registry/infra via ``fs.apply``, writes to the online
    store (DynamoDB) via ``materialize_incremental``, and prints the retrieved
    features as a DataFrame.
    """
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)

    # Load the feature store from the current path
    fs = FeatureStore(repo_path=".")

    # Deploy the feature store to AWS
    print("Deploying feature store to AWS...")
    fs.apply([driver, driver_hourly_stats_view])

    # Select features
    feature_refs = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"]

    print("Loading features into the online store...")
    fs.materialize_incremental(end_date=datetime.now())

    print("Retrieving online features...")

    # Retrieve features from the online store (DynamoDB).
    # Use the `features=` keyword: `feature_refs=` is the deprecated name for
    # this parameter, and the sibling Snowflake example already uses `features=`.
    online_features = fs.get_online_features(
        features=feature_refs,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    ).to_dict()

    print(pd.DataFrame.from_dict(online_features))
def main():
    """Apply the Snowflake example repo, fetch training data, materialize, and query online.

    Prints the historical (training) DataFrame and the online feature lookup
    for two example drivers.
    """
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)

    # Load the feature store from the current path
    fs = FeatureStore(repo_path=".")

    # Deploy the feature store to Snowflake
    print("Deploying feature store to Snowflake...")
    fs.apply([driver, driver_stats_fv])

    # Select features
    features = [
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
    ]

    # Build the entity dataframe: three evenly spaced UTC timestamps covering the
    # last three days, one row per driver. This is the frame that gets enriched
    # with historical features.
    event_timestamps = [
        pd.Timestamp(dt, unit="ms", tz="UTC").round("ms")
        for dt in pd.date_range(
            start=datetime.now() - timedelta(days=3),
            end=datetime.now(),
            periods=3,
        )
    ]
    entity_df = pd.DataFrame(
        {
            "event_timestamp": event_timestamps,
            "driver_id": [1001, 1002, 1003],
        }
    )

    print("Retrieving training data...")

    # Retrieve historical features by joining the entity dataframe to the Snowflake table source
    training_df = fs.get_historical_features(
        features=features, entity_df=entity_df
    ).to_df()
    print()
    print(training_df)

    print()
    print("Loading features into the online store...")
    fs.materialize_incremental(end_date=datetime.now())

    print()
    print("Retrieving online features...")

    # Retrieve features from the online store
    online_features = fs.get_online_features(
        features=features,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    ).to_dict()
    print()
    print(pd.DataFrame.from_dict(online_features))
# Define an entity for the driver. You can think of entity as a primary key used to
# fetch features.
driver = Entity(
    name="driver_id",
    value_type=ValueType.INT64,
    description="driver id",
)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature column. Here we define a Feature View that will allow us to serve this
# data to our model online.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    # Keep materialized rows for one year.
    ttl=Duration(seconds=86400 * 365),
    features=[
        Feature(name="conv_rate", dtype=ValueType.DOUBLE),
        Feature(name="acc_rate", dtype=ValueType.FLOAT),
        Feature(name="avg_daily_trips", dtype=ValueType.INT64),
    ],
    online=True,
    batch_source=driver_hourly_stats,
    tags={},
)

# Load the feature store from the current path ("." is equivalent to the original
# "" but matches the repo_path convention used elsewhere in this file), register
# the definitions, and load features into the online store up to now.
fs = FeatureStore(repo_path=".")
fs.apply([driver_hourly_stats_view, driver])

now = datetime.now()
fs.materialize_incremental(end_date=now)