def run_offline_online_store_consistency_test(fs: FeatureStore, fv: FeatureView) -> None:
    """Materialize ``fv`` and verify the offline and online stores agree.

    First runs ``materialize()`` over a window from five hours ago to two
    hours ago and checks drivers 1, 2 and 3 at the end of that window. Then
    runs ``materialize_incremental()`` up to the current time and checks
    driver 3 again at the current time.

    :param fs: feature store under test.
    :param fv: feature view to materialize and check.
    """
    current_time = datetime.now()

    # One bound is tz-aware and the other tz-naive on purpose: both kinds of
    # timestamp must be handled correctly by materialize().
    window_end = current_time - timedelta(hours=2)
    window_start = (current_time - timedelta(hours=5)).replace(tzinfo=utc)

    # Arguments shared by every consistency check below.
    shared_kwargs = dict(
        fs=fs,
        fv=fv,
        full_feature_names=True,
        check_offline_store=True,
    )

    fs.materialize(
        feature_views=[fv.name], start_date=window_start, end_date=window_end
    )

    # (driver_id, expected_value) after materialize(); the driver 3 entry is
    # the prior value that materialize_incremental() is expected to replace.
    after_materialize = ((1, 0.3), (2, None), (3, 4))
    for driver, expected in after_materialize:
        check_offline_and_online_features(
            driver_id=driver,
            event_timestamp=window_end,
            expected_value=expected,
            **shared_kwargs,
        )

    fs.materialize_incremental(feature_views=[fv.name], end_date=current_time)

    # Result of materialize_incremental().
    check_offline_and_online_features(
        driver_id=3,
        event_timestamp=current_time,
        expected_value=5,
        **shared_kwargs,
    )
def setup_feature_store():
    """Prepares the local environment for a FeatureStore docstring test.

    Initializes a "local" repo under ``feature_repo``, registers the
    ``driver_id`` entity and the ``driver_hourly_stats`` feature view backed
    by the repo's parquet file, and materializes the window from three hours
    ago to ten minutes ago (UTC, naive timestamps).
    """
    from datetime import datetime, timedelta

    from feast import Entity, Feature, FeatureStore, FeatureView, FileSource, ValueType
    from feast.repo_operations import init_repo

    init_repo("feature_repo", "local")
    store = FeatureStore(repo_path="feature_repo")

    driver_entity = Entity(
        name="driver_id",
        value_type=ValueType.INT64,
        description="driver id",
    )

    stats_source = FileSource(
        path="feature_repo/data/driver_stats.parquet",
        event_timestamp_column="event_timestamp",
        created_timestamp_column="created",
    )

    # (feature name, value type) for every column exposed by the view.
    feature_specs = (
        ("conv_rate", ValueType.FLOAT),
        ("acc_rate", ValueType.FLOAT),
        ("avg_daily_trips", ValueType.INT64),
    )
    stats_view = FeatureView(
        name="driver_hourly_stats",
        entities=["driver_id"],
        ttl=timedelta(days=1),
        features=[
            Feature(name=feature_name, dtype=value_type)
            for feature_name, value_type in feature_specs
        ],
        batch_source=stats_source,
    )

    store.apply([stats_view, driver_entity])

    window_start = datetime.utcnow() - timedelta(hours=3)
    window_end = datetime.utcnow() - timedelta(minutes=10)
    store.materialize(start_date=window_start, end_date=window_end)
def store_online(feature_store: FeatureStore) -> FeatureStore:
    """Materialize the last 250 days (up to ten minutes ago) and return the store.

    :param feature_store: store whose feature views are materialized.
    :return: the same ``feature_store`` object that was passed in.
    """
    window_start = datetime.utcnow() - timedelta(days=250)
    window_end = datetime.utcnow() - timedelta(minutes=10)
    feature_store.materialize(start_date=window_start, end_date=window_end)
    return feature_store
) benchmark_feature_views = [ FeatureView( name=f"feature_view_{i}", entities=["entity"], ttl=Duration(seconds=86400), features=[ Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64) for j in range(10) ], online=True, batch_source=generated_data_source, ) for i in range(25) ] benchmark_feature_service = FeatureService( name=f"benchmark_feature_service", features=benchmark_feature_views, ) fs = FeatureStore(".") fs.apply([ driver_hourly_stats_view, driver, entity, benchmark_feature_service, *benchmark_feature_views ]) now = datetime.now() fs.materialize(start, now) print("Materialization finished")
def construct_test_environment(
    test_repo_config: TestRepoConfig,
    create_and_apply: bool = False,
    materialize: bool = False,
) -> Environment:
    """
    Build a feature repo from the test repo config and yield its environment.

    This method takes the parameters from the test repo config, creates a
    feature repo in a temporary directory, optionally applies and
    materializes it, and yields the constructed environment (which carries
    the feature store object) to callers. The environment can be interacted
    with for the purposes of tests. The user is *not* expected to perform any
    clean up actions: both the offline data source creator and the feature
    store are torn down when the generator is resumed or closed.

    NOTE(review): this is a generator function (it ``yield``s); presumably it
    is wrapped by ``contextlib.contextmanager`` or a pytest fixture outside
    this block -- confirm at the definition site.

    :param test_repo_config: configuration naming the offline store creator
        class, the online store and the provider.
    :param create_and_apply: when true, the driver and customer entities and
        the driver-stats and customer feature views are applied to the store.
    :param materialize: when true, the store is materialized from
        ``environment.start_date`` to ``environment.end_date`` before yielding.
    :return: yields the Environment built using the supplied configuration.
    """
    df = create_dataset()

    # A short random suffix keeps concurrently running tests in separate projects.
    project = f"test_correctness_{str(uuid.uuid4()).replace('-', '')[:8]}"

    module_name, config_class_name = test_repo_config.offline_store_creator.rsplit(
        ".", 1
    )
    offline_creator: DataSourceCreator = importer.get_class_from_type(
        module_name, config_class_name, "DataSourceCreator"
    )(project)
    ds = offline_creator.create_data_source(
        project, df, field_mapping={"ts_1": "ts", "id": "driver_id"}
    )
    offline_store = offline_creator.create_offline_store_config()
    online_store = test_repo_config.online_store

    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=project,
            provider=test_repo_config.provider,
            offline_store=offline_store,
            online_store=online_store,
            repo_path=repo_dir_name,
        )
        fs = FeatureStore(config=config)
        environment = Environment(
            name=project,
            test_repo_config=test_repo_config,
            feature_store=fs,
            data_source=ds,
            data_source_creator=offline_creator,
        )

        fvs = []
        entities = []
        try:
            if create_and_apply:
                entities.extend([driver(), customer()])
                fvs.extend(
                    [
                        environment.driver_stats_feature_view(),
                        environment.customer_feature_view(),
                    ]
                )
                fs.apply(fvs + entities)

            if materialize:
                fs.materialize(environment.start_date, environment.end_date)

            yield environment
        finally:
            # Nest the teardowns so a failure while cleaning up the offline
            # data source cannot skip the feature store teardown (which would
            # leak the online store / registry state). Any exception from the
            # first teardown still propagates after the second one has run.
            try:
                offline_creator.teardown()
            finally:
                fs.teardown()