def serve_transformations_command(ctx: click.Context, port: int):
    """[Experimental] Start a feature consumption server locally on a given port."""
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    store.serve_transformations(port)
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for feature_view in [
        *store.list_feature_views(),
        *store.list_request_feature_views(),
        *store.list_on_demand_feature_views(),
    ]:
        entities = set()
        if isinstance(feature_view, FeatureView):
            entities.update(feature_view.entities)
        elif isinstance(feature_view, OnDemandFeatureView):
            for backing_fv in feature_view.inputs.values():
                if isinstance(backing_fv, FeatureView):
                    entities.update(backing_fv.entities)
        table.append(
            [
                feature_view.name,
                entities if len(entities) > 0 else "n/a",
                type(feature_view).__name__,
            ]
        )

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "ENTITIES", "TYPE"], tablefmt="plain"))
def run_offline_online_store_consistency_test(
    fs: FeatureStore, fv: FeatureView
) -> None:
    now = datetime.utcnow()

    # Run materialize()
    # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
    start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
    end_date = now - timedelta(hours=2)
    fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date)

    # check result of materialize()
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=1, event_timestamp=end_date, expected_value=0.3
    )

    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=2, event_timestamp=end_date, expected_value=None
    )

    # check prior value for materialize_incremental()
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=end_date, expected_value=4
    )

    # run materialize_incremental()
    fs.materialize_incremental(feature_views=[fv.name], end_date=now)

    # check result of materialize_incremental()
    check_offline_and_online_features(
        fs=fs, fv=fv, driver_id=3, event_timestamp=now, expected_value=5
    )
def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    with tempfile.NamedTemporaryFile(suffix=".parquet") as f:
        df = create_dataset()
        f.close()
        df.to_parquet(f.name)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{f.name}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
        )
        fv = get_feature_view(file_source)

        with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="local",
                online_store=OnlineStoreConfig(
                    local=LocalOnlineStoreConfig(
                        path=str(Path(data_dir_name) / "online_store.db")
                    )
                ),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv])

            yield fs, fv
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
) -> None:
    # Check online store
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver": driver_id}]
    ).to_dict()
    if expected_value:
        assert abs(response_dict[f"{fv.name}__value"][0] - expected_value) < 1e-6
    else:
        assert response_dict[f"{fv.name}__value"][0] is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict(
            {"driver_id": [driver_id], "event_timestamp": [event_timestamp]}
        ),
        feature_refs=[f"{fv.name}:value"],
    ).to_df()

    if expected_value:
        assert abs(df.to_dict()[f"{fv.name}__value"][0] - expected_value) < 1e-6
    else:
        df = df.where(pd.notnull(df), None)
        assert df.to_dict()[f"{fv.name}__value"][0] is None
def prep_dynamodb_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    with tempfile.NamedTemporaryFile(suffix=".parquet") as f:
        df = create_dataset()
        f.close()
        df.to_parquet(f.name)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{f.name}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={"ts_1": "ts", "id": "driver_id"},
        )
        fv = get_feature_view(file_source)
        e = Entity(
            name="driver",
            description="id for driver",
            join_key="driver_id",
            value_type=ValueType.INT32,
        )
        with tempfile.TemporaryDirectory() as repo_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="aws",
                online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
                offline_store=FileOfflineStoreConfig(),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv, e])

            yield fs, fv
def test_non_local_feature_repo() -> None:
    """
    Test running apply on a sample repo, and make sure the infra gets created.
    """
    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)

        repo_config = repo_path / "feature_store.yaml"
        repo_config.write_text(
            dedent(
                """
                project: foo
                registry: data/registry.db
                provider: local
                online_store:
                    path: data/online_store.db
                """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(
            (Path(__file__).parent / "example_feature_repo_1.py").read_text()
        )

        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        fs = FeatureStore(repo_path=str(repo_path))
        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        result = runner.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
def test_usage_on(dummy_exporter, enabling_toggle):
    _reload_feast()
    from feast.feature_store import FeatureStore

    with tempfile.TemporaryDirectory() as temp_dir:
        test_feature_store = FeatureStore(
            config=RepoConfig(
                registry=os.path.join(temp_dir, "registry.db"),
                project="fake_project",
                provider="local",
                online_store=SqliteOnlineStoreConfig(
                    path=os.path.join(temp_dir, "online.db")
                ),
            )
        )
        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            tags={"team": "matchmaking"},
        )

        test_feature_store.apply([entity])

        assert len(dummy_exporter) == 3
        assert {
            "entrypoint": "feast.infra.local.LocalRegistryStore.get_registry_proto"
        }.items() <= dummy_exporter[0].items()
        assert {
            "entrypoint": "feast.infra.local.LocalRegistryStore.update_registry_proto"
        }.items() <= dummy_exporter[1].items()
        assert {
            "entrypoint": "feast.feature_store.FeatureStore.apply"
        }.items() <= dummy_exporter[2].items()
def serve_command(ctx: click.Context, host: str, port: int, no_access_log: bool):
    """Start a feature server locally on a given port."""
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    store.serve(host, port, no_access_log)
def prep_bq_fs_and_fv(
    bq_source_type: str,
) -> Iterator[Tuple[FeatureStore, FeatureView]]:
    client = bigquery.Client()
    gcp_project = client.project
    bigquery_dataset = "test_ingestion"
    dataset = bigquery.Dataset(f"{gcp_project}.{bigquery_dataset}")
    client.create_dataset(dataset, exists_ok=True)
    dataset.default_table_expiration_ms = (
        1000 * 60 * 60 * 24 * 14
    )  # 2 weeks in milliseconds
    client.update_dataset(dataset, ["default_table_expiration_ms"])

    df = create_dataset()

    job_config = bigquery.LoadJobConfig()
    table_ref = f"{gcp_project}.{bigquery_dataset}.{bq_source_type}_correctness_{int(time.time())}"
    query = f"SELECT * FROM `{table_ref}`"
    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()

    bigquery_source = BigQuerySource(
        table_ref=table_ref if bq_source_type == "table" else None,
        query=query if bq_source_type == "query" else None,
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        date_partition_column="",
        field_mapping={"ts_1": "ts", "id": "driver_id"},
    )

    fv = get_feature_view(bigquery_source)
    e = Entity(
        name="driver",
        description="id for driver",
        join_key="driver_id",
        value_type=ValueType.INT32,
    )
    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
            provider="gcp",
            online_store=DatastoreOnlineStoreConfig(namespace="integration_test"),
        )
        fs = FeatureStore(config=config)
        fs.apply([fv, e])

        yield fs, fv
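A minimal consumption sketch for the prep_* generator helpers above. It assumes they are wrapped with contextlib.contextmanager (the decorator is not shown in this excerpt) and reuses the consistency test defined earlier; nothing here is the definitive test harness.

import contextlib

# Sketch only: prep_bq_fs_and_fv yields a single (FeatureStore, FeatureView) pair,
# so wrapping it as a context manager lets the temporary registry and data
# directories clean up when the block exits.
with contextlib.contextmanager(prep_bq_fs_and_fv)("table") as (fs, fv):
    run_offline_online_store_consistency_test(fs, fv)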
def feature_view_list():
    """
    List all feature views
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    table = []
    for feature_view in store.list_feature_views():
        table.append([feature_view.name, feature_view.entities])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "ENTITIES"], tablefmt="plain"))
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
    full_feature_names: bool,
) -> None:
    # Check online store
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"],
        [{"driver": driver_id}],
        full_feature_names=full_feature_names,
    ).to_dict()

    if full_feature_names:
        if expected_value:
            assert abs(response_dict[f"{fv.name}__value"][0] - expected_value) < 1e-6
        else:
            assert response_dict[f"{fv.name}__value"][0] is None
    else:
        if expected_value:
            assert abs(response_dict["value"][0] - expected_value) < 1e-6
        else:
            assert response_dict["value"][0] is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict(
            {"driver_id": [driver_id], "event_timestamp": [event_timestamp]}
        ),
        features=[f"{fv.name}:value"],
        full_feature_names=full_feature_names,
    ).to_df()

    if full_feature_names:
        if expected_value:
            assert abs(df.to_dict()[f"{fv.name}__value"][0] - expected_value) < 1e-6
        else:
            assert math.isnan(df.to_dict()[f"{fv.name}__value"][0])
    else:
        if expected_value:
            assert abs(df.to_dict()["value"][0] - expected_value) < 1e-6
        else:
            assert math.isnan(df.to_dict()["value"][0])
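A brief usage sketch for the full_feature_names variant above, mirroring the driver_id=1 check from the consistency test earlier; fs, fv, and end_date are assumed to come from that test's setup, so this is illustrative only.

# Sketch only: with full feature names the response keys look like "<fv.name>__value";
# without them the key collapses to plain "value".
check_offline_and_online_features(
    fs=fs, fv=fv, driver_id=1, event_timestamp=end_date,
    expected_value=0.3, full_feature_names=True,
)
check_offline_and_online_features(
    fs=fs, fv=fv, driver_id=1, event_timestamp=end_date,
    expected_value=0.3, full_feature_names=False,
)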
def endpoint(ctx: click.Context):
    """
    Display feature server endpoints.
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    endpoint = store.get_feature_server_endpoint()
    if endpoint is not None:
        _logger.info(
            f"Feature server endpoint: {Style.BRIGHT + Fore.GREEN}{endpoint}{Style.RESET_ALL}"
        )
    else:
        _logger.info("There is no active feature server.")
def on_demand_feature_view_list(ctx: click.Context):
    """
    [Experimental] List all on demand feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for on_demand_feature_view in store.list_on_demand_feature_views():
        table.append([on_demand_feature_view.name])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME"], tablefmt="plain"))
def feature_view_list(ctx: click.Context):
    """
    List all feature views
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for feature_view in store.list_feature_views():
        table.append([feature_view.name, feature_view.entities])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "ENTITIES"], tablefmt="plain"))
def materialize_command(repo_path: str, start_ts: str, end_ts: str, views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online
    store. Feast will read all data between START_TS and END_TS from the offline
    store and write it to the online store. If you don't specify feature view
    names using --views, all registered Feature Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    store = FeatureStore(repo_path=repo_path)
    store.materialize(
        feature_views=None if not views else views,
        start_date=datetime.fromisoformat(start_ts).replace(tzinfo=utc),
        end_date=datetime.fromisoformat(end_ts).replace(tzinfo=utc),
    )
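For reference, a small sketch of the timestamp handling used above, assuming the `utc` object in this module is `pytz.utc`; it parses the ISO 8601 example from the docstring into the tz-aware datetime that materialize() receives.

from datetime import datetime

from pytz import utc  # assumption: the `utc` used by this module

# '2021-07-16T19:20:01' is the ISO 8601 example from the docstring above.
end_date = datetime.fromisoformat("2021-07-16T19:20:01").replace(tzinfo=utc)
assert end_date.tzinfo is utc  # materialize() is handed a tz-aware datetime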
def materialize_incremental_command(end_ts: str, views: List[str]):
    """
    Run an incremental materialization job to ingest new data into the online
    store. Feast will read all data from the previously ingested point to END_TS
    from the offline store and write it to the online store. If you don't specify
    feature view names using --views, all registered Feature Views will be
    incrementally materialized.

    END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    store.materialize_incremental(
        feature_views=None if not views else views,
        end_date=datetime.fromisoformat(end_ts),
    )
def apply_total_with_repo_instance(
    store: FeatureStore,
    project: str,
    registry: Registry,
    repo: RepoContents,
    skip_source_validation: bool,
):
    if not skip_source_validation:
        data_sources = [t.batch_source for t in repo.feature_views]
        # Make sure the data source used by this feature view is supported by Feast
        for data_source in data_sources:
            data_source.validate(store.config)

    # For each object in the registry, determine whether it should be kept or deleted.
    (
        all_to_apply,
        all_to_delete,
        views_to_delete,
        views_to_keep,
    ) = extract_objects_for_apply_delete(project, registry, repo)

    diff = store.apply(all_to_apply, objects_to_delete=all_to_delete, partial=False)

    log_cli_output(diff, views_to_delete, views_to_keep)
def entity_list():
    """
    List all entities
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))
    table = []
    for entity in store.list_entities():
        table.append([entity.name, entity.description, entity.value_type])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "DESCRIPTION", "TYPE"], tablefmt="plain"))
def feature_view_describe(name: str):
    """
    Describe a feature view
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))

    try:
        feature_view = store.get_feature_view(name)
    except FeastObjectNotFoundException as e:
        print(e)
        exit(1)

    print(
        yaml.dump(
            yaml.safe_load(str(feature_view)),
            default_flow_style=False,
            sort_keys=False,
        )
    )
def entity_describe(name: str):
    """
    Describe an entity
    """
    cli_check_repo(Path.cwd())
    store = FeatureStore(repo_path=str(Path.cwd()))

    try:
        entity = store.get_entity(name)
    except FeastObjectNotFoundException as e:
        print(e)
        exit(1)

    print(
        yaml.dump(yaml.safe_load(str(entity)), default_flow_style=False, sort_keys=False)
    )
def entity_list(ctx: click.Context):
    """
    List all entities
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for entity in store.list_entities():
        table.append([entity.name, entity.description, entity.value_type])

    from tabulate import tabulate

    print(tabulate(table, headers=["NAME", "DESCRIPTION", "TYPE"], tablefmt="plain"))
def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    now = datetime.utcnow()

    # Run materialize()
    # use both tz-naive & tz-aware timestamps to test that they're both correctly handled
    start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
    end_date = now - timedelta(hours=2)
    fs.materialize([fv.name], start_date, end_date)

    # check result of materialize()
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver_id": 1}]
    ).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 0.3) < 1e-6

    # check prior value for materialize_incremental()
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver_id": 3}]
    ).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 4) < 1e-6

    # run materialize_incremental()
    fs.materialize_incremental(
        [fv.name],
        now - timedelta(seconds=0),
    )

    # check result of materialize_incremental()
    response_dict = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver_id": 3}]
    ).to_dict()
    assert abs(response_dict[f"{fv.name}__value"][0] - 5) < 1e-6
def feature_store_with_local_registry():
    fd, registry_path = mkstemp()
    fd, online_store_path = mkstemp()
    return FeatureStore(
        config=RepoConfig(
            registry=registry_path,
            project="default",
            provider="local",
            online_store=SqliteOnlineStoreConfig(path=online_store_path),
        )
    )
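A minimal usage sketch for the local-registry helper above, reusing the Entity shape from test_usage_on; it relies only on apply() and list_entities(), both shown elsewhere in this section, and is not the authoritative test for this fixture.

# Sketch only: apply a single entity to the freshly created store and read it back.
store = feature_store_with_local_registry()
driver = Entity(
    name="driver_car_id",
    description="Car driver id",
    value_type=ValueType.STRING,
    tags={"team": "matchmaking"},
)
store.apply([driver])
assert "driver_car_id" in [e.name for e in store.list_entities()]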
def feature_store_with_s3_registry():
    return FeatureStore(
        config=RepoConfig(
            registry=f"s3://feast-integration-tests/registries/{int(time.time() * 1000)}/registry.db",
            project="default",
            provider="aws",
            online_store=DynamoDBOnlineStoreConfig(region="us-west-2"),
            offline_store=FileOfflineStoreConfig(),
        )
    )
def entity_describe(ctx: click.Context, name: str):
    """
    Describe an entity
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))

    try:
        entity = store.get_entity(name)
    except FeastObjectNotFoundException as e:
        print(e)
        exit(1)

    print(
        yaml.dump(yaml.safe_load(str(entity)), default_flow_style=False, sort_keys=False)
    )
def materialize_command(
    ctx: click.Context, start_ts: str, end_ts: str, views: List[str]
):
    """
    Run a (non-incremental) materialization job to ingest data into the online
    store. Feast will read all data between START_TS and END_TS from the offline
    store and write it to the online store. If you don't specify feature view
    names using --views, all registered Feature Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    store.materialize(
        feature_views=None if not views else views,
        start_date=utils.make_tzaware(datetime.fromisoformat(start_ts)),
        end_date=utils.make_tzaware(datetime.fromisoformat(end_ts)),
    )
def test_apply_remote_repo():
    fd, registry_path = mkstemp()
    fd, online_store_path = mkstemp()
    return FeatureStore(
        config=RepoConfig(
            registry=registry_path,
            project="default",
            provider="local",
            online_store=SqliteOnlineStoreConfig(path=online_store_path),
        )
    )
def data_source_list(ctx: click.Context):
    """
    List all data sources
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    table = []
    for datasource in store.list_data_sources():
        table.append([datasource.name, datasource.__class__])

    from tabulate import tabulate

    warnings.warn(
        "Listing data sources will only work properly if all data sources have names or table names specified. "
        "Starting in Feast 0.21, unique data source names will be required to encourage data source discovery.",
        RuntimeWarning,
    )
    print(tabulate(table, headers=["NAME", "CLASS"], tablefmt="plain"))
def feature_store_with_local_registry(self):
    fd, registry_path = mkstemp()
    fd, online_store_path = mkstemp()
    return FeatureStore(
        config=RepoConfig(
            metadata_store=registry_path,
            project="default",
            provider="local",
            online_store=OnlineStoreConfig(
                local=LocalOnlineStoreConfig(path=online_store_path)
            ),
        )
    )