def build_feature_store(s3_bucket: str, registry_path: str, online_store_path: str) -> FeatureStore:
    """Return a FeatureStore for the "horsecolic" project.

    Args:
        s3_bucket: Forwarded to ``FeatureStoreConfig`` as ``s3_bucket``.
        registry_path: Forwarded to ``FeatureStoreConfig`` as ``registry_path``.
        online_store_path: Forwarded to ``FeatureStoreConfig`` as
            ``online_store_path``.

    Returns:
        A ``FeatureStore`` constructed from the assembled configuration.
    """
    return FeatureStore(
        config=FeatureStoreConfig(
            project="horsecolic",
            s3_bucket=s3_bucket,
            registry_path=registry_path,
            online_store_path=online_store_path,
        )
    )
def main():
    """Run the end-to-end driver-stats demo: apply, train-time fetch, materialize, online fetch."""
    # Show every column of the printed dataframes on one wide line.
    for option, value in (("display.max_columns", None), ("display.width", 1000)):
        pd.set_option(option, value)

    # Load the feature store from the current path
    fs = FeatureStore(repo_path=".")

    # Deploy the feature store to GCP
    print("Deploying feature store to GCP...")
    fs.apply([driver, driver_stats_fv])

    # Select features
    feature_refs = [
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
    ]

    # Create an entity dataframe. This is the dataframe that will be enriched
    # with historical features: three evenly spaced timestamps covering the
    # last three days, one per driver id.
    window_start = datetime.now() - timedelta(days=3)
    window_end = datetime.now()
    event_timestamps = []
    for dt in pd.date_range(start=window_start, end=window_end, periods=3):
        event_timestamps.append(pd.Timestamp(dt, unit="ms", tz="UTC").round("ms"))
    entity_df = pd.DataFrame(
        {
            "event_timestamp": event_timestamps,
            "driver_id": [1001, 1002, 1003],
        }
    )

    print("Retrieving training data...")

    # Retrieve historical features by joining the entity dataframe to the
    # BigQuery table source
    retrieval_job = fs.get_historical_features(
        feature_refs=feature_refs, entity_df=entity_df
    )
    training_df = retrieval_job.to_df()

    print()
    print(training_df)

    print()
    print("Loading features into the online store...")
    fs.materialize_incremental(end_date=datetime.now())

    print()
    print("Retrieving online features...")

    # Retrieve features from the online store (Firestore)
    online_response = fs.get_online_features(
        feature_refs=feature_refs,
        entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
    )
    online_features = online_response.to_dict()

    print()
    print(pd.DataFrame.from_dict(online_features))
def generate_saved_dataset():
    """Materialise the "credit_score_v1" training set as the saved dataset "my_training_ds".

    Reads the entity dataframe from ``data/loan_table.parquet``, requests the
    historical features of the feature service, and registers the result as
    a saved dataset stored at ``my_training_ds.parquet`` with
    ``credit_profiler`` attached as its profiler.
    """
    store = FeatureStore(repo_path=".")
    loans = pd.read_parquet(path="data/loan_table.parquet")
    feature_service = store.get_feature_service("credit_score_v1")

    retrieval_job = store.get_historical_features(
        entity_df=loans,
        features=feature_service,
    )

    store.create_saved_dataset(
        from_=retrieval_job,
        name="my_training_ds",
        storage=SavedDatasetFileStorage(path="my_training_ds.parquet"),
        feature_service=feature_service,
        profiler=credit_profiler,
    )
def test_exception_usage_on():
    """Check that a usage event is reported when FeatureStore construction fails.

    Usage reporting is switched on through environment variables with a
    freshly generated usage id, a ``FeatureStore`` is constructed for
    ``/tmp/non_existent_directory`` (presumably a path that does not exist, so
    construction is expected to raise), and the usage id is then looked up
    with ``ensure_bigquery_usage_id_with_retry``.
    """
    old_environ = dict(os.environ)
    test_usage_id = str(uuid.uuid4())
    try:
        os.environ["FEAST_FORCE_USAGE_UUID"] = test_usage_id
        os.environ["FEAST_IS_USAGE_TEST"] = "True"
        os.environ["FEAST_USAGE"] = "True"
        try:
            FeatureStore("/tmp/non_existent_directory")
        except Exception:
            # Failure is the scenario under test; only ordinary errors are
            # swallowed so KeyboardInterrupt/SystemExit still propagate.
            pass
    finally:
        # Always put the process environment back, even on an unexpected
        # interruption, so later tests do not inherit the usage overrides.
        os.environ.clear()
        os.environ.update(old_environ)

    ensure_bigquery_usage_id_with_retry(test_usage_id)
def run_demo():
    """Print the saved training dataset and one online lookup for "credit_score_v3"."""
    store = FeatureStore(repo_path=".")

    print("--- Historical features (from saved dataset) ---")
    saved_dataset = store.get_saved_dataset("my_training_ds")
    print(saved_dataset.to_df())

    print("\n--- Online features ---")
    feature_service = store.get_feature_service("credit_score_v3")
    # A single request row: the entity keys plus the request-time amount.
    request_row = {
        "zipcode": 30721,
        "dob_ssn": "19530219_5179",
        "transaction_amt": 1023,
    }
    response = store.get_online_features(
        features=feature_service,
        entity_rows=[request_row],
    )
    features = response.to_dict()

    # Print in key order so the output is stable between runs.
    for key in sorted(features):
        print(key, " : ", features[key])
def test_exception_usage_off():
    """Check that no usage event is reported when usage reporting is disabled.

    ``FEAST_USAGE`` is set to ``"False"`` with a freshly generated usage id, a
    ``FeatureStore`` is constructed for ``/tmp/non_existent_directory``
    (presumably a path that does not exist, so construction is expected to
    raise), and after a 30 second wait the usage table is asserted to hold
    no rows for that id.
    """
    old_environ = dict(os.environ)
    test_usage_id = str(uuid.uuid4())
    try:
        os.environ["FEAST_IS_USAGE_TEST"] = "True"
        os.environ["FEAST_USAGE"] = "False"
        os.environ["FEAST_FORCE_USAGE_UUID"] = test_usage_id
        try:
            FeatureStore("/tmp/non_existent_directory")
        except Exception:
            # Failure is the scenario under test; only ordinary errors are
            # swallowed so KeyboardInterrupt/SystemExit still propagate.
            pass
    finally:
        # Always put the process environment back so later tests do not
        # inherit the usage overrides.
        os.environ.clear()
        os.environ.update(old_environ)

    # Give any (unexpected) usage event time to arrive before checking.
    sleep(30)
    rows = read_bigquery_usage_id(test_usage_id)
    assert rows.total_rows == 0
def get_historical_features():
    """Fetch the historical project features used for training and preview them."""
    # Entities to pull data for (should dynamically read this from somewhere)
    project_ids = [1, 2, 3]

    # Every entity row uses today's date at midnight as its event timestamp.
    now = datetime.now()
    midnight = datetime(now.year, now.month, now.day)
    entity_df = pd.DataFrame.from_dict(
        {
            "id": project_ids,
            "event_timestamp": [midnight for _ in project_ids],
        }
    )

    # Get historical features
    features_repo = Path(config.BASE_DIR, "features")
    store = FeatureStore(repo_path=features_repo)
    retrieval_job = store.get_historical_features(
        entity_df=entity_df,
        feature_refs=["project_details:text", "project_details:tags"],
    )
    training_df = retrieval_job.to_df()

    # Nothing is persisted yet; the first rows are only printed.
    print(training_df.head())
def test_telemetry_on():
    """Check that applying an entity reports telemetry when telemetry is enabled.

    Telemetry is switched on through environment variables with a freshly
    generated telemetry id, a single entity is applied to a default
    ``FeatureStore``, and the id is then looked up with
    ``ensure_bigquery_telemetry_id_with_retry``.
    """
    old_environ = dict(os.environ)
    test_telemetry_id = str(uuid.uuid4())
    try:
        os.environ["FEAST_FORCE_TELEMETRY_UUID"] = test_telemetry_id
        os.environ["FEAST_IS_TELEMETRY_TEST"] = "True"
        os.environ["FEAST_TELEMETRY"] = "True"

        test_feature_store = FeatureStore()
        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )
        test_feature_store.apply([entity])
    finally:
        # Restore the environment even if construction or apply raises, so
        # the telemetry overrides never leak into other tests.
        os.environ.clear()
        os.environ.update(old_environ)

    ensure_bigquery_telemetry_id_with_retry(test_telemetry_id)
def test_telemetry_off():
    """Check that applying an entity reports nothing when telemetry is disabled.

    ``FEAST_TELEMETRY`` is set to ``"False"`` with a freshly generated
    telemetry id, a single entity is applied to a default ``FeatureStore``,
    and after a 30 second wait the telemetry table is asserted to hold no
    rows for that id.
    """
    old_environ = dict(os.environ)
    test_telemetry_id = str(uuid.uuid4())
    try:
        os.environ["FEAST_IS_TELEMETRY_TEST"] = "True"
        os.environ["FEAST_TELEMETRY"] = "False"
        os.environ["FEAST_FORCE_TELEMETRY_UUID"] = test_telemetry_id

        test_feature_store = FeatureStore()
        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )
        test_feature_store.apply([entity])
    finally:
        # Restore the environment even if construction or apply raises, so
        # the telemetry overrides never leak into other tests.
        os.environ.clear()
        os.environ.update(old_environ)

    # Give any (unexpected) telemetry event time to arrive before checking.
    sleep(30)
    rows = read_bigquery_telemetry_id(test_telemetry_id)
    assert rows.total_rows == 0
def setup_feature_store():
    """Prepares the local environment for a FeatureStore docstring test.

    Initialises a local repo in ``feature_repo``, registers the driver entity
    and the ``driver_hourly_stats`` feature view, and materializes the window
    from three hours ago to ten minutes ago.
    """
    from datetime import datetime, timedelta

    from feast import Entity, FeatureStore, FeatureView, Field, FileSource, ValueType
    from feast.repo_operations import init_repo
    from feast.types import Float32, Int64

    init_repo("feature_repo", "local")
    fs = FeatureStore(repo_path="feature_repo")

    driver = Entity(
        name="driver_id",
        value_type=ValueType.INT64,
        description="driver id",
    )

    # Parquet file holding the hourly driver statistics.
    stats_source = FileSource(
        path="feature_repo/data/driver_stats.parquet",
        timestamp_field="event_timestamp",
        created_timestamp_column="created",
    )

    stats_schema = [
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
    ]
    stats_view = FeatureView(
        name="driver_hourly_stats",
        entities=["driver_id"],
        ttl=timedelta(seconds=86400 * 1),
        schema=stats_schema,
        batch_source=stats_source,
    )

    fs.apply([stats_view, driver])

    window_start = datetime.utcnow() - timedelta(hours=3)
    window_end = datetime.utcnow() - timedelta(minutes=10)
    fs.materialize(start_date=window_start, end_date=window_end)
def construct_test_environment(
    test_repo_config: IntegrationTestRepoConfig,
    test_suite_name: str = "integration_test",
) -> Environment:
    """Yield a throwaway test Environment backed by a temporary repo directory.

    A project name is derived from ``test_suite_name`` plus eight hex
    characters of a random UUID. The feature store is torn down when the
    generator is resumed or closed after the ``yield``.

    Args:
        test_repo_config: Supplies the provider, the online store and the
            offline store creator.
        test_suite_name: Prefix of the generated project name.

    Yields:
        The constructed ``Environment``.
    """
    unique_suffix = str(uuid.uuid4()).replace("-", "")[:8]
    project = f"{test_suite_name}_{unique_suffix}"

    offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(
        project)
    offline_store_config = offline_creator.create_offline_store_config()
    online_store = test_repo_config.online_store

    with tempfile.TemporaryDirectory() as repo_dir_name:
        registry_file = Path(repo_dir_name) / "registry.db"
        fs = FeatureStore(
            config=RepoConfig(
                registry=str(registry_file),
                project=project,
                provider=test_repo_config.provider,
                offline_store=offline_store_config,
                online_store=online_store,
                repo_path=repo_dir_name,
            )
        )

        # We need to initialize the registry, because if nothing is applied in
        # the test before tearing down the feature store, that will cause the
        # teardown method to blow up.
        fs.registry._initialize_registry()

        environment = Environment(
            name=project,
            test_repo_config=test_repo_config,
            feature_store=fs,
            data_source_creator=offline_creator,
        )

        try:
            yield environment
        finally:
            fs.teardown()
from datetime import datetime, timedelta

import pandas as pd
from feast import FeatureStore
from joblib import dump
from sklearn.linear_model import LinearRegression

import helpers

# Training script: joins driver orders with historical driver statistics from
# the feature store. Only the data-loading part is visible in this excerpt.

# Load driver order data (tab-separated) and parse the event timestamps so the
# frame can serve as the entity dataframe below.
orders = pd.read_csv("driver_orders.csv", sep="\t")
orders["event_timestamp"] = pd.to_datetime(orders["event_timestamp"])

# Set up feature store
fs = FeatureStore(repo_path="driver_ranking/")

# Retrieve training data from BigQuery
training_df = fs.get_historical_features(
    entity_df=orders,
    feature_refs=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
    ],
).to_df()

# # Print output
# print(training_df)

# Train model
# Name of the label; presumably a column of the orders data (TODO confirm).
target = "trip_completed"
def test_ge():
    """Print the stored profile of the saved dataset "my_training_ds"."""
    store = FeatureStore(repo_path=".")

    print("--- Historical features (from saved dataset) ---")
    saved_dataset = store.get_saved_dataset("my_training_ds")
    profile = saved_dataset._profile
    print(profile)
def test_online() -> None:
    """
    Test reading from the online store in local mode.

    Writes one row to each of three feature views, reads the values back via
    ``get_online_features``, and then exercises registry caching with a
    one-second TTL and with ``cache_ttl_seconds=0`` by renaming the registry
    file away and back.
    """
    runner = CliRunner()
    with runner.local_repo(
            get_example_repo("example_feature_repo_1.py")) as store:
        # Write one row to each of the three feature views
        driver_locations_fv = store.get_feature_view(name="driver_locations")
        customer_profile_fv = store.get_feature_view(name="customer_profile")
        customer_driver_combined_fv = store.get_feature_view(
            name="customer_driver_combined")

        provider = store._get_provider()

        driver_key = EntityKeyProto(join_keys=["driver"],
                                    entity_values=[ValueProto(int64_val=1)])
        provider.online_write_batch(
            project=store.config.project,
            table=driver_locations_fv,
            data=[(
                driver_key,
                {
                    "lat": ValueProto(double_val=0.1),
                    "lon": ValueProto(string_val="1.0"),
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        customer_key = EntityKeyProto(join_keys=["customer"],
                                      entity_values=[ValueProto(int64_val=5)])
        provider.online_write_batch(
            project=store.config.project,
            table=customer_profile_fv,
            data=[(
                customer_key,
                {
                    "avg_orders_day": ValueProto(float_val=1.0),
                    "name": ValueProto(string_val="John"),
                    "age": ValueProto(int64_val=3),
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        # Composite key (customer, driver) for the combined feature view.
        customer_key = EntityKeyProto(
            join_keys=["customer", "driver"],
            entity_values=[ValueProto(int64_val=5),
                           ValueProto(int64_val=1)],
        )
        provider.online_write_batch(
            project=store.config.project,
            table=customer_driver_combined_fv,
            data=[(
                customer_key,
                {
                    "trips": ValueProto(int64_val=7)
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        # Retrieve four features for two entity rows (both rows use the keys
        # written above)
        result = store.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }, {
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()

        assert "driver_locations__lon" in result
        assert "customer_profile__avg_orders_day" in result
        assert "customer_profile__name" in result
        assert result["driver"] == [1, 1]
        assert result["customer"] == [5, 5]
        assert result["driver_locations__lon"] == ["1.0", "1.0"]
        assert result["customer_profile__avg_orders_day"] == [1.0, 1.0]
        assert result["customer_profile__name"] == ["John", "John"]
        assert result["customer_driver_combined__trips"] == [7, 7]

        # Ensure features are still in result when keys not found
        result = store.get_online_features(
            feature_refs=["customer_driver_combined:trips"],
            entity_rows=[{
                "driver": 0,
                "customer": 0
            }],
        ).to_dict()

        assert "customer_driver_combined__trips" in result

        # invalid table reference
        with pytest.raises(FeatureViewNotFoundException):
            store.get_online_features(
                feature_refs=["driver_locations_bad:lon"],
                entity_rows=[{
                    "driver": 1
                }],
            )

        # Create new FeatureStore object with fast cache invalidation
        cache_ttl = 1
        fs_fast_ttl = FeatureStore(config=RepoConfig(
            registry=RegistryConfig(path=store.config.registry,
                                    cache_ttl_seconds=cache_ttl),
            online_store=store.config.online_store,
            project=store.config.project,
            provider=store.config.provider,
        ))

        # Should download the registry and cache it (this store was configured
        # with a cache TTL of `cache_ttl` seconds)
        result = fs_fast_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Rename the registry.db so that it cant be used for refreshes
        os.rename(store.config.registry, store.config.registry + "_fake")

        # Wait for registry to expire
        time.sleep(cache_ttl)

        # Will try to reload registry because it has expired (it will fail
        # because the actual registry file was renamed away above)
        with pytest.raises(FileNotFoundError):
            fs_fast_ttl.get_online_features(
                feature_refs=[
                    "driver_locations:lon",
                    "customer_profile:avg_orders_day",
                    "customer_profile:name",
                    "customer_driver_combined:trips",
                ],
                entity_rows=[{
                    "driver": 1,
                    "customer": 5
                }],
            ).to_dict()

        # Restore registry.db so that we can see if it actually reloads registry
        os.rename(store.config.registry + "_fake", store.config.registry)

        # Test if registry is actually reloaded and whether results return
        result = fs_fast_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Create a registry with infinite cache (for users that want to manually refresh the registry)
        fs_infinite_ttl = FeatureStore(config=RepoConfig(
            registry=RegistryConfig(path=store.config.registry,
                                    cache_ttl_seconds=0),
            online_store=store.config.online_store,
            project=store.config.project,
            provider=store.config.provider,
        ))

        # Should return results (and fill the registry cache)
        result = fs_infinite_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Wait a bit so that an arbitrary TTL would take effect
        time.sleep(2)

        # Rename the registry.db so that it cant be used for refreshes
        os.rename(store.config.registry, store.config.registry + "_fake")

        # TTL is infinite so this method should use registry cache
        result = fs_infinite_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Force registry reload (should fail because file is missing)
        with pytest.raises(FileNotFoundError):
            fs_infinite_ttl.refresh_registry()

        # Restore registry.db so that teardown works
        os.rename(store.config.registry + "_fake", store.config.registry)
# Parse the repo configuration; the registry entry is either a plain path
# string or a dict with a "path" key.
raw_config = yaml.safe_load(config_string)
registry = raw_config["registry"]
registry_path = registry["path"] if isinstance(registry, dict) else registry
registry_store_class = get_registry_store_class_from_scheme(registry_path)
# For a local registry whose file is missing, recreate it from the
# base64-encoded contents held in the REGISTRY_ENV_NAME environment variable.
# NOTE(review): the existence check uses registry_path as given, while the
# file is written under repo_path / registry_path — confirm both resolve to
# the same location when registry_path is relative.
if registry_store_class == LocalRegistryStore and not os.path.exists(
        registry_path):
    registry_base64 = os.environ[REGISTRY_ENV_NAME]
    registry_bytes = base64.b64decode(registry_base64)
    registry_dir = os.path.dirname(registry_path)
    if not os.path.exists(repo_path / registry_dir):
        os.makedirs(repo_path / registry_dir)
    with open(repo_path / registry_path, "wb") as f:
        f.write(registry_bytes)

# Initialize the feature store
store = FeatureStore(repo_path=str(repo_path.resolve()))

if isinstance(registry, dict) and registry.get("cache_ttl_seconds", 0) > 0:
    # disable synchronous refresh
    store.config.registry.cache_ttl_seconds = 0

    # enable asynchronous refresh
    def async_refresh():
        # Refresh now, then schedule the next refresh after the configured
        # TTL; each run re-arms a new timer.
        store.refresh_registry()
        threading.Timer(registry["cache_ttl_seconds"], async_refresh).start()

    async_refresh()

# Start the feature transformation server
# The port comes from the environment when set, otherwise the default is used.
port = (os.environ.get(FEATURE_TRANSFORMATION_SERVER_PORT_ENV_NAME)
        or DEFAULT_FEATURE_TRANSFORMATION_SERVER_PORT)
def __init__(self):
    """Load the serialized driver model and open the driver-ranking feature store."""
    # Load model
    model = load("driver_model.bin")
    self.model = model

    # Set up feature store
    feature_store = FeatureStore(repo_path="driver_ranking/")
    self.fs = feature_store
def construct_test_environment(
    test_repo_config: IntegrationTestRepoConfig,
    test_suite_name: str = "integration_test",
    worker_id: str = "worker_id",
    offline_container: Optional[DockerContainer] = None,
) -> Environment:
    """Build an integration-test Environment backed by a fresh temporary repo.

    The project name combines ``test_suite_name``, a run id (a short random
    id, prefixed with ``GITHUB_RUN_ID`` when that variable is set) and
    ``GITHUB_RUN_NUMBER`` (default 1). A ``feature_store.yaml`` is written
    into a directory created with ``tempfile.mkdtemp`` and the feature store
    is loaded from that directory.

    Args:
        test_repo_config: Supplies provider, store creators and feature-server
            flags. Its ``online_store`` attribute is overwritten when an
            online store creator is configured.
        test_suite_name: Prefix of the generated project name.
        worker_id: Passed through to the ``Environment``.
        offline_container: Forwarded to the offline store creator.

    Returns:
        The constructed ``Environment``.
    """
    short_id = str(uuid.uuid4()).replace("-", "")[:6]
    github_run_id = os.getenv("GITHUB_RUN_ID", default=None)
    if github_run_id:
        run_id = f"gh_run_{github_run_id}_{short_id}"
    else:
        run_id = short_id
    run_num = os.getenv("GITHUB_RUN_NUMBER", default=1)
    project = f"{test_suite_name}_{run_id}_{run_num}"

    offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(
        project, offline_container=offline_container)
    offline_store_config = offline_creator.create_offline_store_config()

    online_creator = None
    if test_repo_config.online_store_creator:
        online_creator = test_repo_config.online_store_creator(project)
        online_store = online_creator.create_online_store()
        # The freshly created online store is also recorded on the config.
        test_repo_config.online_store = online_store
    else:
        online_store = test_repo_config.online_store

    repo_dir_name = tempfile.mkdtemp()

    uses_aws_lambda = (test_repo_config.python_feature_server
                       and test_repo_config.provider == "aws")
    if uses_aws_lambda:
        from feast.infra.feature_servers.aws_lambda.config import (
            AwsLambdaFeatureServerConfig,
        )

        feature_server = AwsLambdaFeatureServerConfig(
            enabled=True,
            execution_role_name="arn:aws:iam::402087665549:role/lambda_execution_role",
        )
        registry = (
            f"s3://feast-integration-tests/registries/{project}/registry.db"
        )  # type: Union[str, RegistryConfig]
    else:
        # Note: even if it's a local feature server, the repo config does not
        # have this configured
        feature_server = None
        registry = RegistryConfig(
            path=str(Path(repo_dir_name) / "registry.db"),
            cache_ttl_seconds=1,
        )

    config = RepoConfig(
        registry=registry,
        project=project,
        provider=test_repo_config.provider,
        offline_store=offline_store_config,
        online_store=online_store,
        repo_path=repo_dir_name,
        feature_server=feature_server,
        go_feature_retrieval=test_repo_config.go_feature_retrieval,
    )

    # Create feature_store.yaml out of the config
    yaml_path = Path(repo_dir_name) / "feature_store.yaml"
    with open(yaml_path, "w") as f:
        yaml.safe_dump(json.loads(config.json()), f)

    fs = FeatureStore(repo_dir_name)
    # We need to initialize the registry, because if nothing is applied in the
    # test before tearing down the feature store, that will cause the teardown
    # method to blow up.
    fs.registry._initialize_registry()

    return Environment(
        name=project,
        test_repo_config=test_repo_config,
        feature_store=fs,
        data_source_creator=offline_creator,
        python_feature_server=test_repo_config.python_feature_server,
        worker_id=worker_id,
        online_store_creator=online_creator,
    )
)

# 25 feature views of 10 INT64 features each; feature names are numbered
# consecutively across views (feature_0 ... feature_249).
benchmark_feature_views = [
    FeatureView(
        name=f"feature_view_{i}",
        entities=["entity"],
        ttl=Duration(seconds=86400),
        features=[
            Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64)
            for j in range(10)
        ],
        online=True,
        batch_source=generated_data_source,
    ) for i in range(25)
]

# One feature service that bundles all benchmark feature views.
benchmark_feature_service = FeatureService(
    name=f"benchmark_feature_service",
    features=benchmark_feature_views,
)

# Register every object with the feature store in the current directory.
fs = FeatureStore(".")
fs.apply([
    driver_hourly_stats_view, driver, entity, benchmark_feature_service,
    *benchmark_feature_views
])

# Materialize from `start` (defined outside this excerpt) up to now.
now = datetime.now()
fs.materialize(start, now)
print("Materialization finished")
import time
from feast import FeatureStore, ValueType
import pandas as pd
from datetime import datetime

# Script: time a historical feature retrieval for eight users.

store = FeatureStore(repo_path="feast_repo")

# Start the wall-clock timer before the entity dataframe is built, so the
# reported time covers dataframe construction, retrieval and printing.
startTime = time.time()

# Eight user ids, all sharing the same event timestamp.
entity_df = pd.DataFrame.from_dict({
    "user_id": [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008],
    "event_timestamp": [
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
        datetime(2021, 4, 21, 17, 58, 9),
    ]
})

training_df = store.get_historical_features(
    entity_df=entity_df,
    feature_refs=[
        'driver_hourly_stats:daily_transactions',
        'driver_hourly_stats:total_transactions',
    ],
).to_df()

print(training_df)

# Elapsed wall-clock time in seconds since startTime.
executionTime = (time.time() - startTime)
print('Execution time in seconds: ' + str(executionTime))
def start_test_local_server(repo_path: str, port: int):
    """Serve the feature store at ``repo_path`` on localhost.

    Args:
        repo_path: Location handed to ``FeatureStore``.
        port: Port passed to ``serve``.
    """
    store = FeatureStore(repo_path)
    # Access logging is switched off for the test server.
    store.serve("localhost", port, no_access_log=True)
def test_universal_cli(test_repo_config) -> None:
    """Exercise the CLI (apply, list, describe, teardown) against a temporary repo.

    A ``feature_store.yaml`` and an example repo file are written into a
    temporary directory; each CLI command is run through ``CliRunner`` and its
    return code is asserted. The registry contents captured after the first
    apply are compared with the registry after a second apply and a basic
    read/write test.
    """
    # Unique project name so separate runs do not share a project.
    project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}"

    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        feature_store_yaml = make_feature_store_yaml(project, test_repo_config,
                                                     repo_dir_name)
        repo_path = Path(repo_dir_name)

        repo_config = repo_path / "feature_store.yaml"

        repo_config.write_text(dedent(feature_store_yaml))

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo("example_feature_repo_1.py"))
        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # Store registry contents, to be compared later.
        fs = FeatureStore(repo_path=str(repo_path))
        registry_dict = fs.registry.to_dict(project=project)

        # entity & feature view list commands should succeed
        result = runner.run(["entities", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-views", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-services", "list"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)

        # entity & feature view describe commands should succeed when objects exist
        result = runner.run(["entities", "describe", "driver"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(["feature-views", "describe", "driver_locations"],
                            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        result = runner.run(
            ["feature-services", "describe", "driver_locations_service"],
            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        # entity & feature view describe commands should fail when objects don't exist
        result = runner.run(["entities", "describe", "foo"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)
        result = runner.run(["feature-views", "describe", "foo"],
                            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)
        result = runner.run(["feature-services", "describe", "foo"],
                            cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(1)

        # Doing another apply should be a no op, and should not cause errors
        result = runner.run(["apply"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
        # A fresh FeatureStore is loaded from the repo path for the read/write
        # check.
        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        # Confirm that registry contents have not changed.
        assertpy.assert_that(registry_dict).is_equal_to(
            fs.registry.to_dict(project=project))

        result = runner.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(result.returncode).is_equal_to(0)
def construct_test_environment(
    test_repo_config: TestRepoConfig,
    create_and_apply: bool = False,
    materialize: bool = False,
) -> Environment:
    """
    This method should take in the parameters from the test repo config and
    create a feature repo, apply it, and yield the constructed environment
    (which carries the feature store object) to callers. The feature store can
    be interacted with for the purposes of tests. The user is *not* expected
    to perform any clean up actions: both the data source creator and the
    feature store are torn down once the caller is done with the yielded
    environment.

    :param test_repo_config: configuration
    :param create_and_apply: when True, the driver and customer entities and
        their feature views are applied before yielding.
    :param materialize: when True, the store is materialized between the
        environment's start and end dates before yielding.
    :return: An environment built using the supplied configuration (yielded).
    """
    df = create_dataset()

    project = f"test_correctness_{str(uuid.uuid4()).replace('-', '')[:8]}"

    # The creator is configured as a dotted "module.ClassName" path.
    module_name, config_class_name = test_repo_config.offline_store_creator.rsplit(
        ".", 1)

    offline_creator: DataSourceCreator = importer.get_class_from_type(
        module_name, config_class_name, "DataSourceCreator")(project)
    ds = offline_creator.create_data_source(project,
                                            df,
                                            field_mapping={
                                                "ts_1": "ts",
                                                "id": "driver_id"
                                            })
    offline_store = offline_creator.create_offline_store_config()
    online_store = test_repo_config.online_store

    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=project,
            provider=test_repo_config.provider,
            offline_store=offline_store,
            online_store=online_store,
            repo_path=repo_dir_name,
        )
        fs = FeatureStore(config=config)
        environment = Environment(
            name=project,
            test_repo_config=test_repo_config,
            feature_store=fs,
            data_source=ds,
            data_source_creator=offline_creator,
        )

        fvs = []
        entities = []
        try:
            if create_and_apply:
                entities.extend([driver(), customer()])
                fvs.extend([
                    environment.driver_stats_feature_view(),
                    environment.customer_feature_view(),
                ])
                fs.apply(fvs + entities)

            if materialize:
                fs.materialize(environment.start_date, environment.end_date)

            yield environment
        finally:
            # Tear down both resources independently: a failure while removing
            # the offline data source must not skip the feature store teardown.
            try:
                offline_creator.teardown()
            finally:
                fs.teardown()
# Define an entity for the driver. You can think of entity as a primary key used to # fetch features. driver = Entity( name="driver_id", value_type=ValueType.INT64, description="driver id", ) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this # data to our model online. driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=["driver_id"], ttl=Duration(seconds=86400 * 365), features=[ Feature(name="conv_rate", dtype=ValueType.DOUBLE), Feature(name="acc_rate", dtype=ValueType.FLOAT), Feature(name="avg_daily_trips", dtype=ValueType.INT64), ], online=True, batch_source=driver_hourly_stats, tags={}, ) fs = FeatureStore("") fs.apply([driver_hourly_stats_view, driver]) now = datetime.now() fs.materialize_incremental(now)
def test_universal_cli(environment: Environment):
    """Exercise the CLI (apply, list, describe, teardown) against a temporary repo.

    A ``feature_store.yaml`` built from ``environment.test_repo_config`` and
    an example repo file are written into a temporary directory; each CLI
    command is run through ``CliRunner`` and its return code is asserted. The
    registry specs captured after the first apply are compared with the
    specs after a second apply and a basic read/write test. ``teardown`` is
    run again in ``finally`` regardless of the outcome.
    """
    # Unique project name so separate runs do not share a project.
    project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}"
    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        try:
            repo_path = Path(repo_dir_name)
            feature_store_yaml = make_feature_store_yaml(
                project, environment.test_repo_config, repo_path)

            repo_config = repo_path / "feature_store.yaml"

            repo_config.write_text(dedent(feature_store_yaml))

            repo_example = repo_path / "example.py"
            repo_example.write_text(
                get_example_repo("example_feature_repo_1.py"))
            result = runner.run(["apply"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)

            # Store registry contents, to be compared later.
            fs = FeatureStore(repo_path=str(repo_path))
            registry_dict = fs.registry.to_dict(project=project)
            # Save only the specs, not the metadata.
            registry_specs = {
                key: [fco["spec"] if "spec" in fco else fco for fco in value]
                for key, value in registry_dict.items()
            }

            # entity & feature view list commands should succeed
            result = runner.run(["entities", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(["feature-views", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(["feature-services", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(["data-sources", "list"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)

            # entity & feature view describe commands should succeed when objects exist
            result = runner.run(["entities", "describe", "driver"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(
                ["feature-views", "describe", "driver_locations"],
                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            result = runner.run(
                ["feature-services", "describe", "driver_locations_service"],
                cwd=repo_path,
            )
            assertpy.assert_that(result.returncode).is_equal_to(0)
            assertpy.assert_that(fs.list_feature_views()).is_length(4)
            result = runner.run(
                ["data-sources", "describe", "customer_profile_source"],
                cwd=repo_path,
            )
            assertpy.assert_that(result.returncode).is_equal_to(0)
            assertpy.assert_that(fs.list_data_sources()).is_length(4)

            # entity & feature view describe commands should fail when objects don't exist
            result = runner.run(["entities", "describe", "foo"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)
            result = runner.run(["feature-views", "describe", "foo"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)
            result = runner.run(["feature-services", "describe", "foo"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)
            result = runner.run(["data-sources", "describe", "foo"],
                                cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(1)

            # Doing another apply should be a no op, and should not cause errors
            result = runner.run(["apply"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
            # A fresh FeatureStore is loaded from the repo path for the
            # read/write check.
            basic_rw_test(
                FeatureStore(repo_path=str(repo_path), config=None),
                view_name="driver_locations",
            )

            # Confirm that registry contents have not changed.
            registry_dict = fs.registry.to_dict(project=project)
            assertpy.assert_that(registry_specs).is_equal_to({
                key: [fco["spec"] if "spec" in fco else fco for fco in value]
                for key, value in registry_dict.items()
            })

            result = runner.run(["teardown"], cwd=repo_path)
            assertpy.assert_that(result.returncode).is_equal_to(0)
        finally:
            # Runs unconditionally, so teardown is issued a second time after
            # a successful run and once after a failed one.
            runner.run(["teardown"], cwd=repo_path)
def __init__(self, config: FeastRepositorySourceConfig, ctx: PipelineContext):
    """Set up the source: keep the config, start a report, open the feature store.

    Args:
        config: Source configuration; its ``path`` is handed to
            ``FeatureStore``.
        ctx: Pipeline context forwarded to the parent initializer.
    """
    super().__init__(ctx)

    self.source_config = config
    self.report = SourceReport()

    repo_location = self.source_config.path
    self.feature_store = FeatureStore(repo_location)
from feast import FeatureStore, ValueType
import pandas as pd
from datetime import datetime

# Script: fetch historical driver features for four drivers and preview them.

# Entity dataframe: one row per driver id with its own event timestamp.
entity_df = pd.DataFrame.from_dict({
    "driver_id": [1001, 1002, 1003, 1004],
    "event_timestamp": [
        datetime(2021, 4, 12, 10, 59, 42),
        datetime(2021, 4, 12, 8, 12, 10),
        datetime(2021, 4, 12, 16, 40, 26),
        datetime(2021, 4, 12, 15, 1 , 12)
    ]
})

store = FeatureStore(repo_path="feast_repo")

training_df = store.get_historical_features(
    entity_df=entity_df,
    feature_refs = [
        'driver_hourly_stats:conv_rate',
        'driver_hourly_stats:acc_rate',
        'driver_hourly_stats:avg_daily_trips'
    ],
).to_df()

print(training_df.head())

# another feature store
# `store` is rebound here; the "feast_repo" store is no longer referenced by
# this name afterwards.
store = FeatureStore(repo_path="feature_transaction")