def test_repo_init_with_underscore_in_project_name() -> None:
    """
    Check how `feast init` and `feast apply` treat underscores in names.

    `feast init` must reject repo names that begin with an underscore and
    accept names that merely contain one. `feast apply` must reject a
    `feature_store.yaml` whose project name begins with underscores.
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        cli = CliRunner()

        # Repo names with a leading underscore are rejected by `feast init`.
        for bad_name in ("_test", "_test_1"):
            init_result = cli.run(["init", bad_name], cwd=workdir)
            assert init_result.returncode != 0

        # An underscore elsewhere in the repo name is accepted by `feast init`.
        for good_name in ("test_1",):
            init_result = cli.run(["init", good_name], cwd=workdir)
            assert init_result.returncode == 0

        # Overwrite the generated config with a project name that starts with
        # underscores; `feast apply` is expected to refuse it.
        repo_root = workdir / "test_1"
        data_root = repo_root / "data"
        config_text = dedent(
            f"""
        project: __foo
        registry: {data_root / "registry.db"}
        provider: local
        online_store:
            path: {data_root / "online_store.db"}
        """
        )
        (repo_root / "feature_store.yaml").write_text(config_text)

        apply_result = cli.run(["apply"], cwd=repo_root)
        assert apply_result.returncode != 0
def test_partial() -> None:
    """
    Register one more feature view on an existing repo through
    `store.apply`, then run the read/write check against both the
    pre-existing view and the newly added one.
    """
    cli = CliRunner()
    example_repo = get_example_repo("example_feature_repo_1.py")

    with cli.local_repo(example_repo, "bigquery") as store:
        source = BigQuerySource(
            table="feast-oss.public.drivers",
            timestamp_field="event_timestamp",
            created_timestamp_column="created_timestamp",
        )
        view_schema = [
            Field(name="lat", dtype=Float32),
            Field(name="lon", dtype=String),
            Field(name="name", dtype=String),
        ]
        extra_view = FeatureView(
            name="driver_locations_100",
            entities=["driver"],
            ttl=timedelta(days=1),
            schema=view_schema,
            online=True,
            batch_source=source,
            tags={},
        )

        store.apply([extra_view])

        # The original view first, then the one applied just above.
        for view in ("driver_locations", "driver_locations_100"):
            basic_rw_test(store, view_name=view)
def test_partial() -> None:
    """
    Register one more feature view on an existing repo through
    `store.apply`, then run the read/write check against both the
    pre-existing view and the newly added one.
    """
    cli = CliRunner()
    example_repo = get_example_repo("example_feature_repo_1.py")

    with cli.local_repo(example_repo, "bigquery") as store:
        source = BigQuerySource(
            table_ref="feast-oss.public.drivers",
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created_timestamp",
        )
        view_features = [
            Feature(name="lat", dtype=ValueType.FLOAT),
            Feature(name="lon", dtype=ValueType.STRING),
            Feature(name="name", dtype=ValueType.STRING),
        ]
        extra_view = FeatureView(
            name="driver_locations_100",
            entities=["driver"],
            ttl=Duration(seconds=86400 * 1),
            features=view_features,
            online=True,
            batch_source=source,
            tags={},
        )

        store.apply([extra_view])

        # The original view first, then the one applied just above.
        for view in ("driver_locations", "driver_locations_100"):
            basic_rw_test(store, view_name=view)
def test_3rd_party_providers() -> None:
    """
    Run `feast apply` against repos configured with various provider names
    and check the exit code (and error text, where one is expected).
    """
    cli = CliRunner()

    # (provider name, expected exit code, expected output fragment or None)
    scenarios = [
        # Incorrect built-in provider name (no dots).
        ("feast123", 1, b"Provider 'feast123' is not implemented"),
        # Incorrect third-party provider name (with dots): module is missing.
        (
            "feast_foo.Provider",
            1,
            b"Could not import Provider module 'feast_foo'",
        ),
        # Incorrect third-party provider name (with dots): class is missing.
        (
            "foo.FooProvider",
            1,
            b"Could not import Provider 'FooProvider' from module 'foo'",
        ),
        # Correct third-party provider name.
        ("foo.provider.FooProvider", 0, None),
    ]

    for provider_name, expected_code, expected_fragment in scenarios:
        with setup_third_party_provider_repo(provider_name) as repo_path:
            return_code, output = cli.run_with_output(["apply"], cwd=repo_path)
            assertpy.assert_that(return_code).is_equal_to(expected_code)
            if expected_fragment is not None:
                assertpy.assert_that(output).contains(expected_fragment)
def test_3rd_party_registry_store() -> None:
    """
    Run `feast apply` against repos configured with various registry store
    names and check the exit code (and error text, where one is expected).
    """
    cli = CliRunner()

    # (registry store name, expected exit code, expected output fragment or None)
    scenarios = [
        # Incorrect registry store name (no dots).
        (
            "feast123",
            1,
            b'Registry store class name should end with "RegistryStore"',
        ),
        # Incorrect third-party registry store name (with dots): module is missing.
        (
            "feast_foo.RegistryStore",
            1,
            b"Could not import RegistryStore module 'feast_foo'",
        ),
        # Incorrect third-party registry store name (with dots): class is missing.
        (
            "foo.FooRegistryStore",
            1,
            b"Could not import RegistryStore 'FooRegistryStore' from module 'foo'",
        ),
        # Correct third-party registry store name.
        ("foo.registry_store.FooRegistryStore", 0, None),
    ]

    for store_name, expected_code, expected_fragment in scenarios:
        with setup_third_party_registry_store_repo(store_name) as repo_path:
            return_code, output = cli.run_with_output(["apply"], cwd=repo_path)
            assertpy.assert_that(return_code).is_equal_to(expected_code)
            if expected_fragment is not None:
                assertpy.assert_that(output).contains(expected_fragment)
def test_missing_bq_source_fail() -> None:
    """
    `feast apply` on the example repo with a missing BigQuery source must
    exit with code 1 and mention DataSourceNotFoundException in its output.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_id = "".join(random.choice(alphabet) for _ in range(10))
    cli = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: gcp
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)

        example_source = get_example_repo(
            "example_feature_repo_with_missing_bq_source.py"
        )
        (repo_path / "example.py").write_text(example_source)

        returncode, output = cli.run_with_output(["apply"], cwd=repo_path)
        assert returncode == 1
        assert b"DataSourceNotFoundException" in output
def test_cli_apply_duplicated_featureview_names() -> None:
    """
    `feast apply` on a single-file repo that declares feature views with
    duplicated names must fail and ask for unique feature view names.
    """
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        cli = CliRunner()

        # Build the example repo inside the temporary directories.
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)

        example_source = get_example_repo(
            "example_feature_repo_with_duplicated_featureview_names.py"
        )
        (repo_path / "example.py").write_text(example_source)

        rc, output = cli.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0
        assert b"Please ensure that all feature view names are unique" in output
def test_cli_chdir() -> None:
    """
    Verify that `feast --chdir <repo> COMMAND` succeeds for a range of
    commands when feast is invoked from outside the feature repository.
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as temp_dir:
        # Resolve symlinks so that the path handed to --chdir is absolute.
        workdir = Path(temp_dir).resolve()

        init_result = runner.run(["init", "my_project"], cwd=workdir)
        repo_path = workdir / "my_project"
        assert init_result.returncode == 0

        def run_in_repo(*command):
            """Run a feast command via --chdir from the parent dir; require exit code 0."""
            completed = runner.run(["--chdir", repo_path, *command], cwd=workdir)
            assert completed.returncode == 0

        run_in_repo("apply")
        run_in_repo("entities", "list")
        run_in_repo("feature-views", "list")

        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=100)

        run_in_repo("materialize", start_date.isoformat(), end_date.isoformat())
        run_in_repo("materialize-incremental", end_date.isoformat())
        run_in_repo("registry-dump")
        run_in_repo("teardown")
def test_cli_apply_imported_featureview_with_duplication() -> None:
    """
    `feast apply` must fail when a second file imports a feature view from
    the first file and also declares another feature view with the same name.
    """
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        cli = CliRunner()

        # Build the example repo inside the temporary directories.
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)

        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_2.py")
        )

        # Second module: re-uses the imported view in a feature service and
        # declares a new view whose name is 'driver_hourly_stats'.
        second_module_source = (
            "from datetime import timedelta\n"
            "from example import driver_hourly_stats, driver_hourly_stats_view\n"
            "from feast import FeatureService, FeatureView\n"
            "a_feature_service = FeatureService(\n"
            " name='driver_locations_service',\n"
            " features=[driver_hourly_stats_view],\n"
            ")\n"
            "driver_hourly_stats_view_2 = FeatureView(\n"
            " name='driver_hourly_stats',\n"
            " entities=['driver_id'],\n"
            " ttl=timedelta(days=1),\n"
            " online=True,\n"
            " batch_source=driver_hourly_stats,\n"
            " tags={'dummy': 'true'})\n"
        )
        (repo_path / "example_2.py").write_text(second_module_source)

        rc, output = cli.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0
        expected_fragment = (
            b"More than one feature view with name driver_hourly_stats found."
        )
        assert expected_fragment in output
def test_feature_service_read() -> None:
    """
    Run the basic read/write check while reading through the
    `driver_locations_service` feature service.
    """
    cli = CliRunner()
    example_repo = get_example_repo("example_feature_repo_1.py")

    with cli.local_repo(example_repo, "bigquery") as store:
        basic_rw_test(
            store,
            view_name="driver_locations",
            feature_service_name="driver_locations_service",
        )
def test_basic() -> None:
    """
    Apply the example repo twice with the aws provider (dynamodb online
    store, redshift offline store), run the basic read/write check and
    tear the infrastructure down.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_id = "".join(random.choice(alphabet) for _ in range(10))
    cli = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: aws
        online_store:
          type: dynamodb
          region: us-west-2
        offline_store:
          type: redshift
          cluster_id: feast-integration-tests
          region: us-west-2
          user: admin
          database: feast
          s3_staging_location: s3://feast-integration-tests/redshift
          iam_role: arn:aws:iam::402087665549:role/redshift_s3_access_role
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)
        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_1.py")
        )

        first_apply = cli.run(["apply"], cwd=repo_path)
        assert first_apply.returncode == 0

        # A repeated apply is expected to be a no-op and must not error.
        second_apply = cli.run(["apply"], cwd=repo_path)
        assert second_apply.returncode == 0

        feature_store = FeatureStore(repo_path=str(repo_path), config=None)
        basic_rw_test(feature_store, view_name="driver_locations")

        teardown = cli.run(["teardown"], cwd=repo_path)
        assert teardown.returncode == 0
def test_basic() -> None:
    """
    Apply the example repo twice with the local provider (bigquery offline
    store, redis online store), run the basic read/write check and tear
    the infrastructure down.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_id = "".join(random.choice(alphabet) for _ in range(10))
    cli = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: local
        offline_store:
            type: bigquery
        online_store:
            type: redis
            connection_string: localhost:6379,db=0
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)
        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_1.py")
        )

        first_apply = cli.run(["apply"], cwd=repo_path)
        assert first_apply.returncode == 0

        # A repeated apply is expected to be a no-op and must not error.
        second_apply = cli.run(["apply"], cwd=repo_path)
        assert second_apply.returncode == 0

        feature_store = FeatureStore(repo_path=str(repo_path), config=None)
        basic_rw_test(feature_store, view_name="driver_locations")

        teardown = cli.run(["teardown"], cwd=repo_path)
        assert teardown.returncode == 0
def test_cli_apply_imported_featureview() -> None:
    """
    `feast apply` must succeed when a second file imports a feature view
    from the first file and uses it in a feature service.
    """
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        cli = CliRunner()

        # Build the example repo inside the temporary directories.
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)

        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_2.py")
        )

        # Second module: only imports the view and wraps it in a feature service.
        second_module_source = (
            "from example import driver_hourly_stats_view\n"
            "from feast import FeatureService\n"
            "a_feature_service = FeatureService(\n"
            " name='driver_locations_service',\n"
            " features=[driver_hourly_stats_view],\n"
            ")\n"
        )
        (repo_path / "example_2.py").write_text(second_module_source)

        rc, output = cli.run_with_output(["apply"], cwd=repo_path)

        assert rc == 0
        assert b"Created feature service driver_locations_service" in output
def test_read_pre_applied() -> None:
    """
    Inspect the feature service that already exists in the example repo,
    then add a second feature service through `store.apply` and confirm
    that both can be listed and fetched.
    """
    cli = CliRunner()
    example_repo = get_example_repo("example_feature_repo_1.py")

    with cli.local_repo(example_repo, "bigquery") as store:
        # Exactly one feature service exists before anything is added here.
        assert len(store.list_feature_services()) == 1

        existing_service = store.get_feature_service("driver_locations_service")
        assert len(existing_service.tags) == 1
        assert existing_service.tags["release"] == "production"

        # Build a new service from a projection of an existing feature view.
        locations_view = store.get_feature_view("driver_locations")
        new_service = FeatureService(
            name="new_feature_service",
            features=[locations_view[["lon"]]],
        )
        store.apply([new_service])

        assert len(store.list_feature_services()) == 2
        # Fetching by name must not raise.
        store.get_feature_service("new_feature_service")
def test_nullable_online_store(test_nullable_online_store) -> None:
    """
    `feast apply` must succeed with the repo configuration supplied by the
    `test_nullable_online_store` argument; teardown is always attempted.
    """
    unique_suffix = str(uuid.uuid4()).replace("-", "")[:8]
    project = f"test_nullable_online_store{unique_suffix}"
    cli = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        try:
            repo_path = Path(repo_dir_name)

            yaml_text = make_feature_store_yaml(
                project, test_nullable_online_store, repo_path
            )
            (repo_path / "feature_store.yaml").write_text(dedent(yaml_text))
            (repo_path / "example.py").write_text(
                get_example_repo("example_feature_repo_1.py")
            )

            apply_result = cli.run(["apply"], cwd=repo_path)
            assertpy.assert_that(apply_result.returncode).is_equal_to(0)
        finally:
            # Best-effort cleanup: the teardown exit code is not checked.
            cli.run(["teardown"], cwd=repo_path)
def test_repo_init() -> None:
    """
    Verify that `feast apply` followed by `feast materialize` runs without
    errors on the repo produced by `feast init`.
    """
    cli = CliRunner()
    with tempfile.TemporaryDirectory() as temp_dir:
        workdir = Path(temp_dir)

        init_result = cli.run(["init", "my_project"], cwd=workdir)
        repo_path = workdir / "my_project"
        assert init_result.returncode == 0

        apply_result = cli.run(["apply"], cwd=repo_path)
        assert apply_result.returncode == 0

        # Materialize the 100 days leading up to now.
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=100)
        materialize_command = [
            "materialize",
            start_date.isoformat(),
            end_date.isoformat(),
        ]
        materialize_result = cli.run(materialize_command, cwd=repo_path)
        assert materialize_result.returncode == 0
def test_non_local_feature_repo() -> None:
    """
    Apply a sample repo that uses a bigquery offline store, check that
    three feature views are registered, and tear it down again.
    """
    cli = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name:
        # Build the example repo inside the temporary directory.
        repo_path = Path(repo_dir_name)

        config_text = dedent(
            """
        project: foo
        registry: data/registry.db
        provider: local
        online_store:
            path: data/online_store.db
        offline_store:
            type: bigquery
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)
        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_1.py")
        )

        apply_result = cli.run(["apply"], cwd=repo_path)
        assertpy.assert_that(apply_result.returncode).is_equal_to(0)

        feature_store = FeatureStore(repo_path=str(repo_path))
        assertpy.assert_that(feature_store.list_feature_views()).is_length(3)

        teardown_result = cli.run(["teardown"], cwd=repo_path)
        assertpy.assert_that(teardown_result.returncode).is_equal_to(0)
def test_connection_error() -> None:
    """
    With a malformed redis connection string (`db=0=`), `feast apply` still
    succeeds but the read/write check must raise a redis ResponseError.
    """
    alphabet = string.ascii_lowercase + string.digits
    project_id = "".join(random.choice(alphabet) for _ in range(10))
    cli = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: {project_id}
        registry: {data_path / "registry.db"}
        provider: local
        offline_store:
            type: file
        online_store:
            type: redis
            connection_string: localhost:6379,db=0=
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)
        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_2.py")
        )

        apply_result = cli.run(["apply"], cwd=repo_path)
        assert apply_result.returncode == 0

        # Redis does not support names for its databases.
        with pytest.raises(redis.exceptions.ResponseError):
            basic_rw_test(
                FeatureStore(repo_path=str(repo_path), config=None),
                view_name="driver_hourly_stats",
            )
def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes):
    """
    Write the named example repo into a fresh local-provider project, run
    `feast apply`, and assert that it fails with `expected_error` present
    in the command output.
    """
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        cli = CliRunner()

        # Build the example repo inside the temporary directories.
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)
        (repo_path / "example.py").write_text(
            get_example_repo(example_repo_file_name)
        )

        rc, output = cli.run_with_output(["apply"], cwd=repo_path)

        assert rc != 0
        assert expected_error in output
def test_workflow() -> None:
    """
    Apply a sample repo, exercise the list/describe CLI commands, re-apply,
    run the basic read/write check and finally tear everything down.
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        # Build the example repo inside the temporary directories.
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        config_text = dedent(
            f"""
        project: foo
        registry: {data_path / "registry.db"}
        provider: local
        online_store:
            path: {data_path / "online_store.db"}
        offline_store:
            type: bigquery
        """
        )
        (repo_path / "feature_store.yaml").write_text(config_text)
        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_1.py")
        )

        def expect_exit_code(command, expected_code):
            """Run one feast command in the repo and check its exit code."""
            outcome = runner.run(command, cwd=repo_path)
            assertpy.assert_that(outcome.returncode).is_equal_to(expected_code)

        expect_exit_code(["apply"], 0)

        # List commands and describe commands for existing objects succeed.
        succeeding_commands = [
            ["entities", "list"],
            ["feature-views", "list"],
            ["feature-services", "list"],
            ["entities", "describe", "driver"],
            ["feature-views", "describe", "driver_locations"],
            ["feature-services", "describe", "driver_locations_service"],
        ]
        for command in succeeding_commands:
            expect_exit_code(command, 0)

        # Describe commands for objects that do not exist exit with code 1.
        failing_commands = [
            ["entities", "describe", "foo"],
            ["feature-views", "describe", "foo"],
            ["feature-services", "describe", "foo"],
        ]
        for command in failing_commands:
            expect_exit_code(command, 1)

        # A repeated apply is expected to be a no-op and must not error.
        expect_exit_code(["apply"], 0)

        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        expect_exit_code(["teardown"], 0)
def test_e2e_local() -> None:
    """
    End-to-end check with the local provider and file offline store.

    1. Generate parquet test data.
    2. Create and apply a repo pointing at that data.
    3. Materialize into the online store (full range, then incremental).
    4. Read back from the online store and compare with the source data.
    5. Confirm that materialization fails when the parquet file lacks the
       join key.
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as data_dir:
        # Generate 15 days of test data, ending at the top of the current hour.
        end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
        start_date = end_date - timedelta(days=15)

        driver_entities = [1001, 1002, 1003, 1004, 1005]
        driver_df = driver_data.create_driver_hourly_stats_df(
            driver_entities, start_date, end_date
        )
        driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
        driver_df.to_parquet(
            path=driver_stats_path, allow_truncated_timestamps=True
        )

        global_df = driver_data.create_global_daily_stats_df(start_date, end_date)
        global_stats_path = os.path.join(data_dir, "global_stats.parquet")
        global_df.to_parquet(
            path=global_stats_path, allow_truncated_timestamps=True
        )

        # The runner's local_repo handles apply/teardown. The example repo
        # source is patched so that it points at the parquet files above.
        patched_repo = (
            get_example_repo("example_feature_repo_2.py")
            .replace("%PARQUET_PATH%", driver_stats_path)
            .replace("%PARQUET_PATH_GLOBAL%", global_stats_path)
        )
        with runner.local_repo(patched_repo, "file") as store:
            assert store.repo_path is not None

            # feast materialize: everything up to one week before end_date.
            materialize_result = runner.run(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )
            assert materialize_result.returncode == 0
            _assert_online_features(store, driver_df, end_date - timedelta(days=7))

            # feast materialize-incremental: catch up to end_date.
            incremental_result = runner.run(
                ["materialize-incremental", end_date.isoformat()],
                cwd=Path(store.repo_path),
            )
            assert incremental_result.returncode == 0
            _assert_online_features(store, driver_df, end_date)

        # Failure case: the parquet file does not include a join key.
        join_key_repo = get_example_repo(
            "example_feature_repo_with_entity_join_key.py"
        ).replace("%PARQUET_PATH%", driver_stats_path)
        with runner.local_repo(join_key_repo, "file") as store:
            assert store.repo_path is not None

            # feast materialize is expected to fail here.
            returncode, output = runner.run_with_output(
                [
                    "materialize",
                    start_date.isoformat(),
                    (end_date - timedelta(days=7)).isoformat(),
                ],
                cwd=Path(store.repo_path),
            )
            assert returncode != 0
            assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(output)
def test_online() -> None:
    """
    Exercise online reads in local mode, including how `FeatureStore`
    behaves when its registry cache expires or is configured not to expire.
    """
    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py"), "bigquery") as store:
        driver_locations_fv = store.get_feature_view(name="driver_locations")
        customer_profile_fv = store.get_feature_view(name="customer_profile")
        customer_driver_combined_fv = store.get_feature_view(
            name="customer_driver_combined"
        )

        provider = store._get_provider()

        def write_row(feature_view, entity_key, values):
            """Write a single row for `feature_view` through the provider."""
            provider.online_write_batch(
                config=store.config,
                table=feature_view,
                data=[(entity_key, values, datetime.utcnow(), datetime.utcnow())],
                progress=None,
            )

        def read_all_features(feature_store, entity_rows):
            """Fetch the four features used throughout this test as a dict."""
            return feature_store.get_online_features(
                features=[
                    "driver_locations:lon",
                    "customer_profile:avg_orders_day",
                    "customer_profile:name",
                    "customer_driver_combined:trips",
                ],
                entity_rows=entity_rows,
                full_feature_names=False,
            ).to_dict()

        def store_with_cache_ttl(ttl_seconds):
            """Build a FeatureStore on the same repo with the given registry cache TTL."""
            return FeatureStore(
                config=RepoConfig(
                    registry=RegistryConfig(
                        path=store.config.registry,
                        cache_ttl_seconds=ttl_seconds,
                    ),
                    online_store=store.config.online_store,
                    project=store.project,
                    provider=store.config.provider,
                )
            )

        def hide_registry():
            """Rename registry.db so that it cannot be used for refreshes."""
            os.rename(store.config.registry, store.config.registry + "_fake")

        def restore_registry():
            """Undo hide_registry()."""
            os.rename(store.config.registry + "_fake", store.config.registry)

        # Write one row into each of the three tables.
        write_row(
            driver_locations_fv,
            EntityKeyProto(
                join_keys=["driver_id"],
                entity_values=[ValueProto(int64_val=1)],
            ),
            {
                "lat": ValueProto(double_val=0.1),
                "lon": ValueProto(string_val="1.0"),
            },
        )
        write_row(
            customer_profile_fv,
            EntityKeyProto(
                join_keys=["customer_id"],
                entity_values=[ValueProto(string_val="5")],
            ),
            {
                "avg_orders_day": ValueProto(float_val=1.0),
                "name": ValueProto(string_val="John"),
                "age": ValueProto(int64_val=3),
            },
        )
        write_row(
            customer_driver_combined_fv,
            EntityKeyProto(
                join_keys=["customer_id", "driver_id"],
                entity_values=[
                    ValueProto(string_val="5"),
                    ValueProto(int64_val=1),
                ],
            ),
            {"trips": ValueProto(int64_val=7)},
        )

        # Read with two entity rows: customer_id as a string and as an int.
        result = read_all_features(
            store,
            [
                {"driver_id": 1, "customer_id": "5"},
                {"driver_id": 1, "customer_id": 5},
            ],
        )

        for feature_name in ("lon", "avg_orders_day", "name"):
            assert feature_name in result
        assert result["driver_id"] == [1, 1]
        assert result["customer_id"] == ["5", "5"]
        assert result["lon"] == ["1.0", "1.0"]
        assert result["avg_orders_day"] == [1.0, 1.0]
        assert result["name"] == ["John", "John"]
        assert result["trips"] == [7, 7]

        # Features are still present in the result when the keys are not found.
        result = store.get_online_features(
            features=["customer_driver_combined:trips"],
            entity_rows=[{"driver_id": 0, "customer_id": 0}],
            full_feature_names=False,
        ).to_dict()
        assert "trips" in result

        # A reference to an unknown feature view is rejected.
        with pytest.raises(FeatureViewNotFoundException):
            store.get_online_features(
                features=["driver_locations_bad:lon"],
                entity_rows=[{"driver_id": 1}],
                full_feature_names=False,
            )

        single_row = [{"driver_id": 1, "customer_id": 5}]

        # --- FeatureStore with fast cache invalidation ---
        cache_ttl = 1
        fs_fast_ttl = store_with_cache_ttl(cache_ttl)

        # First read loads the registry and fills the cache.
        result = read_all_features(fs_fast_ttl, single_row)
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        hide_registry()

        # Wait for the cached registry to expire.
        time.sleep(cache_ttl)

        # The expired cache triggers a reload, which fails because the
        # registry file has been renamed away.
        with pytest.raises(FileNotFoundError):
            read_all_features(fs_fast_ttl, single_row)

        # With the file back in place the reload works and results return.
        restore_registry()
        result = read_all_features(fs_fast_ttl, single_row)
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # --- FeatureStore with infinite cache (manual refresh only) ---
        fs_infinite_ttl = store_with_cache_ttl(0)

        # Returns results and fills the registry cache.
        result = read_all_features(fs_infinite_ttl, single_row)
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # Wait a bit so that an arbitrary TTL would have taken effect.
        time.sleep(2)

        hide_registry()

        # TTL is infinite, so this read is served from the registry cache.
        result = read_all_features(fs_infinite_ttl, single_row)
        assert result["lon"] == ["1.0"]
        assert result["trips"] == [7]

        # A forced reload fails because the file is missing.
        with pytest.raises(FileNotFoundError):
            fs_infinite_ttl.refresh_registry()

        # Put registry.db back so that teardown works.
        restore_registry()
def test_universal_cli(test_repo_config) -> None:
    """
    Run the apply / list / describe / re-apply / teardown CLI workflow for
    the given repo configuration and confirm that the registry contents
    are the same before and after the re-apply and read/write check.
    """
    unique_suffix = str(uuid.uuid4()).replace("-", "")[:8]
    project = f"test_universal_cli_{unique_suffix}"

    runner = CliRunner()

    with tempfile.TemporaryDirectory() as repo_dir_name:
        feature_store_yaml = make_feature_store_yaml(
            project, test_repo_config, repo_dir_name
        )
        repo_path = Path(repo_dir_name)

        (repo_path / "feature_store.yaml").write_text(dedent(feature_store_yaml))
        (repo_path / "example.py").write_text(
            get_example_repo("example_feature_repo_1.py")
        )

        def expect_exit_code(command, expected_code):
            """Run one feast command in the repo and check its exit code."""
            outcome = runner.run(command, cwd=repo_path)
            assertpy.assert_that(outcome.returncode).is_equal_to(expected_code)

        expect_exit_code(["apply"], 0)

        # Snapshot the registry contents for the comparison further down.
        fs = FeatureStore(repo_path=str(repo_path))
        registry_dict = fs.registry.to_dict(project=project)

        # List commands and describe commands for existing objects succeed.
        succeeding_commands = [
            ["entities", "list"],
            ["feature-views", "list"],
            ["feature-services", "list"],
            ["entities", "describe", "driver"],
            ["feature-views", "describe", "driver_locations"],
            ["feature-services", "describe", "driver_locations_service"],
        ]
        for command in succeeding_commands:
            expect_exit_code(command, 0)

        assertpy.assert_that(fs.list_feature_views()).is_length(3)

        # Describe commands for objects that do not exist exit with code 1.
        failing_commands = [
            ["entities", "describe", "foo"],
            ["feature-views", "describe", "foo"],
            ["feature-services", "describe", "foo"],
        ]
        for command in failing_commands:
            expect_exit_code(command, 1)

        # A repeated apply is expected to be a no-op and must not error.
        expect_exit_code(["apply"], 0)

        basic_rw_test(
            FeatureStore(repo_path=str(repo_path), config=None),
            view_name="driver_locations",
        )

        # The registry contents must not have changed.
        assertpy.assert_that(registry_dict).is_equal_to(
            fs.registry.to_dict(project=project)
        )

        expect_exit_code(["teardown"], 0)
def test_online_to_df():
    """
    Check that converting an online response to a dataframe keeps the request order.

    Rows for three feature views are written directly through the provider,
    then fetched with the entity rows reversed. The resulting dataframe must
    match the expected frame in both row order and column order.
    """
    driver_ids = [1, 2, 3]
    customer_ids = [4, 5, 6]
    name = "foo"
    # Factors used to derive each feature value from its entity id.
    lon_multiply = 1.0
    lat_multiply = 0.1
    age_multiply = 10
    avg_order_day_multiply = 1.0

    runner = CliRunner()
    with runner.local_repo(
        get_example_repo("example_feature_repo_1.py"), "bigquery"
    ) as store:
        driver_locations_fv = store.get_feature_view(name="driver_locations")
        customer_profile_fv = store.get_feature_view(name="customer_profile")
        customer_driver_combined_fv = store.get_feature_view(
            name="customer_driver_combined"
        )
        provider = store._get_provider()

        def write_row(table, entity_key, values):
            """Write a single row for `table` through the provider."""
            provider.online_write_batch(
                config=store.config,
                table=table,
                data=[
                    (
                        entity_key,
                        values,
                        datetime.utcnow(),
                        datetime.utcnow(),
                    )
                ],
                progress=None,
            )

        # Write three tables to the online store, one row per (driver, customer).
        for driver_id, customer_id in zip(driver_ids, customer_ids):
            # driver table:
            #   driver  lon  lat
            #   1       1.0  0.1
            #   2       2.0  0.2
            #   3       3.0  0.3
            write_row(
                driver_locations_fv,
                EntityKeyProto(
                    join_keys=["driver_id"],
                    entity_values=[ValueProto(int64_val=driver_id)],
                ),
                {
                    "lat": ValueProto(double_val=driver_id * lat_multiply),
                    "lon": ValueProto(string_val=str(driver_id * lon_multiply)),
                },
            )

            # customer table:
            #   customer  avg_orders_day  name  age
            #   4         4.0             foo4  40
            #   5         5.0             foo5  50
            #   6         6.0             foo6  60
            write_row(
                customer_profile_fv,
                EntityKeyProto(
                    join_keys=["customer_id"],
                    entity_values=[ValueProto(string_val=str(customer_id))],
                ),
                {
                    "avg_orders_day": ValueProto(
                        float_val=customer_id * avg_order_day_multiply
                    ),
                    "name": ValueProto(string_val=name + str(customer_id)),
                    "age": ValueProto(int64_val=customer_id * age_multiply),
                },
            )

            # customer_driver_combined table:
            #   customer  driver  trips
            #   4         1       4
            #   5         2       10
            #   6         3       18
            write_row(
                customer_driver_combined_fv,
                EntityKeyProto(
                    join_keys=["customer_id", "driver_id"],
                    entity_values=[
                        ValueProto(string_val=str(customer_id)),
                        ValueProto(int64_val=driver_id),
                    ],
                ),
                {"trips": ValueProto(int64_val=customer_id * driver_id)},
            )

        # Request the entity rows in reverse order.
        reversed_entity_rows = [
            {"driver_id": driver_id, "customer_id": customer_id}
            for driver_id, customer_id in zip(driver_ids[::-1], customer_ids[::-1])
        ]
        requested_features = [
            "driver_locations:lon",
            "driver_locations:lat",
            "customer_profile:avg_orders_day",
            "customer_profile:name",
            "customer_profile:age",
            "customer_driver_combined:trips",
        ]
        online_response = store.get_online_features(
            features=requested_features,
            entity_rows=reversed_entity_rows,
        )
        result_df = online_response.to_df()

        # Expected dataframe, rows reversed to mirror the request:
        #   driver  customer  lon  lat  avg_orders_day  name  age  trips
        #   3       6         3.0  0.3  6.0             foo6  60   18
        #   2       5         2.0  0.2  5.0             foo5  50   10
        #   1       4         1.0  0.1  4.0             foo4  40   4
        df_dict = {
            "driver_id": driver_ids,
            "customer_id": [str(c) for c in customer_ids],
            "lon": [str(d * lon_multiply) for d in driver_ids],
            "lat": [d * lat_multiply for d in driver_ids],
            "avg_orders_day": [c * avg_order_day_multiply for c in customer_ids],
            "name": [name + str(c) for c in customer_ids],
            "age": [c * age_multiply for c in customer_ids],
            "trips": [d * c for d, c in zip(driver_ids, customer_ids)],
        }
        expected_df = pd.DataFrame(
            {column: list(reversed(values)) for column, values in df_dict.items()}
        )

        # Column order the result is compared in.
        ordered_column = [
            "driver_id",
            "customer_id",
            "lon",
            "lat",
            "avg_orders_day",
            "name",
            "age",
            "trips",
        ]
        assert_frame_equal(result_df[ordered_column], expected_df)
def test_universal_cli(environment: Environment):
    """
    Drive the CLI through apply, list, describe, re-apply and teardown.

    The feature repo is generated in a temporary directory from
    ``environment.test_repo_config`` and ``example_feature_repo_1.py``.
    Registry contents are compared before and after the second ``apply`` on
    their ``spec`` parts only. A teardown is always issued on exit.
    """
    project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}"
    runner = CliRunner()

    def specs_only(registry):
        """Keep each object's ``spec`` when it has one, else the object itself."""
        stripped = {}
        for kind, objects in registry.items():
            stripped[kind] = [
                obj["spec"] if "spec" in obj else obj for obj in objects
            ]
        return stripped

    with tempfile.TemporaryDirectory() as repo_dir_name:
        try:
            repo_path = Path(repo_dir_name)

            def expect_exit(args, code):
                """Run a CLI command inside the repo and check its exit code."""
                outcome = runner.run(args, cwd=repo_path)
                assertpy.assert_that(outcome.returncode).is_equal_to(code)

            # Lay out the repo: config file plus the example definitions.
            yaml_text = make_feature_store_yaml(
                project, environment.test_repo_config, repo_path
            )
            (repo_path / "feature_store.yaml").write_text(dedent(yaml_text))
            (repo_path / "example.py").write_text(
                get_example_repo("example_feature_repo_1.py")
            )

            expect_exit(["apply"], 0)

            # Snapshot the registry specs (not the metadata) for the later comparison.
            fs = FeatureStore(repo_path=str(repo_path))
            registry_specs = specs_only(fs.registry.to_dict(project=project))

            object_kinds = (
                "entities",
                "feature-views",
                "feature-services",
                "data-sources",
            )

            # Every list command is expected to succeed.
            for kind in object_kinds:
                expect_exit([kind, "list"], 0)

            # Describing objects that exist is expected to succeed.
            expect_exit(["entities", "describe", "driver"], 0)
            expect_exit(["feature-views", "describe", "driver_locations"], 0)
            expect_exit(
                ["feature-services", "describe", "driver_locations_service"], 0
            )
            assertpy.assert_that(fs.list_feature_views()).is_length(4)
            expect_exit(
                ["data-sources", "describe", "customer_profile_source"], 0
            )
            assertpy.assert_that(fs.list_data_sources()).is_length(4)

            # Describing objects that do not exist is expected to exit with 1.
            for kind in object_kinds:
                expect_exit([kind, "describe", "foo"], 1)

            # A repeated apply must be accepted without errors.
            expect_exit(["apply"], 0)
            basic_rw_test(
                FeatureStore(repo_path=str(repo_path), config=None),
                view_name="driver_locations",
            )

            # The specs in the registry must be the same as before the re-apply.
            specs_after = specs_only(fs.registry.to_dict(project=project))
            assertpy.assert_that(registry_specs).is_equal_to(specs_after)

            expect_exit(["teardown"], 0)
        finally:
            # Always attempt a teardown on the way out, whether or not the
            # steps above completed.
            runner.run(["teardown"], cwd=repo_path)