def test_3rd_party_registry_store() -> None:
    """
    Test running apply on third party registry stores
    """
    runner = CliRunner()

    # Registry store names for which `apply` must exit with code 1, paired
    # with the message that has to show up in the CLI output.
    failing_cases = [
        # Incorrect registry store name without dots
        (
            "feast123",
            b'Registry store class name should end with "RegistryStore"',
        ),
        # Dotted name whose module cannot be imported
        (
            "feast_foo.RegistryStore",
            b"Could not import RegistryStore module 'feast_foo'",
        ),
        # Dotted name whose class cannot be imported from the module
        (
            "foo.FooRegistryStore",
            b"Could not import RegistryStore 'FooRegistryStore' from module 'foo'",
        ),
    ]
    for registry_store, expected_message in failing_cases:
        with setup_third_party_registry_store_repo(registry_store) as repo_path:
            return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
            assertpy.assert_that(return_code).is_equal_to(1)
            assertpy.assert_that(output).contains(expected_message)

    # Check with correct third-party registry store name
    with setup_third_party_registry_store_repo(
        "foo.registry_store.FooRegistryStore"
    ) as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(0)
def test_3rd_party_providers() -> None:
    """
    Test running apply on third party providers
    """
    runner = CliRunner()

    # Provider names for which `apply` must exit with code 1, paired with
    # the message that has to show up in the CLI output.
    failing_cases = [
        # Incorrect built-in provider name (no dots)
        ("feast123", b"Provider 'feast123' is not implemented"),
        # Dotted name whose module cannot be imported
        ("feast_foo.Provider", b"Could not import Provider module 'feast_foo'"),
        # Dotted name whose class cannot be imported from the module
        (
            "foo.FooProvider",
            b"Could not import Provider 'FooProvider' from module 'foo'",
        ),
    ]
    for provider_name, expected_message in failing_cases:
        with setup_third_party_provider_repo(provider_name) as repo_path:
            return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
            assertpy.assert_that(return_code).is_equal_to(1)
            assertpy.assert_that(output).contains(expected_message)

    # Check with correct third-party provider name
    with setup_third_party_provider_repo("foo.provider.FooProvider") as repo_path:
        return_code, output = runner.run_with_output(["apply"], cwd=repo_path)
        assertpy.assert_that(return_code).is_equal_to(0)
def test_cli_apply_duplicated_featureview_names() -> None:
    """
    Test apply feature views with duplicated names and single py file in a feature repo using CLI
    """
    # The repo setup and the "non-zero exit code + expected message" check are
    # exactly what run_simple_apply_test does, so reuse it instead of keeping
    # a second copy of the same boilerplate here.
    run_simple_apply_test(
        example_repo_file_name="example_feature_repo_with_duplicated_featureview_names.py",
        expected_error=b"Please ensure that all feature view names are unique",
    )
def test_missing_bq_source_fail() -> None:
    """
    Test that apply with the gcp provider exits with code 1 and reports
    DataSourceNotFoundException for example_feature_repo_with_missing_bq_source.py
    """
    # Random 10-character project name made of lowercase letters and digits
    alphabet = string.ascii_lowercase + string.digits
    project_id = "".join(random.choice(alphabet) for _ in range(10))

    runner = CliRunner()
    with tempfile.TemporaryDirectory() as repo_dir_name:
        with tempfile.TemporaryDirectory() as data_dir_name:
            repo_path = Path(repo_dir_name)
            data_path = Path(data_dir_name)

            # Write the feature store configuration
            config_text = dedent(
                f"""
                project: {project_id}
                registry: {data_path / "registry.db"}
                provider: gcp
                """
            )
            (repo_path / "feature_store.yaml").write_text(config_text)

            # Write the feature definitions
            example_source = get_example_repo(
                "example_feature_repo_with_missing_bq_source.py"
            )
            (repo_path / "example.py").write_text(example_source)

            returncode, output = runner.run_with_output(["apply"], cwd=repo_path)

            assert returncode == 1
            assert b"DataSourceNotFoundException" in output
def test_cli_apply_imported_featureview_with_duplication() -> None:
    """
    Test that apply fails when a second py file imports objects from example.py
    and also defines its own feature view named 'driver_hourly_stats'
    """
    # Source of the second file: imports from example.py, builds a feature
    # service and declares an additional feature view.
    second_file_source = (
        "from datetime import timedelta\n"
        "from example import driver_hourly_stats, driver_hourly_stats_view\n"
        "from feast import FeatureService, FeatureView\n"
        "a_feature_service = FeatureService(\n"
        " name='driver_locations_service',\n"
        " features=[driver_hourly_stats_view],\n"
        ")\n"
        "driver_hourly_stats_view_2 = FeatureView(\n"
        " name='driver_hourly_stats',\n"
        " entities=['driver_id'],\n"
        " ttl=timedelta(days=1),\n"
        " online=True,\n"
        " batch_source=driver_hourly_stats,\n"
        " tags={'dummy': 'true'})\n"
    )

    with tempfile.TemporaryDirectory() as repo_dir_name:
        with tempfile.TemporaryDirectory() as data_dir_name:
            runner = CliRunner()

            # Construct an example repo in a temporary dir
            repo_path = Path(repo_dir_name)
            data_path = Path(data_dir_name)

            config_text = dedent(
                f"""
                project: foo
                registry: {data_path / "registry.db"}
                provider: local
                online_store:
                    path: {data_path / "online_store.db"}
                """
            )
            (repo_path / "feature_store.yaml").write_text(config_text)

            (repo_path / "example.py").write_text(
                get_example_repo("example_feature_repo_2.py")
            )
            (repo_path / "example_2.py").write_text(second_file_source)

            rc, output = runner.run_with_output(["apply"], cwd=repo_path)

            assert rc != 0
            expected_error = (
                b"More than one feature view with name driver_hourly_stats found."
            )
            assert expected_error in output
def test_cli_apply_imported_featureview() -> None:
    """
    Test that apply succeeds when a second py file imports a feature view from
    example.py and uses it in a feature service
    """
    # Source of the second file: only imports the feature view and wraps it
    # in a feature service.
    second_file_source = (
        "from example import driver_hourly_stats_view\n"
        "from feast import FeatureService\n"
        "a_feature_service = FeatureService(\n"
        " name='driver_locations_service',\n"
        " features=[driver_hourly_stats_view],\n"
        ")\n"
    )

    with tempfile.TemporaryDirectory() as repo_dir_name:
        with tempfile.TemporaryDirectory() as data_dir_name:
            runner = CliRunner()

            # Construct an example repo in a temporary dir
            repo_path = Path(repo_dir_name)
            data_path = Path(data_dir_name)

            config_text = dedent(
                f"""
                project: foo
                registry: {data_path / "registry.db"}
                provider: local
                online_store:
                    path: {data_path / "online_store.db"}
                """
            )
            (repo_path / "feature_store.yaml").write_text(config_text)

            (repo_path / "example.py").write_text(
                get_example_repo("example_feature_repo_2.py")
            )
            (repo_path / "example_2.py").write_text(second_file_source)

            rc, output = runner.run_with_output(["apply"], cwd=repo_path)

            assert rc == 0
            assert b"Created feature service driver_locations_service" in output
def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes) -> None:
    """
    Run apply on a single-file local-provider repo and check that it fails

    Builds a temporary feature repo whose example.py is the given example repo
    file, runs `apply` through the CLI, and asserts that the command exits
    with a non-zero code and that its output contains the expected error.

    Args:
        example_repo_file_name: Name passed to get_example_repo to obtain the
            content written to example.py.
        expected_error: Bytes that must be present in the CLI output.
    """
    with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
        runner = CliRunner()

        # Construct an example repo in a temporary dir
        repo_path = Path(repo_dir_name)
        data_path = Path(data_dir_name)

        repo_config = repo_path / "feature_store.yaml"
        repo_config.write_text(
            dedent(
                f"""
                project: foo
                registry: {data_path / "registry.db"}
                provider: local
                online_store:
                    path: {data_path / "online_store.db"}
                """
            )
        )

        repo_example = repo_path / "example.py"
        repo_example.write_text(get_example_repo(example_repo_file_name))

        rc, output = runner.run_with_output(["apply"], cwd=repo_path)

        # Two separate asserts so a failure shows which condition was violated
        assert rc != 0
        assert expected_error in output
def test_e2e_local() -> None:
    """
    A more comprehensive than "basic" test, using local provider.

    1. Create a repo.
    2. Apply
    3. Ingest some data to online store from parquet
    4. Read from the online store to make sure it made it there.

    Afterwards a second repo is used to check that materialize fails with
    FeastJoinKeysDuringMaterialization.
    """

    runner = CliRunner()
    with tempfile.TemporaryDirectory() as data_dir:

        # Generate some test data in parquet format.
        # The end of the range is the current time truncated to the hour.
        end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
        start_date = end_date - timedelta(days=15)
        # First materialization stops a week before the end of the data
        materialize_end = end_date - timedelta(days=7)

        driver_entities = [1001, 1002, 1003, 1004, 1005]
        driver_df = driver_data.create_driver_hourly_stats_df(
            driver_entities, start_date, end_date
        )
        driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
        driver_df.to_parquet(path=driver_stats_path, allow_truncated_timestamps=True)

        global_df = driver_data.create_global_daily_stats_df(start_date, end_date)
        global_stats_path = os.path.join(data_dir, "global_stats.parquet")
        global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True)

        # Note that runner takes care of running apply/teardown for us here.
        # We patch python code in example_feature_repo_2.py to set the path to Parquet files.
        with runner.local_repo(
            get_example_repo("example_feature_repo_2.py")
            .replace("%PARQUET_PATH%", driver_stats_path)
            .replace("%PARQUET_PATH_GLOBAL%", global_stats_path),
            "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            r = runner.run(
                [
                    "materialize",
                    start_date.isoformat(),
                    materialize_end.isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, materialize_end)

            # feast materialize-incremental
            r = runner.run(
                ["materialize-incremental", end_date.isoformat()],
                cwd=Path(store.repo_path),
            )

            assert r.returncode == 0

            _assert_online_features(store, driver_df, end_date)

        # Test a failure case when the parquet file doesn't include a join key
        with runner.local_repo(
            get_example_repo("example_feature_repo_with_entity_join_key.py").replace(
                "%PARQUET_PATH%", driver_stats_path
            ),
            "file",
        ) as store:

            assert store.repo_path is not None

            # feast materialize
            returncode, output = runner.run_with_output(
                [
                    "materialize",
                    start_date.isoformat(),
                    materialize_end.isoformat(),
                ],
                cwd=Path(store.repo_path),
            )

            assert returncode != 0
            # Search the raw bytes rather than str(output): the repr of a bytes
            # object triggers BytesWarning under `python -b` and can match
            # across escape sequences.
            assert b"feast.errors.FeastJoinKeysDuringMaterialization" in output