def s3_registry():
    """Build a Registry backed by a unique, time-stamped S3 key.

    The millisecond timestamp in the path keeps concurrent integration-test
    runs from clobbering each other's registry files.
    """
    timestamp_ms = int(time.time() * 1000)
    config = RegistryConfig(
        path=f"s3://feast-integration-tests/registries/{timestamp_ms}/registry.db",
        cache_ttl_seconds=600,
    )
    return Registry(config, None)
def gcs_registry():
    """Build a Registry stored in a freshly created GCS bucket.

    A 14-day delete lifecycle rule is attached so abandoned test buckets
    are cleaned up automatically by GCS.
    """
    from google.cloud import storage

    client = storage.Client()
    bucket_name = f"feast-registry-test-{int(time.time() * 1000)}"
    new_bucket = client.create_bucket(client.bucket(bucket_name))
    # delete buckets automatically after 14 days
    new_bucket.add_lifecycle_delete_rule(age=14)
    new_bucket.patch()
    # NOTE(review): blob() only builds a client-side handle; nothing is
    # uploaded here — presumably kept for parity with the original, confirm.
    new_bucket.blob("registry.db")
    config = RegistryConfig(
        path=f"gs://{bucket_name}/registry.db", cache_ttl_seconds=600
    )
    return Registry(config, None)
def local_registry():
    """Build a Registry backed by a local temporary file.

    Fix: ``mkstemp()`` returns an *open* OS-level file descriptor in
    addition to the path; the original discarded ``fd`` without closing
    it, leaking one descriptor per call. Only the path is needed by the
    Registry, so the descriptor is closed immediately.
    """
    fd, registry_path = mkstemp()
    os.close(fd)  # avoid leaking the descriptor; Registry only needs the path
    registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600)
    return Registry(registry_config, None)
def test_commit():
    """Verify that registry writes are buffered until ``commit()``.

    Applies an entity with ``commit=False`` and checks that:
      * the writing registry sees it through its in-memory cache,
      * a second registry pointing at the same store does NOT see it,
      * after ``commit()`` a fresh registry DOES see it,
      * after ``teardown()`` reloading the registry raises
        ``FileNotFoundError``.

    Fix: the original leaked the open file descriptor returned by
    ``mkstemp()``; it is now closed since only the path is used.
    """
    fd, registry_path = mkstemp()
    os.close(fd)  # only the path is needed; don't leak the descriptor
    registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600)
    test_registry = Registry(registry_config, None)

    entity = Entity(
        name="driver_car_id",
        description="Car driver id",
        value_type=ValueType.STRING,
        labels={"team": "matchmaking"},
    )
    project = "project"

    # Register Entity without commiting
    test_registry.apply_entity(entity, project, commit=False)

    # Retrieving the entity should still succeed (served from the local cache)
    entities = test_registry.list_entities(project, allow_cache=True)
    entity = entities[0]
    assert (
        len(entities) == 1
        and entity.name == "driver_car_id"
        and entity.value_type == ValueType(ValueProto.ValueType.STRING)
        and entity.description == "Car driver id"
        and "team" in entity.labels
        and entity.labels["team"] == "matchmaking"
    )

    entity = test_registry.get_entity("driver_car_id", project, allow_cache=True)
    assert (
        entity.name == "driver_car_id"
        and entity.value_type == ValueType(ValueProto.ValueType.STRING)
        and entity.description == "Car driver id"
        and "team" in entity.labels
        and entity.labels["team"] == "matchmaking"
    )

    # Create new registry that points to the same store
    registry_with_same_store = Registry(registry_config, None)

    # Retrieving the entity should fail since the store is empty
    entities = registry_with_same_store.list_entities(project)
    assert len(entities) == 0

    # commit from the original registry
    test_registry.commit()

    # Reconstruct the new registry in order to read the newly written store
    registry_with_same_store = Registry(registry_config, None)

    # Retrieving the entity should now succeed
    entities = registry_with_same_store.list_entities(project)
    entity = entities[0]
    assert (
        len(entities) == 1
        and entity.name == "driver_car_id"
        and entity.value_type == ValueType(ValueProto.ValueType.STRING)
        and entity.description == "Car driver id"
        and "team" in entity.labels
        and entity.labels["team"] == "matchmaking"
    )

    entity = test_registry.get_entity("driver_car_id", project)
    assert (
        entity.name == "driver_car_id"
        and entity.value_type == ValueType(ValueProto.ValueType.STRING)
        and entity.description == "Car driver id"
        and "team" in entity.labels
        and entity.labels["team"] == "matchmaking"
    )

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
def test_online() -> None:
    """
    Test reading from the online store in local mode.

    Exercises three things against a local repo:
      1. writing feature rows via ``provider.online_write_batch`` and
         reading them back through ``store.get_online_features``,
      2. behavior for missing entity keys and invalid feature references,
      3. registry cache TTL semantics (short TTL forces a reload; TTL=0
         means cache forever until ``refresh_registry`` is called).
    """
    runner = CliRunner()
    with runner.local_repo(
            get_example_repo("example_feature_repo_1.py")) as store:
        # Write some data to two tables
        driver_locations_fv = store.get_feature_view(name="driver_locations")
        customer_profile_fv = store.get_feature_view(name="customer_profile")
        customer_driver_combined_fv = store.get_feature_view(
            name="customer_driver_combined")
        provider = store._get_provider()

        # Single-entity key for the driver_locations table.
        driver_key = EntityKeyProto(join_keys=["driver"],
                                    entity_values=[ValueProto(int64_val=1)])
        provider.online_write_batch(
            project=store.config.project,
            table=driver_locations_fv,
            data=[(
                driver_key,
                {
                    "lat": ValueProto(double_val=0.1),
                    # NOTE(review): lon is written as a *string* value and the
                    # assertions below expect ["1.0"] — intentional, confirm.
                    "lon": ValueProto(string_val="1.0"),
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        customer_key = EntityKeyProto(join_keys=["customer"],
                                      entity_values=[ValueProto(int64_val=5)])
        provider.online_write_batch(
            project=store.config.project,
            table=customer_profile_fv,
            data=[(
                customer_key,
                {
                    "avg_orders_day": ValueProto(float_val=1.0),
                    "name": ValueProto(string_val="John"),
                    "age": ValueProto(int64_val=3),
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        # Composite key (customer + driver) for the combined table.
        customer_key = EntityKeyProto(
            join_keys=["customer", "driver"],
            entity_values=[ValueProto(int64_val=5), ValueProto(int64_val=1)],
        )
        provider.online_write_batch(
            project=store.config.project,
            table=customer_driver_combined_fv,
            data=[(
                customer_key,
                {
                    "trips": ValueProto(int64_val=7)
                },
                datetime.utcnow(),
                datetime.utcnow(),
            )],
            progress=None,
        )

        # Retrieve two features using two keys, one valid one non-existing
        result = store.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }, {
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()

        assert "driver_locations__lon" in result
        assert "customer_profile__avg_orders_day" in result
        assert "customer_profile__name" in result
        assert result["driver"] == [1, 1]
        assert result["customer"] == [5, 5]
        assert result["driver_locations__lon"] == ["1.0", "1.0"]
        assert result["customer_profile__avg_orders_day"] == [1.0, 1.0]
        assert result["customer_profile__name"] == ["John", "John"]
        assert result["customer_driver_combined__trips"] == [7, 7]

        # Ensure features are still in result when keys not found
        result = store.get_online_features(
            feature_refs=["customer_driver_combined:trips"],
            entity_rows=[{
                "driver": 0,
                "customer": 0
            }],
        ).to_dict()
        assert "customer_driver_combined__trips" in result

        # invalid table reference
        with pytest.raises(FeatureViewNotFoundException):
            store.get_online_features(
                feature_refs=["driver_locations_bad:lon"],
                entity_rows=[{
                    "driver": 1
                }],
            )

        # Create new FeatureStore object with fast cache invalidation
        cache_ttl = 1
        fs_fast_ttl = FeatureStore(config=RepoConfig(
            registry=RegistryConfig(path=store.config.registry,
                                    cache_ttl_seconds=cache_ttl),
            online_store=store.config.online_store,
            project=store.config.project,
            provider=store.config.provider,
        ))

        # Should download the registry and cache it permanently (or until manually refreshed)
        result = fs_fast_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Rename the registry.db so that it cant be used for refreshes
        os.rename(store.config.registry, store.config.registry + "_fake")

        # Wait for registry to expire
        time.sleep(cache_ttl)

        # Will try to reload registry because it has expired (it will fail because we deleted the actual registry file)
        with pytest.raises(FileNotFoundError):
            fs_fast_ttl.get_online_features(
                feature_refs=[
                    "driver_locations:lon",
                    "customer_profile:avg_orders_day",
                    "customer_profile:name",
                    "customer_driver_combined:trips",
                ],
                entity_rows=[{
                    "driver": 1,
                    "customer": 5
                }],
            ).to_dict()

        # Restore registry.db so that we can see if it actually reloads registry
        os.rename(store.config.registry + "_fake", store.config.registry)

        # Test if registry is actually reloaded and whether results return
        result = fs_fast_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Create a registry with infinite cache (for users that want to manually refresh the registry)
        fs_infinite_ttl = FeatureStore(config=RepoConfig(
            registry=RegistryConfig(path=store.config.registry,
                                    cache_ttl_seconds=0),
            online_store=store.config.online_store,
            project=store.config.project,
            provider=store.config.provider,
        ))

        # Should return results (and fill the registry cache)
        result = fs_infinite_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Wait a bit so that an arbitrary TTL would take effect
        time.sleep(2)

        # Rename the registry.db so that it cant be used for refreshes
        os.rename(store.config.registry, store.config.registry + "_fake")

        # TTL is infinite so this method should use registry cache
        result = fs_infinite_ttl.get_online_features(
            feature_refs=[
                "driver_locations:lon",
                "customer_profile:avg_orders_day",
                "customer_profile:name",
                "customer_driver_combined:trips",
            ],
            entity_rows=[{
                "driver": 1,
                "customer": 5
            }],
        ).to_dict()
        assert result["driver_locations__lon"] == ["1.0"]
        assert result["customer_driver_combined__trips"] == [7]

        # Force registry reload (should fail because file is missing)
        with pytest.raises(FileNotFoundError):
            fs_infinite_ttl.refresh_registry()

        # Restore registry.db so that teardown works
        os.rename(store.config.registry + "_fake", store.config.registry)
def construct_test_environment(
    test_repo_config: IntegrationTestRepoConfig,
    test_suite_name: str = "integration_test",
    worker_id: str = "worker_id",
    offline_container: Optional[DockerContainer] = None,
) -> Environment:
    """Build a fully configured test ``Environment`` for one integration run.

    Creates a unique project name (from the GitHub run id/number when
    available, plus a random suffix), instantiates the offline and
    (optionally) online store creators from ``test_repo_config``, writes a
    ``feature_store.yaml`` into a fresh temp repo directory, and returns
    an ``Environment`` wrapping the resulting ``FeatureStore``.

    Note: when ``test_repo_config.online_store_creator`` is set, this
    function *mutates* ``test_repo_config.online_store`` in place (see the
    chained assignment below).
    """
    # Unique-ish project id: GitHub run id + short uuid, or uuid alone locally.
    _uuid = str(uuid.uuid4()).replace("-", "")[:6]
    run_id = os.getenv("GITHUB_RUN_ID", default=None)
    run_id = f"gh_run_{run_id}_{_uuid}" if run_id else _uuid
    run_num = os.getenv("GITHUB_RUN_NUMBER", default=1)
    project = f"{test_suite_name}_{run_id}_{run_num}"

    offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(
        project, offline_container=offline_container)
    offline_store_config = offline_creator.create_offline_store_config()

    if test_repo_config.online_store_creator:
        online_creator = test_repo_config.online_store_creator(project)
        # Chained assignment: stores the created config both in the local
        # variable AND back onto test_repo_config.online_store.
        online_store = (test_repo_config.online_store
                        ) = online_creator.create_online_store()
    else:
        online_creator = None
        online_store = test_repo_config.online_store

    repo_dir_name = tempfile.mkdtemp()

    if test_repo_config.python_feature_server and test_repo_config.provider == "aws":
        # Imported lazily so non-AWS runs don't need the lambda server deps.
        from feast.infra.feature_servers.aws_lambda.config import (
            AwsLambdaFeatureServerConfig,
        )
        feature_server = AwsLambdaFeatureServerConfig(
            enabled=True,
            execution_role_name=
            "arn:aws:iam::402087665549:role/lambda_execution_role",
        )
        # AWS runs keep the registry on S3, keyed by the unique project name.
        registry = (
            f"s3://feast-integration-tests/registries/{project}/registry.db"
        )  # type: Union[str, RegistryConfig]
    else:
        # Note: even if it's a local feature server, the repo config does not have this configured
        feature_server = None
        # Short TTL so registry changes are picked up quickly during tests.
        registry = RegistryConfig(
            path=str(Path(repo_dir_name) / "registry.db"),
            cache_ttl_seconds=1,
        )

    config = RepoConfig(
        registry=registry,
        project=project,
        provider=test_repo_config.provider,
        offline_store=offline_store_config,
        online_store=online_store,
        repo_path=repo_dir_name,
        feature_server=feature_server,
        go_feature_retrieval=test_repo_config.go_feature_retrieval,
    )

    # Create feature_store.yaml out of the config
    with open(Path(repo_dir_name) / "feature_store.yaml", "w") as f:
        yaml.safe_dump(json.loads(config.json()), f)

    fs = FeatureStore(repo_dir_name)
    # We need to initialize the registry, because if nothing is applied in the test before tearing down
    # the feature store, that will cause the teardown method to blow up.
    fs.registry._initialize_registry()
    environment = Environment(
        name=project,
        test_repo_config=test_repo_config,
        feature_store=fs,
        data_source_creator=offline_creator,
        python_feature_server=test_repo_config.python_feature_server,
        worker_id=worker_id,
        online_store_creator=online_creator,
    )

    return environment