def test_basic_register_feature_set_success(client):
    """Register feature sets and verify they can be read back unchanged.

    Covers three scenarios in order: registration without an explicit
    project, registration after selecting ``PROJECT_NAME``, and lookup of a
    feature set by label.

    Args:
        client: Feast client fixture used to apply and fetch feature sets.
    """
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    # From here on the client carries a non-default project. The reset lives
    # in ``finally`` so a failing assertion cannot leak that project into
    # the tests that run afterwards.
    try:
        client.apply(cust_trans_fs_expected)
        cust_trans_fs_actual = client.get_feature_set(
            "customer_transactions", project=PROJECT_NAME)
        assert cust_trans_fs_actual == cust_trans_fs_expected

        # Register feature set with labels
        driver_unlabelled_fs = FeatureSet(
            "driver_unlabelled",
            features=[
                Feature("rating", ValueType.FLOAT),
                Feature("cost", ValueType.FLOAT),
            ],
            entities=[Entity("entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
        )
        driver_labeled_fs_expected = FeatureSet(
            "driver_labeled",
            features=[
                Feature("rating", ValueType.FLOAT),
                Feature("cost", ValueType.FLOAT),
            ],
            entities=[Entity("entity_id", ValueType.INT64)],
            max_age=Duration(seconds=100),
            labels={"key1": "val1"},
        )
        client.set_project(PROJECT_NAME)
        client.apply(driver_unlabelled_fs)
        client.apply(driver_labeled_fs_expected)

        labeled_feature_sets = client.list_feature_sets(
            project=PROJECT_NAME, labels={"key1": "val1"})
        # Fail with a readable message instead of a bare IndexError when the
        # label filter matches nothing.
        assert labeled_feature_sets, (
            "No feature set returned for labels {'key1': 'val1'}")
        driver_fs_actual = labeled_feature_sets[0]
        assert driver_fs_actual == driver_labeled_fs_expected
    finally:
        # reset client's project for other tests
        client.set_project()
def test_sources_deduplicate_ingest_jobs(client):
    """Check that the number of running ingest jobs tracks distinct sources.

    Feature sets that share a source are expected to end up with a single
    running ingest job; pointing one of them at another source is expected
    to yield two.

    Args:
        client: Feast client fixture used to apply feature sets and to
            list/stop ingest jobs.
    """
    primary_source = KafkaSource("localhost:9092", "feast-features")
    secondary_source = KafkaSource("localhost:9092", "feast-data")

    def running_job_count():
        # Number of ingest jobs currently reporting the RUNNING status.
        return sum(
            1
            for job in client.list_ingest_jobs()
            if job.status == IngestionJobStatus.RUNNING
        )

    def wait_for_running_jobs(expected, lower, upper):
        # Poll once per second until exactly `expected` jobs are running,
        # asserting the count stays within [lower, upper] while waiting.
        while running_job_count() != expected:
            assert lower <= running_job_count() <= upper
            time.sleep(1)

    # Start from a clean slate: request a stop for every job first, then
    # wait for each of them to report ABORTED.
    existing_jobs = client.list_ingest_jobs()
    for job in existing_jobs:
        client.stop_ingest_job(job)
    for job in existing_jobs:
        job.wait(IngestionJobStatus.ABORTED)

    # Two feature sets sharing one source -> a single ingest job is expected.
    cust_trans_fs = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml")
    cust_trans_fs.source = primary_source
    driver_fs.source = primary_source
    client.apply(cust_trans_fs)
    client.apply(driver_fs)
    wait_for_running_jobs(expected=1, lower=0, upper=1)

    # Distinct sources -> two ingest jobs are expected.
    driver_fs.source = secondary_source
    client.apply(driver_fs)
    wait_for_running_jobs(expected=2, lower=1, upper=2)

    # Back to a shared source -> the jobs should collapse to one again.
    driver_fs.source = primary_source
    client.apply(driver_fs)
    wait_for_running_jobs(expected=1, lower=1, upper=2)
def test_basic_register_feature_set_success(client):
    """Register feature sets with and without a project and read them back.

    Args:
        client: Feast client fixture used to apply and fetch feature sets.
    """
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml("basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    # The reset lives in ``finally`` so that a failing assertion cannot leave
    # the client pointing at PROJECT_NAME for the tests that follow.
    try:
        client.apply(cust_trans_fs_expected)
        cust_trans_fs_actual = client.get_feature_set(
            "customer_transactions", project=PROJECT_NAME)
        assert cust_trans_fs_actual == cust_trans_fs_expected
    finally:
        # reset client's project for other tests
        client.set_project()
def test_export_tfx_schema(self):
    """Compare an exported TFX schema with the stored JSON fixture.

    Loads the bikeshare feature set from YAML, exports its schema, and
    checks feature-by-feature that the serialized form matches the schema
    parsed from ``bikeshare_schema.json``.
    """
    fixtures_dir = (
        pathlib.Path(__file__).parent / "data" / "tensorflow_metadata")
    test_input_feature_set = FeatureSet.from_yaml(
        str(fixtures_dir / "bikeshare_feature_set.yaml"))

    # Read the fixture through a context manager so the file handle is
    # closed deterministically rather than left to garbage collection.
    with open(fixtures_dir / "bikeshare_schema.json") as schema_file:
        expected_schema_json = schema_file.read()

    expected_schema = schema_pb2.Schema()
    json_format.Parse(expected_schema_json, expected_schema)
    # NOTE(review): helper is defined outside this block; presumably it
    # rewrites the expected schema so domain info is inlined per feature,
    # matching what export_tfx_schema() emits - confirm against the helper.
    _make_tfx_schema_domain_info_inline(expected_schema)

    actual_schema = test_input_feature_set.export_tfx_schema()

    # Check lengths first: zip() below would silently ignore extra features.
    assert len(actual_schema.feature) == len(expected_schema.feature)
    for actual, expected in zip(actual_schema.feature,
                                expected_schema.feature):
        assert actual.SerializeToString() == expected.SerializeToString()
def test_large_volume_register_feature_set_success(client):
    """Register the large-volume feature set and confirm it is retrievable.

    Args:
        client: Feast client fixture used to apply and fetch feature sets.

    Raises:
        Exception: If the feature set cannot be fetched after registration.
    """
    cust_trans_fs = FeatureSet.from_yaml(
        "large_volume/cust_trans_large_fs.yaml")

    # Register feature set
    client.apply(cust_trans_fs)

    # Feast Core needs some time to fully commit the FeatureSet applied
    # when there is no existing job yet for the Featureset
    time.sleep(10)
    cust_trans_fs_applied = client.get_feature_set(
        name="customer_transactions_large")

    # Check the object fetched back from the client, not the one loaded
    # from YAML (which is never None at this point).
    if cust_trans_fs_applied is None:
        raise Exception(
            "Client cannot retrieve 'customer_transactions_large' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")
def test_basic_register_feature_set_success(client):
    """Register the customer transactions feature set under PROJECT_NAME.

    Args:
        client: Feast client fixture used to apply and fetch feature sets.

    Raises:
        Exception: If the feature set cannot be fetched after registration.
    """
    # Load feature set from file
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")

    # NOTE(review): the project is set here and not reset in this test -
    # confirm later tests expect the client to stay on PROJECT_NAME.
    client.set_project(PROJECT_NAME)

    # Register feature set
    client.apply(cust_trans_fs_expected)

    cust_trans_fs_actual = client.get_feature_set(name="customer_transactions")

    # Guard against a missing feature set before comparing, so the
    # descriptive error below is what surfaces instead of an opaque
    # comparison failure against None.
    if cust_trans_fs_actual is None:
        raise Exception(
            "Client cannot retrieve 'customer_transactions' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")

    assert cust_trans_fs_actual == cust_trans_fs_expected
def test_feature_set_import_export_yaml(self):
    """Round-trip a feature set through its YAML string form.

    Builds a small "bikeshare" feature set, serializes it with ``to_yaml``,
    rebuilds it with ``FeatureSet.from_yaml`` and asserts both compare equal.
    """
    station_entity = Entity(name="station_id", dtype=ValueType.INT64)
    feature_specs = (
        ("name", ValueType.STRING),
        ("longitude", ValueType.FLOAT),
        ("location", ValueType.STRING),
    )
    bikeshare_features = [
        Feature(name=feature_name, dtype=feature_dtype)
        for feature_name, feature_dtype in feature_specs
    ]
    original_fs = FeatureSet(
        name="bikeshare",
        entities=[station_entity],
        features=bikeshare_features,
    )

    # Serialize to a YAML string and parse that string straight back.
    round_tripped_fs = FeatureSet.from_yaml(original_fs.to_yaml())

    # The reconstructed feature set must equal the one we started from.
    assert original_fs == round_tripped_fs
def test_basic_register_feature_set_success(client):
    """Register the customer transactions feature set and read it back.

    Args:
        client: Feast client fixture used to apply and fetch feature sets.

    Raises:
        Exception: If the feature set cannot be fetched after registration.
    """
    # Load feature set from file
    cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")

    # Register feature set
    client.apply(cust_trans_fs_expected)

    # Feast Core needs some time to fully commit the FeatureSet applied
    # when there is no existing job yet for the Featureset
    time.sleep(15)

    cust_trans_fs_actual = client.get_feature_set(name="customer_transactions")

    # Guard against a missing feature set before comparing, so the
    # descriptive error below is what surfaces instead of an opaque
    # comparison failure against None.
    if cust_trans_fs_actual is None:
        raise Exception(
            "Client cannot retrieve 'customer_transactions' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")

    assert cust_trans_fs_actual == cust_trans_fs_expected
def test_all_types_parquet_register_feature_set_success(client):
    """Register the all-types parquet feature set and read it back.

    Args:
        client: Feast client fixture used to apply and fetch feature sets.

    Raises:
        Exception: If the feature set cannot be fetched after registration.
    """
    # Load feature set from file
    all_types_parquet_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/all_types_parquet/all_types_parquet.yaml")

    # Register feature set
    client.apply(all_types_parquet_expected)

    # Feast Core needs some time to fully commit the FeatureSet applied
    # when there is no existing job yet for the Featureset
    time.sleep(30)

    all_types_parquet_actual = client.get_feature_set(name="all_types_parquet")

    # Guard against a missing feature set before comparing, so the
    # descriptive error below is what surfaces instead of an opaque
    # comparison failure against None. The message names the feature set
    # this test actually registers.
    if all_types_parquet_actual is None:
        raise Exception(
            "Client cannot retrieve 'all_types_parquet' FeatureSet "
            "after registration. Either Feast Core does not save the "
            "FeatureSet correctly or the client needs to wait longer for FeatureSet "
            "to be committed.")

    assert all_types_parquet_actual == all_types_parquet_expected
def test_basic(client):
    """Ingest customer transactions and poll serving until they are readable.

    Ensures version 1 of the "customer_transactions" feature set exists
    (registering it from YAML when it does not), ingests five rows with a
    randomized key range, then repeatedly queries online serving until the
    returned ``daily_transactions`` value for the first row matches the one
    that was sent, within ``FLOAT_TOLERANCE``.

    Args:
        client: Feast client fixture used for registration and retrieval.

    Raises:
        Exception: If the feature set is still missing after registration.
    """
    feature_set = client.get_feature_set(name="customer_transactions",
                                         version=1)

    # TODO: Fix source handling in Feast Core to support true idempotent
    # applies. In this case, applying a feature set without a source will
    # create a new feature set every time.
    if feature_set is None:
        # Nothing registered yet: load the definition from file and apply it.
        feature_set = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
        client.apply(feature_set)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(15)

        feature_set = client.get_feature_set(name="customer_transactions",
                                             version=1)
        if feature_set is None:
            raise Exception(
                "Client cannot retrieve 'customer_transactions' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    row_count = 5
    # ensure a unique key space is used
    key_offset = random.randint(1000, 100000)
    customer_df = pd.DataFrame({
        "datetime": [
            datetime.utcnow().replace(tzinfo=pytz.utc)
            for _ in range(row_count)
        ],
        "customer_id": [key_offset + step for step in range(row_count)],
        "daily_transactions": [np.random.rand() for _ in range(row_count)],
        "total_transactions": [512 for _ in range(row_count)],
    })

    # Ingest customer transaction data
    feature_set.ingest(dataframe=customer_df)

    daily_feature_id = "customer_transactions:1:daily_transactions"
    total_feature_id = "customer_transactions:1:total_transactions"

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)

        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={
                "customer_id":
                Value(int64_val=customer_df.iloc[0]["customer_id"])
            })
        # Expected response type: GetOnlineFeaturesResponse
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_ids=[daily_feature_id, total_feature_id],
        )

        if response is not None:
            returned_field = response.field_values[0].fields[
                "customer_transactions:1:daily_transactions"]
            returned_daily_transactions = float(returned_field.float_val)
            sent_daily_transactions = float(
                customer_df.iloc[0]["daily_transactions"])

            values_match = math.isclose(
                sent_daily_transactions,
                returned_daily_transactions,
                abs_tol=FLOAT_TOLERANCE,
            )
            if values_match:
                break