def run_materialization_test(fs: FeatureStore, fv: FeatureView) -> None:
    """Materialize ``fv`` into the online store and verify the served values.

    Runs ``fs.materialize`` over the window from five hours ago to two hours
    ago, then ``fs.materialize_incremental`` up to the current time, reading
    the ``value`` feature back from the online store after each step.

    Args:
        fs: Feature store used for materialization and online reads.
        fv: Feature view whose ``value`` feature is materialized and checked.

    Raises:
        AssertionError: If an online value differs from the expected one by
            1e-6 or more.
    """
    now = datetime.utcnow()
    feature_ref = f"{fv.name}:value"
    result_key = f"{fv.name}__value"

    def online_value(driver_id):
        # Fetch the single online value stored for the given driver.
        entity_rows = [{"driver_id": driver_id}]
        features = fs.get_online_features([feature_ref], entity_rows)
        return features.to_dict()[result_key][0]

    # The window start is tz-aware and the window end is tz-naive on purpose:
    # materialize() is expected to handle both kinds of timestamp.
    window_start = (now - timedelta(hours=5)).replace(tzinfo=utc)
    window_end = now - timedelta(hours=2)
    fs.materialize([fv.name], window_start, window_end)

    # Value produced by materialize().
    assert abs(online_value(1) - 0.3) < 1e-6

    # Value held for driver 3 before the incremental run.
    assert abs(online_value(3) - 4) < 1e-6

    fs.materialize_incremental([fv.name], now - timedelta(seconds=0))

    # Value held for driver 3 after the incremental run.
    assert abs(online_value(3) - 5) < 1e-6
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
) -> None:
    """Assert that the online and offline stores agree on ``fv``'s ``value``.

    Args:
        fs: Feature store to query.
        fv: Feature view whose ``value`` feature is checked.
        driver_id: Entity id to look up in both stores.
        event_timestamp: Timestamp placed in the entity dataframe for the
            historical (offline) lookup.
        expected_value: Value both stores should return, or ``None`` when no
            value is expected. ``0.0`` is a real expected value, not "missing".

    Raises:
        AssertionError: If either store returns something other than the
            expected value (within 1e-6) or, when ``expected_value`` is
            ``None``, returns a value that is not missing.
    """
    feature_ref = f"{fv.name}:value"
    value_key = f"{fv.name}__value"

    # Check online store
    # NOTE(review): the online lookup keys on "driver" while the offline
    # entity dataframe uses "driver_id" -- confirm this matches the entity
    # definition used by the callers.
    response_dict = fs.get_online_features(
        [feature_ref], [{"driver": driver_id}]
    ).to_dict()
    online_value = response_dict[value_key][0]
    # Compare against None explicitly: a plain truthiness test would send an
    # expected value of 0.0 down the "nothing expected" branch.
    if expected_value is not None:
        assert abs(online_value - expected_value) < 1e-6
    else:
        assert online_value is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict(
            {"driver_id": [driver_id], "event_timestamp": [event_timestamp]}
        ),
        feature_refs=[feature_ref],
    ).to_df()
    offline_value = df.to_dict()[value_key][0]
    if expected_value is not None:
        assert abs(offline_value - expected_value) < 1e-6
    else:
        # pd.isna accepts both None and NaN, so the check does not depend on
        # DataFrame.where() converting NaN to None for this column's dtype.
        assert pd.isna(offline_value)
def test_bigquery_query_to_datastore_correctness(self):
    """Materialize a query-backed BigQuery source and read it back online.

    Loads three rows into a fresh BigQuery table, defines a feature view whose
    source is a ``SELECT *`` over that table, materializes the last five
    minutes, and asserts that driver 1 is served 0.3 -- the value on the later
    of the two ``id == 1`` rows.
    """
    # create dataset
    now_ts = pd.Timestamp.now(tz="UTC").round("ms")
    rows = pd.DataFrame.from_dict(
        {
            "id": [1, 2, 1],
            "value": [0.1, 0.2, 0.3],
            "ts_1": [now_ts - timedelta(minutes=2), now_ts, now_ts],
            "created_ts": [now_ts] * 3,
        }
    )

    # load dataset into BigQuery
    load_config = bigquery.LoadJobConfig()
    table_id = f"{self.gcp_project}.{self.bigquery_dataset}.query_correctness_{int(time.time())}"
    query = f"SELECT * FROM `{table_id}`"
    load_job = self.client.load_table_from_dataframe(
        rows, table_id, job_config=load_config
    )
    load_job.result()  # block until the load job has finished

    # create FeatureView
    # The field mapping renames the table's "ts_1" and "id" columns to the
    # names the feature view refers to ("ts" and "driver_id").
    source = BigQuerySource(
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        field_mapping={"ts_1": "ts", "id": "driver_id"},
        date_partition_column="",
        query=query,
    )
    fv = FeatureView(
        name="test_bq_query_correctness",
        entities=["driver_id"],
        features=[Feature("value", ValueType.FLOAT)],
        ttl=timedelta(minutes=5),
        input=source,
    )
    config = RepoConfig(
        metadata_store="./metadata.db",
        project=f"test_bq_query_correctness_{int(time.time())}",
        provider="gcp",
    )
    fs = FeatureStore(config=config)
    fs.apply([fv])

    # run materialize()
    window_start = datetime.utcnow() - timedelta(minutes=5)
    window_end = datetime.utcnow() - timedelta(minutes=0)
    fs.materialize([fv.name], window_start, window_end)

    # check result of materialize()
    online = fs.get_online_features(
        [f"{fv.name}:value"], [{"driver_id": 1}]
    ).to_dict()
    served_value = online[f"{fv.name}:value"][0]
    assert abs(served_value - 0.3) < 1e-6
def check_offline_and_online_features(
    fs: FeatureStore,
    fv: FeatureView,
    driver_id: int,
    event_timestamp: datetime,
    expected_value: Optional[float],
    full_feature_names: bool,
) -> None:
    """Assert that the online and offline stores agree on ``fv``'s ``value``.

    Args:
        fs: Feature store to query.
        fv: Feature view whose ``value`` feature is checked.
        driver_id: Entity id to look up in both stores.
        event_timestamp: Timestamp placed in the entity dataframe for the
            historical (offline) lookup.
        expected_value: Value both stores should return, or ``None`` when no
            value is expected. ``0.0`` is a real expected value, not "missing".
        full_feature_names: Passed through to both lookups. When true the
            result is read from the ``"<view name>__value"`` key, otherwise
            from ``"value"``.

    Raises:
        AssertionError: If either store returns something other than the
            expected value (within 1e-6), or, when ``expected_value`` is
            ``None``, if the online value is not ``None`` or the offline value
            is not NaN.
    """
    feature_ref = f"{fv.name}:value"
    # Both stores are read through the same key, so resolve it once.
    value_key = f"{fv.name}__value" if full_feature_names else "value"

    # Check online store
    # NOTE(review): the online lookup keys on "driver" while the offline
    # entity dataframe uses "driver_id" -- confirm this matches the entity
    # definition used by the callers.
    response_dict = fs.get_online_features(
        [feature_ref],
        [{"driver": driver_id}],
        full_feature_names=full_feature_names,
    ).to_dict()
    online_value = response_dict[value_key][0]
    # Compare against None explicitly: a plain truthiness test would send an
    # expected value of 0.0 down the "nothing expected" branch.
    if expected_value is not None:
        assert abs(online_value - expected_value) < 1e-6
    else:
        assert online_value is None

    # Check offline store
    df = fs.get_historical_features(
        entity_df=pd.DataFrame.from_dict(
            {"driver_id": [driver_id], "event_timestamp": [event_timestamp]}
        ),
        features=[feature_ref],
        full_feature_names=full_feature_names,
    ).to_df()
    offline_value = df.to_dict()[value_key][0]
    if expected_value is not None:
        assert abs(offline_value - expected_value) < 1e-6
    else:
        assert math.isnan(offline_value)