def online_read(
    self,
    project: str,
    table: Union[FeatureTable, FeatureView],
    entity_key: EntityKeyProto,
) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]:
    """Read every stored feature value for one entity key.

    Args:
        project: Project name, used together with ``table`` to derive the
            name of the table that is queried.
        table: Feature table or feature view whose rows are read.
        entity_key: Entity key identifying the rows to fetch.

    Returns:
        ``(event_ts, features)`` where ``features`` maps each feature name to
        its parsed ``ValueProto`` and ``event_ts`` is the timestamp of the
        last row fetched; ``(None, None)`` when no row matches the key.
    """
    serialized_key = serialize_entity_key(entity_key)
    cursor = self._get_conn().cursor()
    cursor.execute(
        f"SELECT feature_name, value, event_ts FROM {_table_id(project, table)} WHERE entity_key = ?",
        (serialized_key,),
    )

    features = {}
    row_ts = None
    for name, raw_value, event_ts in cursor.fetchall():
        proto = ValueProto()
        proto.ParseFromString(raw_value)
        features[name] = proto
        # The timestamp of whichever row is fetched last is the one reported.
        row_ts = event_ts

    if features:
        return row_ts, features
    return None, None
def test_basic_retrieve_online_multiple_featureset(client, cust_trans_df, driver_df):
    """Poll serving until features spanning two feature sets match the ingested data.

    The loop sleeps one second per attempt and only exits once every
    requested feature ref returns a value close to the first row of the
    dataframe it was ingested from.
    """
    # Test retrieve with different variations of the string feature refs
    # ie feature set inference for feature refs without specified feature set
    ref_to_source_df = [
        ("customer_transactions:daily_transactions", cust_trans_df),
        ("driver:rating", driver_df),
        ("total_transactions", cust_trans_df),
    ]

    def matches_ingested(ingest_df, response, feature_ref):
        """Return True when the served float value is close to the ingested one."""
        served = float(response.field_values[0].fields[feature_ref].float_val)
        ref_parts = feature_ref.split(":")
        if len(ref_parts) == 1:
            column = feature_ref
        else:
            # "<feature set>:<feature>" -> the dataframe column is the feature part.
            _, column = ref_parts
        ingested = float(ingest_df.iloc[0][column])
        return math.isclose(ingested, served, abs_tol=FLOAT_TOLERANCE)

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={
                "customer_id": Value(int64_val=cust_trans_df.iloc[0]["customer_id"]),
                "driver_id": Value(int64_val=driver_df.iloc[0]["driver_id"]),
            }
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_refs=[ref for ref, _ in ref_to_source_df],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        outcomes = [
            matches_ingested(source_df, response, ref)
            for ref, source_df in ref_to_source_df
        ]
        if all(outcomes):
            break
def test_large_volume_retrieve_online_success(client, large_volume_dataframe):
    """Poll serving until the large-volume daily transactions value is served.

    Each attempt sleeps one second, requests both large-volume features for
    the first customer in the dataframe and compares the returned
    ``daily_transactions`` float against the ingested one.
    """
    daily_id = "customer_transactions_large:1:daily_transactions"
    total_id = "customer_transactions_large:1:total_transactions"

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)
        customer_id = large_volume_dataframe.iloc[0]["customer_id"]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=customer_id)}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_ids=[daily_id, total_id],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        served = float(response.field_values[0].fields[daily_id].float_val)
        ingested = float(large_volume_dataframe.iloc[0]["daily_transactions"])
        if math.isclose(ingested, served, abs_tol=FLOAT_TOLERANCE):
            break
def test_basic_retrieve_online_success(client, cust_trans_df):
    """Poll serving until ``daily_transactions`` matches the ingested value.

    Each attempt sleeps one second, requests two features by bare name for
    the first customer in ``cust_trans_df`` and compares the served float
    against the dataframe.
    """
    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)
        customer_id = cust_trans_df.iloc[0]["customer_id"]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=customer_id)}
        )
        # Test retrieve with different variations of the string feature refs
        requested_refs = ["daily_transactions", "total_transactions"]
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_refs=requested_refs,
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        served = float(response.field_values[0].fields["daily_transactions"].float_val)
        ingested = float(cust_trans_df.iloc[0]["daily_transactions"])
        if math.isclose(ingested, served, abs_tol=FLOAT_TOLERANCE):
            break
def test_large_volume_retrieve_online_success(client, large_volume_dataframe):
    """Retry online retrieval until the large-volume features match the ingested data.

    The request is wrapped in a retry function handed to
    ``wait_retry_backoff`` so that the check is actually re-run until it
    succeeds or the 90 second timeout elapses.
    """
    # Poll serving for feature values until the correct values are returned
    feature_refs = [
        "daily_transactions_large",
        "total_transactions_large",
    ]

    def try_get_features():
        """Issue one request and report whether every feature matches."""
        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "customer_id": Value(
                            int64_val=large_volume_dataframe.iloc[0]["customer_id"]
                        )
                    }
                )
            ],
            feature_refs=feature_refs,
        )  # type: GetOnlineFeaturesResponse
        is_ok = all(
            [
                check_online_response(ref, large_volume_dataframe, response)
                for ref in feature_refs
            ]
        )
        return response, is_ok

    # Previously the request ran in a `while True:` that returned on its first
    # pass, leaving this call unreachable and `try_get_features` undefined.
    wait_retry_backoff(
        retry_fn=try_get_features,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values",
    )
def online_read(
    self, project: str, table: FeatureTable, entity_key: EntityKeyProto
) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]:
    """Fetch the stored feature values for one entity key.

    Args:
        project: Project name; forms part of the lookup key.
        table: Feature table whose ``name`` forms part of the lookup key.
        entity_key: Entity key from which the row id is computed.

    Returns:
        ``(event_ts, features)`` where ``features`` maps feature names to
        parsed ``ValueProto`` objects, or ``(None, None)`` when the client
        returns nothing for the key.
    """
    store = self._initialize_client()
    row_id = compute_datastore_entity_id(entity_key)
    lookup_key = store.key("Project", project, "Table", table.name, "Row", row_id)
    entity = store.get(lookup_key)

    if entity is None:
        return None, None

    features = {}
    for name, raw_value in entity["values"].items():
        proto = ValueProto()
        proto.ParseFromString(raw_value)
        features[name] = proto
    return entity["event_ts"], features
def try_get_features():
    """Issue one online request and check every mapped feature ref.

    Reads ``client``, ``cust_trans_df``, ``driver_df`` and
    ``feature_ref_df_mapping`` from the enclosing scope.

    Returns:
        ``(response, is_ok)`` where ``is_ok`` is True only if
        ``check_online_response`` passes for every (ref, dataframe) pair.
    """
    feature_refs = [entry[0] for entry in feature_ref_df_mapping]
    entity_row = GetOnlineFeaturesRequest.EntityRow(
        fields={
            "customer_id": Value(int64_val=cust_trans_df.iloc[0]["customer_id"]),
            "driver_id": Value(int64_val=driver_df.iloc[0]["driver_id"]),
        }
    )
    response = client.get_online_features(
        entity_rows=[entity_row],
        feature_refs=feature_refs,
    )  # type: GetOnlineFeaturesResponse

    # Evaluate every check (no short-circuit) before combining the results.
    outcomes = [
        check_online_response(ref, source_df, response)
        for ref, source_df in feature_ref_df_mapping
    ]
    return response, all(outcomes)
def produce_feature_rows(entity_name, feature_infos, feature_values_filepath,
                         bootstrap_servers, topic):
    """Read feature values from a CSV file and publish them to Kafka as FeatureRows.

    Args:
        entity_name: Value written to ``entityName`` on every row.
        feature_infos: Sequence of dicts with the keys ``"id"``, ``"name"``
            and ``"dtype"``; ``dtype`` must be ``"Int64"``, ``"Int32"`` or
            ``np.float64``.
        feature_values_filepath: Path of a header-less CSV whose columns are
            ``id``, ``event_timestamp`` and then one column per feature.
        bootstrap_servers: Passed through to ``KafkaProducer``.
        topic: Kafka topic the serialized rows are sent to.

    Raises:
        RuntimeError: If a feature declares an unsupported ``dtype``.
    """
    producer = KafkaProducer(bootstrap_servers=bootstrap_servers)

    feature_names = [info["name"] for info in feature_infos]
    # NOTE(review): np.string_ is kept as in the original; confirm it exists in
    # the NumPy version in use.
    column_dtypes = {"id": np.string_}
    column_dtypes.update((info["name"], info["dtype"]) for info in feature_infos)

    feature_values = pd.read_csv(
        feature_values_filepath,
        names=["id", "event_timestamp"] + feature_names,
        dtype=column_dtypes,
        parse_dates=["event_timestamp"],
    )

    for _, row in feature_values.iterrows():
        feature_row = FeatureRow()
        feature_row.entityKey = row["id"]
        feature_row.entityName = entity_name

        timestamp = Timestamp()
        timestamp.FromJsonString(
            row["event_timestamp"].strftime("%Y-%m-%dT%H:%M:%SZ"))
        feature_row.eventTimestamp.CopyFrom(timestamp)

        for info in feature_infos:
            feature = Feature()
            feature.id = info["id"]
            feature_value = Value()
            feature_name = info["name"]
            dtype = info["dtype"]
            # Strings must be compared by value: `is` only succeeds when both
            # operands happen to be the same interned object.
            if dtype == "Int64":
                feature_value.int64Val = row[feature_name]
            elif dtype == "Int32":
                feature_value.int32Val = row[feature_name]
            elif dtype is np.float64:
                feature_value.doubleVal = row[feature_name]
            else:
                raise RuntimeError(
                    f"Unsupported dtype: {info['dtype']}\n"
                    "Supported valueType: INT32, INT64, FLOAT, DOUBLE\n"
                    "Please update your feature specs in testdata/feature_specs folder"
                )
            feature.value.CopyFrom(feature_value)
            feature_row.features.extend([feature])

        producer.send(topic, feature_row.SerializeToString())

    # Flush once the rows have been queued.
    producer.flush()
def _create_query_features_response(self, entity_name, entities):
    """Build a ``QueryFeaturesResponse`` from a nested mapping of feature values.

    Args:
        entity_name: Value for the response's ``entityName`` field.
        entities: Mapping of entity id to a mapping of feature id to a
            sequence whose element 0 is used as an ``int32Val`` and whose
            element 1 is used as the timestamp.

    Returns:
        The populated ``QueryFeaturesResponse``.
    """
    response = QueryFeaturesResponse(entityName=entity_name)
    for entity_id, feature_map in entities.items():
        features = {
            feature_id: FeatureValue(
                value=Value(int32Val=raw[0]),
                timestamp=raw[1],
            )
            for feature_id, raw in feature_map.items()
        }
        response.entities[entity_id].CopyFrom(serving_pb.Entity(features=features))
    return response
def try_get_features():
    """Issue one online request for the first user and check ``float_feature``.

    Reads ``client``, ``all_types_dataframe`` and ``feature_refs`` from the
    enclosing scope.

    Returns:
        ``(response, is_ok)`` where ``is_ok`` is the result of
        ``check_online_response`` for ``"float_feature"``.
    """
    user_id = all_types_dataframe.iloc[0]["user_id"]
    entity_row = GetOnlineFeaturesRequest.EntityRow(
        fields={"user_id": Value(int64_val=user_id)}
    )
    response = client.get_online_features(
        entity_rows=[entity_row],
        feature_refs=feature_refs,
    )  # type: GetOnlineFeaturesResponse
    return response, check_online_response(
        "float_feature", all_types_dataframe, response
    )
def try_get_features():
    """Issue one online request for the first customer and check every feature ref.

    Reads ``client``, ``cust_trans_df`` and ``feature_refs`` from the
    enclosing scope.

    Returns:
        ``(response, is_ok)`` where ``is_ok`` is True only if
        ``check_online_response`` passes for every ref in ``feature_refs``.
    """
    customer_id = cust_trans_df.iloc[0]["customer_id"]
    entity_row = GetOnlineFeaturesRequest.EntityRow(
        fields={"customer_id": Value(int64_val=customer_id)}
    )
    response = client.get_online_features(
        entity_rows=[entity_row],
        feature_refs=feature_refs,
    )  # type: GetOnlineFeaturesResponse

    # Evaluate every check (no short-circuit) before combining the results.
    outcomes = [
        check_online_response(ref, cust_trans_df, response)
        for ref in feature_refs
    ]
    return response, all(outcomes)
def test_all_types_retrieve_online_success(client, all_types_dataframe):
    """Poll serving until the first element of ``float_list_feature`` is served correctly.

    Each attempt sleeps one second, requests the listed features for the
    first user in ``all_types_dataframe`` and compares element 0 of the
    returned float list with the ingested one.
    """
    requested_refs = [
        "float_feature",
        "int64_feature",
        "int32_feature",
        "string_feature",
        "bytes_feature",
        "bool_feature",
        "double_feature",
        "float_list_feature",
        "int64_list_feature",
        "int32_list_feature",
        "string_list_feature",
        "bytes_list_feature",
        "double_list_feature",
    ]

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)
        user_id = all_types_dataframe.iloc[0]["user_id"]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"user_id": Value(int64_val=user_id)}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            # Hand over a fresh list on every attempt.
            feature_refs=list(requested_refs),
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        # The response field is keyed by "<project>/<feature>".
        field_key = PROJECT_NAME + "/float_list_feature"
        served_list = response.field_values[0].fields[field_key].float_list_val.val
        ingested_list = all_types_dataframe.iloc[0]["float_list_feature"]
        if math.isclose(served_list[0], ingested_list[0], abs_tol=FLOAT_TOLERANCE):
            break
def test_basic_retrieve_online_entity_listform(client, list_entity_dataframe):
    """Check online retrieval when the entity value is itself a list.

    Case 1 registers and ingests the ``district`` feature set, then fetches
    the features twice -- once with native Python entity values and once
    with an explicit ``Value`` proto -- and expects identical results.
    Case 2 expects a ``ValueError`` for a list entity with mixed element types.
    """
    # Case 1: Features retrieval with entity in list format check
    feature_specs = [
        ("district_rating", ValueType.INT64),
        ("district_cost", ValueType.FLOAT),
        ("district_past_transactions_int", ValueType.INT64_LIST),
        ("district_past_transactions_double", ValueType.DOUBLE_LIST),
        ("district_past_transactions_float", ValueType.FLOAT_LIST),
        ("district_past_transactions_string", ValueType.STRING_LIST),
        ("district_past_transactions_bool", ValueType.BOOL_LIST),
    ]
    district_fs = FeatureSet(
        name="district",
        features=[Feature(name=name, dtype=dtype) for name, dtype in feature_specs],
        entities=[Entity("district_ids", dtype=ValueType.INT64_LIST)],
        max_age=Duration(seconds=3600),
    )

    client.set_project(PROJECT_NAME)
    client.apply(district_fs)
    district_fs = client.get_feature_set(name="district")
    client.ingest(district_fs, list_entity_dataframe, timeout=600)
    time.sleep(15)

    native_entity_rows = [
        {"district_ids": [np.int64(1), np.int64(2), np.int64(3)]}
    ]
    requested_features = [name for name, _ in feature_specs]
    proto_entity_rows = [
        {"district_ids": Value(int64_list_val=Int64List(val=[1, 2, 3]))}
    ]

    def fetch_with_retry(entity_rows):
        """Fetch the requested features for ``entity_rows`` through the retry helper."""

        def attempt():
            response = client.get_online_features(
                entity_rows=entity_rows,
                feature_refs=requested_features,
            )
            return response, True

        return wait_retry_backoff(
            retry_fn=attempt,
            timeout_secs=90,
            timeout_msg="Timed out trying to get online feature values",
        )

    actual_from_native = fetch_with_retry(native_entity_rows)
    actual_from_proto = fetch_with_retry(proto_entity_rows)

    online_features_expected = {
        "district_ids": [[np.int64(1), np.int64(2), np.int64(3)]],
        "district_rating": [1],
        "district_cost": [1.5],
        "district_past_transactions_int": [[1, 3]],
        "district_past_transactions_double": [[1.5, 3.0]],
        "district_past_transactions_float": [[1.5, 3.0]],
        "district_past_transactions_string": [["first_1", "second_1"]],
        "district_past_transactions_bool": [[True, False]],
    }

    assert actual_from_native.to_dict() == online_features_expected
    assert actual_from_proto.to_dict() == online_features_expected

    # Case 2: Features retrieval with entity in list format check with mixed types
    with pytest.raises(ValueError) as excinfo:
        mixed_entity_rows = [
            {"district_ids": [np.int64(1), np.int64(2), True]}
        ]
        client.get_online_features(
            entity_rows=mixed_entity_rows,
            feature_refs=requested_features,
        )

    expected_message = "List value type for field district_ids is inconsistent. ValueType.INT64 different from ValueType.BOOL."
    assert expected_message in str(excinfo.value)
def test_basic_retrieve_online_entity_nonlistform(client, nonlist_entity_dataframe, list_entity_dataframe):
    """Check online retrieval for several scalar-valued entity rows.

    Case 1 registers and ingests the ``customer2`` feature set, then fetches
    the features for two customers twice -- once with native Python entity
    values and once with explicit ``Value`` protos -- and expects identical
    results. Case 2 expects a ``TypeError`` when the same entity is given
    values of different types across rows.
    """
    # Case 1: Feature retrieval with multiple entities retrieval check
    feature_specs = [
        ("customer2_rating", ValueType.INT64),
        ("customer2_cost", ValueType.FLOAT),
        ("customer2_past_transactions_int", ValueType.INT64_LIST),
        ("customer2_past_transactions_double", ValueType.DOUBLE_LIST),
        ("customer2_past_transactions_float", ValueType.FLOAT_LIST),
        ("customer2_past_transactions_string", ValueType.STRING_LIST),
        ("customer2_past_transactions_bool", ValueType.BOOL_LIST),
    ]
    customer_fs = FeatureSet(
        name="customer2",
        features=[Feature(name=name, dtype=dtype) for name, dtype in feature_specs],
        entities=[Entity("customer_id2", ValueType.INT64)],
        max_age=Duration(seconds=3600),
    )

    client.set_project(PROJECT_NAME)
    client.apply(customer_fs)
    customer_fs = client.get_feature_set(name="customer2")
    client.ingest(customer_fs, nonlist_entity_dataframe, timeout=600)
    time.sleep(15)

    native_entity_rows = [{"customer_id2": 0}, {"customer_id2": 1}]
    requested_features = [name for name, _ in feature_specs]
    proto_entity_rows = [
        {"customer_id2": Value(int64_val=0)},
        {"customer_id2": Value(int64_val=1)},
    ]

    def fetch_with_retry(entity_rows):
        """Fetch the requested features for ``entity_rows`` through the retry helper."""

        def attempt():
            response = client.get_online_features(
                entity_rows=entity_rows,
                feature_refs=requested_features,
            )
            return response, True

        return wait_retry_backoff(
            retry_fn=attempt,
            timeout_secs=90,
            timeout_msg="Timed out trying to get online feature values",
        )

    actual_from_native = fetch_with_retry(native_entity_rows)
    actual_from_proto = fetch_with_retry(proto_entity_rows)

    online_features_expected = {
        "customer_id2": [0, 1],
        "customer2_rating": [0, 1],
        "customer2_cost": [0.5, 1.5],
        "customer2_past_transactions_int": [[0, 2], [1, 3]],
        "customer2_past_transactions_double": [[0.5, 2.0], [1.5, 3.0]],
        "customer2_past_transactions_float": [[0.5, 2.0], [1.5, 3.0]],
        "customer2_past_transactions_string": [
            ["first_0", "second_0"],
            ["first_1", "second_1"],
        ],
        "customer2_past_transactions_bool": [[True, False], [True, False]],
    }

    assert actual_from_native.to_dict() == online_features_expected
    assert actual_from_proto.to_dict() == online_features_expected

    # Case 2: Feature retrieval with multiple entities retrieval check with mixed types
    with pytest.raises(TypeError) as excinfo:
        mixed_entity_rows = [{"customer_id": 0}, {"customer_id": "error_pls"}]
        client.get_online_features(
            entity_rows=mixed_entity_rows,
            feature_refs=requested_features,
        )

    expected_message = "Input entity customer_id has mixed types, ValueType.STRING and ValueType.INT64. That is not allowed."
    assert expected_message in str(excinfo.value)
def test_basic(client):
    """Register (if needed), ingest and read back the customer transactions feature set.

    Five rows with a random customer-id offset are ingested; the test then
    polls serving once per second until the served ``daily_transactions``
    value for the first customer is close to the ingested one.
    """
    feature_set = client.get_feature_set(name="customer_transactions", version=1)

    # TODO: Fix source handling in Feast Core to support true idempotent
    #  applies. In this case, applying a feature set without a source will
    #  create a new feature set every time.
    if feature_set is None:
        # Load feature set from file
        feature_set = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")

        # Register feature set
        client.apply(feature_set)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(15)
        feature_set = client.get_feature_set(name="customer_transactions", version=1)

        if feature_set is None:
            raise Exception(
                "Client cannot retrieve 'customer_transactions' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    row_count = 5
    offset = random.randint(1000, 100000)  # ensure a unique key space is used
    customer_data = pd.DataFrame({
        "datetime": [
            datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(row_count)
        ],
        "customer_id": [offset + inc for inc in range(row_count)],
        "daily_transactions": [np.random.rand() for _ in range(row_count)],
        "total_transactions": [512] * row_count,
    })

    # Ingest customer transaction data
    feature_set.ingest(dataframe=customer_data)

    daily_id = "customer_transactions:1:daily_transactions"
    total_id = "customer_transactions:1:total_transactions"

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)
        customer_id = customer_data.iloc[0]["customer_id"]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=customer_id)}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_ids=[daily_id, total_id],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        served = float(response.field_values[0].fields[daily_id].float_val)
        ingested = float(customer_data.iloc[0]["daily_transactions"])
        if math.isclose(ingested, served, abs_tol=FLOAT_TOLERANCE):
            break
def test_all_types(client):
    """Register (if needed), ingest and read back a feature set covering every value type.

    Three rows are ingested for users 1001-1003. The test then polls serving
    until element 0 of the served ``float_list_feature`` for the first user
    is close to the ingested value.
    """
    all_types_fs = client.get_feature_set(name="all_types", version=1)

    if all_types_fs is None:
        # Register new feature set if it doesnt exist
        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register feature set
        client.apply(all_types_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(10)
        all_types_fs = client.get_feature_set(name="all_types", version=1)

        if all_types_fs is None:
            raise Exception(
                "Client cannot retrieve 'all_types_fs' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    all_types_df = pd.DataFrame({
        "datetime": [datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(3)],
        "user_id": [1001, 1002, 1003],
        "int32_feature": [np.int32(1), np.int32(2), np.int32(3)],
        "int64_feature": [np.int64(1), np.int64(2), np.int64(3)],
        # `np.float` was only an alias of the builtin `float` and has been
        # removed from NumPy, so the builtin is used directly.
        "float_feature": [float(0.1), float(0.2), float(0.3)],
        "double_feature": [np.float64(0.1), np.float64(0.2), np.float64(0.3)],
        "string_feature": ["one", "two", "three"],
        "bytes_feature": [b"one", b"two", b"three"],
        "bool_feature": [True, False, False],
        "int32_list_feature": [
            np.array([1, 2, 3, 4], dtype=np.int32),
            np.array([1, 2, 3, 4], dtype=np.int32),
            np.array([1, 2, 3, 4], dtype=np.int32),
        ],
        "int64_list_feature": [
            np.array([1, 2, 3, 4], dtype=np.int64),
            np.array([1, 2, 3, 4], dtype=np.int64),
            np.array([1, 2, 3, 4], dtype=np.int64),
        ],
        "float_list_feature": [
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32),
        ],
        "double_list_feature": [
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64),
        ],
        "string_list_feature": [
            np.array(["one", "two", "three"]),
            np.array(["one", "two", "three"]),
            np.array(["one", "two", "three"]),
        ],
        "bytes_list_feature": [
            np.array([b"one", b"two", b"three"]),
            np.array([b"one", b"two", b"three"]),
            np.array([b"one", b"two", b"three"]),
        ],
        "bool_list_feature": [
            np.array([True, False, True]),
            np.array([True, False, True]),
            np.array([True, False, True]),
        ],
    })

    # Ingest user embedding data
    all_types_fs.ingest(dataframe=all_types_df)
    time.sleep(3)

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)

        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "user_id": Value(
                            int64_val=all_types_df.iloc[0]["user_id"])
                    })
            ],
            feature_ids=[
                "all_types:1:float_feature",
                "all_types:1:int64_feature",
                "all_types:1:int32_feature",
                "all_types:1:string_feature",
                "all_types:1:bytes_feature",
                "all_types:1:bool_feature",
                "all_types:1:double_feature",
                "all_types:1:float_list_feature",
                "all_types:1:int64_list_feature",
                "all_types:1:int32_list_feature",
                "all_types:1:string_list_feature",
                "all_types:1:bytes_list_feature",
                "all_types:1:bool_list_feature",
                "all_types:1:double_list_feature",
            ],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        returned_float_list = (
            response.field_values[0]
            .fields["all_types:1:float_list_feature"]
            .float_list_val.val
        )
        sent_float_list = all_types_df.iloc[0]["float_list_feature"]

        # TODO: Add tests for each value and type
        if math.isclose(returned_float_list[0],
                        sent_float_list[0],
                        abs_tol=FLOAT_TOLERANCE):
            break

        # Wait for values to appear in Serving
        time.sleep(1)