def test_large_volume_retrieve_online_success(client, large_volume_dataframe):
    """Poll online serving until the large-volume daily transactions match.

    Each attempt requests the features of the first row of
    ``large_volume_dataframe`` and compares the returned
    ``daily_transactions`` with the ingested value. The test finishes as soon
    as both agree within ``FLOAT_TOLERANCE``; it has no timeout of its own.
    """
    daily_feature_id = "customer_transactions_large:1:daily_transactions"
    total_feature_id = "customer_transactions_large:1:total_transactions"

    while True:
        # Wait one second before every attempt, including the first.
        time.sleep(1)

        first_row = large_volume_dataframe.iloc[0]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=first_row["customer_id"])}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_ids=[daily_feature_id, total_feature_id],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again.
        if response is None:
            continue

        returned_value = float(
            response.field_values[0].fields[daily_feature_id].float_val
        )
        ingested_value = float(first_row["daily_transactions"])
        if math.isclose(ingested_value, returned_value, abs_tol=FLOAT_TOLERANCE):
            return
def test_large_volume_retrieve_online_success(client, large_volume_dataframe):
    """Check that large-volume features become retrievable from online serving.

    The lookup for the first row of ``large_volume_dataframe`` is retried via
    ``wait_retry_backoff`` (90 second timeout) until
    ``check_online_response`` succeeds for every requested feature reference.

    Previously the lookup sat in a ``while True`` loop that returned on its
    first pass, so the retry call was never reached and the test could not
    fail. The lookup is now the retry function handed to
    ``wait_retry_backoff``.
    """
    feature_refs = [
        "daily_transactions_large",
        "total_transactions_large",
    ]

    def try_get_features():
        # One retrieval attempt; returns (response, is_ok) like the other
        # try_get_features helpers in this file.
        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "customer_id": Value(
                            int64_val=large_volume_dataframe.iloc[0]["customer_id"]
                        )
                    }
                )
            ],
            feature_refs=feature_refs,
        )  # type: GetOnlineFeaturesResponse
        is_ok = all([
            check_online_response(ref, large_volume_dataframe, response)
            for ref in feature_refs
        ])
        return response, is_ok

    wait_retry_backoff(
        retry_fn=try_get_features,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values",
    )
def test_basic_retrieve_online_success(client, cust_trans_df):
    """Poll online serving until the customer's daily transactions match.

    Each attempt requests ``daily_transactions`` and ``total_transactions``
    for the first row of ``cust_trans_df``. The test finishes once the
    returned ``daily_transactions`` is within ``FLOAT_TOLERANCE`` of the
    ingested value; it has no timeout of its own.
    """
    while True:
        # Wait one second before every attempt, including the first.
        time.sleep(1)

        first_row = cust_trans_df.iloc[0]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=first_row["customer_id"])}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            # Features are requested by bare name, without a feature set.
            feature_refs=["daily_transactions", "total_transactions"],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again.
        if response is None:
            continue

        returned_value = float(
            response.field_values[0].fields["daily_transactions"].float_val
        )
        ingested_value = float(first_row["daily_transactions"])
        if math.isclose(ingested_value, returned_value, abs_tol=FLOAT_TOLERANCE):
            return
def _infer_online_entity_rows(
    entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow, Dict[str, Any]]],
) -> List[GetOnlineFeaturesRequest.EntityRow]:
    """
    Converts user-supplied entity rows into a list of EntityRow protos.

    Args:
        entity_rows: Either a list of EntityRow protos (deprecated, returned
            unchanged) or a list of dictionaries mapping entity names to
            feast.types.Value instances or Python native values.

    Returns:
        A list of EntityRow protos built from the input.

    Raises:
        TypeError: If the inferred type of a native value differs from the
            type inferred for the same entity in an earlier row.
    """
    # Backward compatibility: only the first element is inspected to decide
    # whether the caller passed EntityRow protos directly.
    if entity_rows and isinstance(entity_rows[0], GetOnlineFeaturesRequest.EntityRow):
        warnings.warn(
            "entity_rows parameter will only be accepting Dict format from Feast v0.7 onwards",
            DeprecationWarning,
        )
        return cast(List[Union[GetOnlineFeaturesRequest.EntityRow]], entity_rows)

    # Type inferred for each entity name the first time it is seen.
    inferred_types = dict()
    row_protos = []
    for row in cast(List[Dict[str, Any]], entity_rows):
        fields = {}
        for name, raw in row.items():
            # feast.types.Value instances are used as-is, without inference.
            if isinstance(raw, Value):
                fields[name] = raw
                continue

            dtype = python_type_to_feast_value_type(name=name, value=raw)
            first_dtype = inferred_types.setdefault(name, dtype)
            if dtype != first_dtype:
                raise TypeError(
                    f"Input entity {name} has mixed types, {dtype} and {first_dtype}. That is not allowed. "
                )
            fields[name] = _python_value_to_proto_value(dtype, raw)
        row_protos.append(GetOnlineFeaturesRequest.EntityRow(fields=fields))
    return row_protos
def try_get_features():
    """Make one online retrieval attempt for the first all-types user.

    Uses ``client``, ``all_types_dataframe`` and ``feature_refs`` from the
    enclosing scope.

    Returns:
        A ``(response, is_ok)`` tuple, where ``is_ok`` is the result of
        ``check_online_response`` for ``float_feature``.
    """
    user_id = all_types_dataframe.iloc[0]["user_id"]
    entity_row = GetOnlineFeaturesRequest.EntityRow(
        fields={"user_id": Value(int64_val=user_id)}
    )
    response = client.get_online_features(
        entity_rows=[entity_row],
        feature_refs=feature_refs,
    )  # type: GetOnlineFeaturesResponse
    return response, check_online_response(
        "float_feature", all_types_dataframe, response
    )
def test_basic_retrieve_online_multiple_featureset(client, cust_trans_df, driver_df):
    """Poll online serving until features from two feature sets match.

    Requests a mix of feature references, some qualified with a feature set
    and one bare name, for the first customer and first driver. The test
    finishes once every returned value is within ``FLOAT_TOLERANCE`` of the
    value in the dataframe it was ingested from; it has no timeout of its own.
    """
    # Each feature ref is paired with the dataframe holding its ingested value.
    # The refs mix "<feature_set>:<name>" and bare "<name>" forms, so the
    # bare form exercises feature set inference.
    ref_to_df = [
        ("customer_transactions:daily_transactions", cust_trans_df),
        ("driver:rating", driver_df),
        ("total_transactions", cust_trans_df),
    ]

    def value_matches(ingest_df, response, feature_ref):
        # Compare the returned float for feature_ref with the ingested one.
        returned = float(response.field_values[0].fields[feature_ref].float_val)
        parts = feature_ref.split(":")
        if len(parts) == 1:
            column = feature_ref
        else:
            # Only "<feature_set>:<name>" is supported; more parts raise.
            _, column = parts
        ingested = float(ingest_df.iloc[0][column])
        return math.isclose(ingested, returned, abs_tol=FLOAT_TOLERANCE)

    while True:
        # Wait one second before every attempt, including the first.
        time.sleep(1)

        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={
                "customer_id": Value(int64_val=cust_trans_df.iloc[0]["customer_id"]),
                "driver_id": Value(int64_val=driver_df.iloc[0]["driver_id"]),
            }
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_refs=[ref for ref, _ in ref_to_df],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again.
        if response is None:
            continue

        if all([value_matches(df, response, ref) for ref, df in ref_to_df]):
            return
def test_get_online_features(self, mock_client, mocker):
    """Check get_online_features returns the stubbed serving response.

    The serving stub's ``GetOnlineFeatures`` is patched to return 300
    identical field-value rows, and the test verifies the first and last
    feature of the first returned row.
    """
    ROW_COUNT = 300
    feature_ids = [
        "feature_set_1:1:feature_" + str(feature_num)
        for feature_num in range(1, 10)
    ]

    mock_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )

    # Every row carries the same values: feature_N -> N.
    canned_values = GetOnlineFeaturesResponse.FieldValues(
        fields={
            feature_id: ValueProto.Value(int64_val=feature_num)
            for feature_num, feature_id in enumerate(feature_ids, start=1)
        }
    )

    canned_response = GetOnlineFeaturesResponse()
    entity_rows = []
    for customer_id in range(1, ROW_COUNT + 1):
        canned_response.field_values.append(canned_values)
        entity_rows.append(
            GetOnlineFeaturesRequest.EntityRow(
                fields={"customer_id": ValueProto.Value(int64_val=customer_id)}
            )
        )

    mocker.patch.object(
        mock_client._serving_service_stub,
        "GetOnlineFeatures",
        return_value=canned_response,
    )

    response = mock_client.get_online_features(
        entity_rows=entity_rows,
        feature_ids=feature_ids,
    )  # type: GetOnlineFeaturesResponse

    first_row_fields = response.field_values[0].fields
    assert first_row_fields["feature_set_1:1:feature_1"].int64_val == 1
    assert first_row_fields["feature_set_1:1:feature_9"].int64_val == 9
def try_get_features():
    """Make one online retrieval attempt for the first customer.

    Uses ``client``, ``cust_trans_df`` and ``feature_refs`` from the
    enclosing scope.

    Returns:
        A ``(response, is_ok)`` tuple, where ``is_ok`` is True only if
        ``check_online_response`` passes for every entry in ``feature_refs``.
    """
    customer_id = cust_trans_df.iloc[0]["customer_id"]
    entity_row = GetOnlineFeaturesRequest.EntityRow(
        fields={"customer_id": Value(int64_val=customer_id)}
    )
    response = client.get_online_features(
        entity_rows=[entity_row],
        feature_refs=feature_refs,
    )  # type: GetOnlineFeaturesResponse

    checks = [
        check_online_response(ref, cust_trans_df, response)
        for ref in feature_refs
    ]
    return response, all(checks)
def test_get_online_features(self, mocked_client, mocker):
    """Check request construction and project stripping in get_online_features.

    The serving stub is patched to return field names prefixed with the
    project. The test asserts that the stub was called with the expected
    request proto and that the first returned row exposes the fields without
    the project prefix.
    """
    ROW_COUNT = 300
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )

    def as_int64(number):
        return ValueProto.Value(int64_val=number)

    # Request the client is expected to send to serving.
    expected_request = GetOnlineFeaturesRequest()
    expected_request.features.extend(
        [
            FeatureRefProto(project="driver_project", feature_set="driver", name="age"),
            FeatureRefProto(project="driver_project", name="rating"),
        ]
    )

    # Response the patched stub hands back, one row per driver id.
    canned_response = GetOnlineFeaturesResponse()
    for driver_id in range(1, ROW_COUNT + 1):
        expected_request.entity_rows.append(
            GetOnlineFeaturesRequest.EntityRow(
                fields={"driver_id": as_int64(driver_id)}
            )
        )
        canned_response.field_values.append(
            GetOnlineFeaturesResponse.FieldValues(
                fields={
                    "driver_id": as_int64(driver_id),
                    "driver_project/driver:age": as_int64(1),
                    "driver_project/rating": as_int64(9),
                }
            )
        )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetOnlineFeatures",
        return_value=canned_response,
    )

    got_response = mocked_client.get_online_features(
        entity_rows=expected_request.entity_rows,
        feature_refs=["driver:age", "rating"],
        project="driver_project",
    )  # type: GetOnlineFeaturesResponse
    mocked_client._serving_service_stub.GetOnlineFeatures.assert_called_with(
        expected_request
    )

    first_row_fields = got_response.field_values[0].fields
    assert first_row_fields["driver_id"] == as_int64(1)
    assert first_row_fields["driver:age"] == as_int64(1)
    assert first_row_fields["rating"] == as_int64(9)
def test_all_types_retrieve_online_success(client, all_types_dataframe):
    """Poll online serving until the all-types float list feature matches.

    Each attempt requests the listed feature types for the first row of
    ``all_types_dataframe``. The test finishes once the first element of the
    returned ``float_list_feature`` is within ``FLOAT_TOLERANCE`` of the
    ingested one; it has no timeout of its own.
    """
    requested_refs = [
        "float_feature",
        "int64_feature",
        "int32_feature",
        "string_feature",
        "bytes_feature",
        "bool_feature",
        "double_feature",
        "float_list_feature",
        "int64_list_feature",
        "int32_list_feature",
        "string_list_feature",
        "bytes_list_feature",
        "double_list_feature",
    ]
    # The returned field is looked up with the project name as prefix.
    float_list_field = PROJECT_NAME + "/float_list_feature"

    while True:
        # Wait one second before every attempt, including the first.
        time.sleep(1)

        first_row = all_types_dataframe.iloc[0]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"user_id": Value(int64_val=first_row["user_id"])}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_refs=requested_refs,
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again.
        if response is None:
            continue

        returned_list = (
            response.field_values[0].fields[float_list_field].float_list_val.val
        )
        ingested_list = first_row["float_list_feature"]
        if math.isclose(
            returned_list[0], ingested_list[0], abs_tol=FLOAT_TOLERANCE
        ):
            return
def try_get_features():
    """Make one online retrieval attempt across customer and driver features.

    Uses ``client``, ``cust_trans_df``, ``driver_df`` and
    ``feature_ref_df_mapping`` from the enclosing scope.

    Returns:
        A ``(response, is_ok)`` tuple, where ``is_ok`` is True only if
        ``check_online_response`` passes for every (ref, dataframe) pair in
        ``feature_ref_df_mapping``.
    """
    requested_refs = [ref for ref, _ in feature_ref_df_mapping]
    entity_row = GetOnlineFeaturesRequest.EntityRow(
        fields={
            "customer_id": Value(int64_val=cust_trans_df.iloc[0]["customer_id"]),
            "driver_id": Value(int64_val=driver_df.iloc[0]["driver_id"]),
        }
    )
    response = client.get_online_features(
        entity_rows=[entity_row],
        feature_refs=requested_refs,
    )  # type: GetOnlineFeaturesResponse

    checks = [
        check_online_response(ref, source_df, response)
        for ref, source_df in feature_ref_df_mapping
    ]
    return response, all(checks)
def test_get_online_features(self, mocked_client, mocker):
    """Check get_online_features strips the project from returned field names.

    The serving stub is patched to return project-prefixed feature
    references, and the test verifies the first returned row is keyed by the
    references without the project part.
    """
    ROW_COUNT = 300
    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )

    def as_int64(number):
        return ValueProto.Value(int64_val=number)

    # Serving can return feature references that include the project;
    # get_online_features() should strip the project part.
    canned_values = GetOnlineFeaturesResponse.FieldValues(
        fields={
            "driver_project/driver:driver_id": as_int64(1),
            "driver_project/driver_id": as_int64(9),
        }
    )

    canned_response = GetOnlineFeaturesResponse()
    entity_rows = []
    for customer_id in range(1, ROW_COUNT + 1):
        canned_response.field_values.append(canned_values)
        entity_rows.append(
            GetOnlineFeaturesRequest.EntityRow(
                fields={"customer_id": as_int64(customer_id)}
            )
        )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetOnlineFeatures",
        return_value=canned_response,
    )

    # NOTE: Feast Serving does not allow for feature references
    # that specify the same feature in the same request
    response = mocked_client.get_online_features(
        entity_rows=entity_rows,
        feature_refs=["driver:driver_id", "driver_id"],
        project="driver_project",
    )  # type: GetOnlineFeaturesResponse

    first_row_fields = response.field_values[0].fields
    assert first_row_fields["driver:driver_id"].int64_val == 1
    assert first_row_fields["driver_id"].int64_val == 9
def _infer_online_entity_rows(
    entity_rows: List[Dict[str, Any]],
) -> List[GetOnlineFeaturesRequest.EntityRow]:
    """
    Converts user-supplied entity row dictionaries into EntityRow protos.

    Args:
        entity_rows: A list of dictionaries mapping entity names to
            feast.types.Value instances or Python native values.

    Returns:
        A list of EntityRow protos, one per input dictionary.

    Raises:
        TypeError: If the inferred type of a native value differs from the
            type inferred for the same entity in an earlier row.
    """
    # Type inferred for each entity name the first time it is seen.
    inferred_types = dict()
    row_protos = []
    for row in cast(List[Dict[str, Any]], entity_rows):
        fields = {}
        for name, raw in row.items():
            # feast.types.Value instances are used as-is, without inference.
            if isinstance(raw, Value):
                fields[name] = raw
                continue

            dtype = python_type_to_feast_value_type(name=name, value=raw)
            first_dtype = inferred_types.setdefault(name, dtype)
            if dtype != first_dtype:
                raise TypeError(
                    f"Input entity {name} has mixed types, {dtype} and {first_dtype}. That is not allowed. "
                )
            fields[name] = _python_value_to_proto_value(dtype, raw)
        row_protos.append(GetOnlineFeaturesRequest.EntityRow(fields=fields))
    return row_protos
def test_basic(client):
    """Register, ingest and retrieve the customer_transactions feature set.

    Registers the feature set from YAML when it does not exist yet, ingests
    five rows with a random customer id offset, and polls online serving
    until the first row's ``daily_transactions`` is within
    ``FLOAT_TOLERANCE`` of the ingested value. The polling loop has no
    timeout of its own.

    Raises:
        Exception: If the feature set still cannot be retrieved after it
            has been applied.
    """
    row_count = 5
    daily_feature_id = "customer_transactions:1:daily_transactions"
    total_feature_id = "customer_transactions:1:total_transactions"

    cust_trans_fs = client.get_feature_set(name="customer_transactions", version=1)

    # TODO: Fix source handling in Feast Core to support true idempotent
    # applies. In this case, applying a feature set without a source will
    # create a new feature set every time.
    if cust_trans_fs is None:
        # Load the feature set definition from file and register it.
        cust_trans_fs = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
        client.apply(cust_trans_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(15)

        cust_trans_fs = client.get_feature_set(
            name="customer_transactions", version=1
        )
        if cust_trans_fs is None:
            raise Exception(
                "Client cannot retrieve 'customer_transactions' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed."
            )

    key_offset = random.randint(1000, 100000)  # ensure a unique key space is used
    customer_data = pd.DataFrame(
        {
            "datetime": [
                datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(row_count)
            ],
            "customer_id": [key_offset + inc for inc in range(row_count)],
            "daily_transactions": [np.random.rand() for _ in range(row_count)],
            "total_transactions": [512] * row_count,
        }
    )

    # Ingest customer transaction data
    cust_trans_fs.ingest(dataframe=customer_data)

    # Poll serving for feature values until the correct values are returned
    while True:
        # Wait one second before every attempt, including the first.
        time.sleep(1)

        first_row = customer_data.iloc[0]
        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=first_row["customer_id"])}
        )
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_ids=[daily_feature_id, total_feature_id],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again.
        if response is None:
            continue

        returned_value = float(
            response.field_values[0].fields[daily_feature_id].float_val
        )
        ingested_value = float(first_row["daily_transactions"])
        if math.isclose(ingested_value, returned_value, abs_tol=FLOAT_TOLERANCE):
            return
def test_all_types(client):
    """Register, ingest and retrieve a feature set covering every value type.

    Creates the ``all_types`` feature set when it does not exist yet, ingests
    three rows with one column per supported type, and polls online serving
    until the first element of the first row's ``float_list_feature`` is
    within ``FLOAT_TOLERANCE`` of the ingested value. The polling loop has no
    timeout of its own.

    Raises:
        Exception: If the feature set still cannot be retrieved after it
            has been applied.
    """
    row_count = 3

    all_types_fs = client.get_feature_set(name="all_types", version=1)

    if all_types_fs is None:
        # Register new feature set if it doesnt exist
        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register feature set
        client.apply(all_types_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(10)

        all_types_fs = client.get_feature_set(name="all_types", version=1)
        if all_types_fs is None:
            raise Exception(
                "Client cannot retrieve 'all_types_fs' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed."
            )

    all_types_df = pd.DataFrame(
        {
            "datetime": [
                datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(row_count)
            ],
            "user_id": [1001, 1002, 1003],
            "int32_feature": [np.int32(1), np.int32(2), np.int32(3)],
            "int64_feature": [np.int64(1), np.int64(2), np.int64(3)],
            # np.float was only an alias of the builtin float and no longer
            # exists in current NumPy, so plain Python floats are used.
            "float_feature": [0.1, 0.2, 0.3],
            "double_feature": [np.float64(0.1), np.float64(0.2), np.float64(0.3)],
            "string_feature": ["one", "two", "three"],
            "bytes_feature": [b"one", b"two", b"three"],
            "bool_feature": [True, False, False],
            # Every row gets its own array with identical contents.
            "int32_list_feature": [
                np.array([1, 2, 3, 4], dtype=np.int32) for _ in range(row_count)
            ],
            "int64_list_feature": [
                np.array([1, 2, 3, 4], dtype=np.int64) for _ in range(row_count)
            ],
            "float_list_feature": [
                np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32)
                for _ in range(row_count)
            ],
            "double_list_feature": [
                np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64)
                for _ in range(row_count)
            ],
            "string_list_feature": [
                np.array(["one", "two", "three"]) for _ in range(row_count)
            ],
            "bytes_list_feature": [
                np.array([b"one", b"two", b"three"]) for _ in range(row_count)
            ],
            "bool_list_feature": [
                np.array([True, False, True]) for _ in range(row_count)
            ],
        }
    )

    # Ingest user embedding data
    all_types_fs.ingest(dataframe=all_types_df)
    time.sleep(3)

    requested_feature_ids = [
        "all_types:1:float_feature",
        "all_types:1:int64_feature",
        "all_types:1:int32_feature",
        "all_types:1:string_feature",
        "all_types:1:bytes_feature",
        "all_types:1:bool_feature",
        "all_types:1:double_feature",
        "all_types:1:float_list_feature",
        "all_types:1:int64_list_feature",
        "all_types:1:int32_list_feature",
        "all_types:1:string_list_feature",
        "all_types:1:bytes_list_feature",
        "all_types:1:bool_list_feature",
        "all_types:1:double_list_feature",
    ]

    # Poll serving for feature values until the correct values are returned
    while True:
        time.sleep(1)

        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "user_id": Value(int64_val=all_types_df.iloc[0]["user_id"])
                    }
                )
            ],
            feature_ids=requested_feature_ids,
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again.
        if response is None:
            continue

        returned_float_list = (
            response.field_values[0]
            .fields["all_types:1:float_list_feature"]
            .float_list_val.val
        )
        sent_float_list = all_types_df.iloc[0]["float_list_feature"]

        # TODO: Add tests for each value and type
        if math.isclose(
            returned_float_list[0], sent_float_list[0], abs_tol=FLOAT_TOLERANCE
        ):
            break

        # Wait for values to appear in Serving
        time.sleep(1)
def test_get_online_features(self, mocked_client, auth_metadata, mocker):
    """Check request construction, auth metadata and statuses for online retrieval.

    Entity rows are passed as dictionaries. The serving stub is patched to
    return 300 rows, each containing a null-valued field. The test asserts
    that the stub was called with the expected request proto and
    ``auth_metadata``, and that values and statuses of the first returned
    row match what the stub supplied.
    """
    ROW_COUNT = 300
    present = GetOnlineFeaturesResponse.FieldStatus.PRESENT
    null_value = GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE

    mocked_client._serving_service_stub = Serving.ServingServiceStub(
        grpc.insecure_channel("")
    )

    def as_int64(number):
        return ValueProto.Value(int64_val=number)

    # Request the client is expected to send to serving.
    expected_request = GetOnlineFeaturesRequest(project="driver_project")
    expected_request.features.extend(
        [
            FeatureRefProto(feature_set="driver", name="age"),
            FeatureRefProto(name="rating"),
            FeatureRefProto(name="null_value"),
        ]
    )

    # Response the patched stub hands back, one row per driver id.
    canned_response = GetOnlineFeaturesResponse()
    entity_rows = []
    for driver_id in range(1, ROW_COUNT + 1):
        expected_request.entity_rows.append(
            GetOnlineFeaturesRequest.EntityRow(
                fields={"driver_id": as_int64(driver_id)}
            )
        )
        entity_rows.append({"driver_id": as_int64(driver_id)})
        canned_response.field_values.append(
            GetOnlineFeaturesResponse.FieldValues(
                fields={
                    "driver_id": as_int64(driver_id),
                    "driver:age": as_int64(1),
                    "rating": as_int64(9),
                    "null_value": ValueProto.Value(),
                },
                statuses={
                    "driver_id": present,
                    "driver:age": present,
                    "rating": present,
                    "null_value": null_value,
                },
            )
        )

    mocker.patch.object(
        mocked_client._serving_service_stub,
        "GetOnlineFeatures",
        return_value=canned_response,
    )

    got_response = mocked_client.get_online_features(
        entity_rows=entity_rows,
        feature_refs=["driver:age", "rating", "null_value"],
        project="driver_project",
    )  # type: GetOnlineFeaturesResponse
    mocked_client._serving_service_stub.GetOnlineFeatures.assert_called_with(
        expected_request, metadata=auth_metadata
    )

    got_fields = got_response.field_values[0].fields
    got_statuses = got_response.field_values[0].statuses

    # Expected (value, status) for each field of the first returned row.
    expected_first_row = {
        "driver_id": (as_int64(1), present),
        "driver:age": (as_int64(1), present),
        "rating": (as_int64(9), present),
        "null_value": (ValueProto.Value(), null_value),
    }
    for field_name, (value, status) in expected_first_row.items():
        assert got_fields[field_name] == value
        assert got_statuses[field_name] == status