Ejemplo n.º 1
0
    def get_online_features(
        self,
        feature_ids: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
    ) -> GetOnlineFeaturesResponse:
        """
        Fetch the most recent online feature values from Feast Serving.

        :param feature_ids: Feature ids, each written as
                            [feature_set_name]:[version]:[feature_name],
                            e.g. ["feature_set_1:6:my_feature_1",
                                  "feature_set_1:6:my_feature_2"]
        :param entity_rows: GetOnlineFeaturesRequest.EntityRow items, one per
                            entity key to look up. Timestamp should not be
                            set for online retrieval, and all entity types
                            within a feature set must be provided for each
                            entity key.
        :return: The GetOnlineFeaturesResponse returned by the serving stub.
                 If the call raises grpc.RpcError, the formatted error is
                 printed and None is returned instead.
        """
        self._connect_serving()

        try:
            call_serving = self._serving_service_stub.GetOnlineFeatures
            requested_sets = _build_feature_set_request(feature_ids)
            request = GetOnlineFeaturesRequest(
                feature_sets=requested_sets,
                entity_rows=entity_rows,
            )
            return call_serving(request)
        except grpc.RpcError as rpc_error:
            # Best effort: report the failure and let the caller see None.
            message = format_grpc_exception(
                "GetOnlineFeatures",
                rpc_error.code(),
                rpc_error.details(),
            )
            print(message)
            return None
Ejemplo n.º 2
0
def test_basic_retrieve_online_success(client, cust_trans_df):
    """
    Poll serving once per second until the returned "daily_transactions"
    value for the first customer in cust_trans_df matches the ingested value
    within FLOAT_TOLERANCE. A None response is treated as "not ready yet".
    """
    while True:
        time.sleep(1)

        customer_key = Value(int64_val=cust_trans_df.iloc[0]["customer_id"])
        lookup_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": customer_key})

        # Test retrieve with different variations of the string feature refs
        response = client.get_online_features(
            entity_rows=[lookup_row],
            feature_refs=["daily_transactions", "total_transactions"],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        first_record = response.field_values[0]
        returned_value = float(
            first_record.fields["daily_transactions"].float_val)
        ingested_value = float(cust_trans_df.iloc[0]["daily_transactions"])

        values_match = math.isclose(
            ingested_value,
            returned_value,
            abs_tol=FLOAT_TOLERANCE,
        )
        if values_match:
            return
Ejemplo n.º 3
0
def test_large_volume_retrieve_online_success(client, large_volume_dataframe):
    """
    Poll serving until the large-volume feature values for the first customer
    in large_volume_dataframe are served correctly.

    The retrieval attempt lives in a nested helper that is handed to
    wait_retry_backoff, the same pattern used by the other online retrieval
    tests. Previously the attempt sat in a ``while True`` loop that returned
    on its first pass, so the retry call below it was never reached and the
    test could not fail.
    """
    feature_refs = [
        "daily_transactions_large",
        "total_transactions_large",
    ]

    def try_get_features():
        # One retrieval attempt; returns (response, is_ok) for the retry helper.
        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "customer_id":
                        Value(int64_val=large_volume_dataframe.iloc[0]
                              ["customer_id"])
                    })
            ],
            feature_refs=feature_refs,
        )  # type: GetOnlineFeaturesResponse
        is_ok = all([
            check_online_response(ref, large_volume_dataframe, response)
            for ref in feature_refs
        ])
        return response, is_ok

    # NOTE(review): presumably retries try_get_features until is_ok is true
    # or the timeout elapses -- confirm against wait_retry_backoff.
    wait_retry_backoff(
        retry_fn=try_get_features,
        timeout_secs=90,
        timeout_msg="Timed out trying to get online feature values")
Ejemplo n.º 4
0
def test_large_volume_retrieve_online_success(client, large_volume_dataframe):
    """
    Poll serving once per second until the returned
    "customer_transactions_large:1:daily_transactions" value for the first
    customer in large_volume_dataframe matches the ingested value within
    FLOAT_TOLERANCE. A None response is treated as "not ready yet".
    """
    while True:
        time.sleep(1)

        customer_key = Value(
            int64_val=large_volume_dataframe.iloc[0]["customer_id"])
        lookup_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": customer_key})

        response = client.get_online_features(
            entity_rows=[lookup_row],
            feature_ids=[
                "customer_transactions_large:1:daily_transactions",
                "customer_transactions_large:1:total_transactions",
            ],
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        first_record = response.field_values[0]
        returned_value = float(first_record.fields[
            "customer_transactions_large:1:daily_transactions"].float_val)
        ingested_value = float(
            large_volume_dataframe.iloc[0]["daily_transactions"])

        values_match = math.isclose(
            ingested_value,
            returned_value,
            abs_tol=FLOAT_TOLERANCE,
        )
        if values_match:
            return
Ejemplo n.º 5
0
    def get_online_features(
        self,
        feature_ids: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
    ) -> GetOnlineFeaturesResponse:
        """
        Fetch the most recent online feature values from Feast Serving.

        Args:
            feature_ids: Feature ids, each written as
                [feature_set_name]:[version]:[feature_name], for example
                ["feature_set_1:6:my_feature_1",
                 "feature_set_1:6:my_feature_2"].
            entity_rows: GetOnlineFeaturesRequest.EntityRow items, one per
                entity key to look up. Timestamp should not be set for
                online retrieval. All entity types within a feature set must
                be provided for each entity key.

        Returns:
            The GetOnlineFeaturesResponse returned by the serving stub,
            holding the latest feature values for the provided entities.
        """
        self._connect_serving()

        call_serving = self._serving_service_stub.GetOnlineFeatures
        requested_sets = _build_feature_set_request(feature_ids)
        request = GetOnlineFeaturesRequest(
            feature_sets=requested_sets,
            entity_rows=entity_rows,
        )
        return call_serving(request)
Ejemplo n.º 6
0
    def test_get_online_features(self, mocked_client, mocker):
        """
        Check that get_online_features sends the expected request to the
        (patched) serving stub and that the first returned record exposes the
        entity and both features under the requested reference names.
        """
        ROW_COUNT = 300

        def int_val(x):
            return ValueProto.Value(int64_val=x)

        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))

        # The request the client is expected to build from the arguments.
        expected_request = GetOnlineFeaturesRequest()
        expected_request.features.extend([
            FeatureRefProto(project="driver_project",
                            feature_set="driver",
                            name="age"),
            FeatureRefProto(project="driver_project", name="rating"),
        ])

        # The canned response the patched stub hands back.
        canned_response = GetOnlineFeaturesResponse()
        for driver_id in range(1, ROW_COUNT + 1):
            entity_row = GetOnlineFeaturesRequest.EntityRow(
                fields={"driver_id": int_val(driver_id)})
            expected_request.entity_rows.append(entity_row)

            record = GetOnlineFeaturesResponse.FieldValues(
                fields={
                    "driver_id": int_val(driver_id),
                    "driver_project/driver:age": int_val(1),
                    "driver_project/rating": int_val(9),
                })
            canned_response.field_values.append(record)

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetOnlineFeatures",
            return_value=canned_response,
        )

        got_response = mocked_client.get_online_features(
            entity_rows=expected_request.entity_rows,
            feature_refs=["driver:age", "rating"],
            project="driver_project",
        )  # type: GetOnlineFeaturesResponse

        stub_call = mocked_client._serving_service_stub.GetOnlineFeatures
        stub_call.assert_called_with(expected_request)

        first_fields = got_response.field_values[0].fields
        assert (first_fields["driver_id"] == int_val(1)
                and first_fields["driver:age"] == int_val(1)
                and first_fields["rating"] == int_val(9))
Ejemplo n.º 7
0
    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow,
                                Dict[str, Any]]],
        project: Optional[str] = None,
        omit_entities: bool = False,
    ) -> OnlineResponse:
        """
        Retrieves the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_set:feature" where "feature_set" & "feature" refer to
                the feature set and feature names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key is an entity and each value is
                feast.types.Value or Python native form. EntityRow protos are
                also accepted by the type signature.
            project: Optionally specify the project override. If specified, uses given project for retrieval.
                Overrides the projects specified in Feature References if they are also specified.
                When None, the client's own project (self.project) is sent.
            omit_entities: If true will omit entity values in the returned feature data.

        Returns:
            OnlineResponse wrapping the serving response that contains the
            feature data in records. Each EntityRow provided will yield one
            record, which contains data fields with data value and field
            status metadata (if included).

        Raises:
            grpc.RpcError: If the serving call fails. The new error carries
                the original error's details and is chained to it.

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            ...     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'daily_transactions': [1.1,1.2], 'customer_id': [0,1]}
        """

        try:
            response = self._serving_service.GetOnlineFeatures(
                GetOnlineFeaturesRequest(
                    omit_entities_in_response=omit_entities,
                    features=_build_feature_references(
                        feature_ref_strs=feature_refs),
                    entity_rows=_infer_online_entity_rows(entity_rows),
                    project=project if project is not None else self.project,
                ),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            # Chain explicitly so the original error (status code, traceback)
            # stays attached as the cause of the simplified one.
            raise grpc.RpcError(e.details()) from e

        return OnlineResponse(response)
Ejemplo n.º 8
0
def _infer_online_entity_rows(
    entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow, Dict[str,
                                                                     Any]]],
) -> List[GetOnlineFeaturesRequest.EntityRow]:
    """
    Builds a list of EntityRow protos from Python native type format passed by user.

    Args:
        entity_rows: A list of dictionaries where each key is an entity and each value is
            feast.types.Value or Python native form.

    Returns:
        A list of EntityRow protos parsed from args.
    """

    # Maintain backward compatibility with users providing EntityRow Proto
    if entity_rows and isinstance(entity_rows[0],
                                  GetOnlineFeaturesRequest.EntityRow):
        warnings.warn(
            "entity_rows parameter will only be accepting Dict format from Feast v0.7 onwards",
            DeprecationWarning,
        )
        entity_rows_proto = cast(
            List[Union[GetOnlineFeaturesRequest.EntityRow]], entity_rows)
        return entity_rows_proto

    entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows)
    entity_row_list = []
    entity_type_map = dict()

    for entity in entity_rows_dicts:
        fields = {}
        for key, value in entity.items():
            # Allow for feast.types.Value
            if isinstance(value, Value):
                proto_value = value
            else:
                # Infer the specific type for this row
                current_dtype = python_type_to_feast_value_type(name=key,
                                                                value=value)

                if key not in entity_type_map:
                    entity_type_map[key] = current_dtype
                else:
                    if current_dtype != entity_type_map[key]:
                        raise TypeError(
                            f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
                        )
                proto_value = _python_value_to_proto_value(
                    current_dtype, value)
            fields[key] = proto_value
        entity_row_list.append(
            GetOnlineFeaturesRequest.EntityRow(fields=fields))
    return entity_row_list
Ejemplo n.º 9
0
 def try_get_features():
     # One retrieval attempt for the first user in all_types_dataframe.
     # Returns (response, is_ok); is_ok is the check_online_response result
     # for "float_feature".
     user_key = Value(int64_val=all_types_dataframe.iloc[0]["user_id"])
     lookup_row = GetOnlineFeaturesRequest.EntityRow(
         fields={"user_id": user_key})
     response = client.get_online_features(
         entity_rows=[lookup_row],
         feature_refs=feature_refs,
     )  # type: GetOnlineFeaturesResponse
     return response, check_online_response(
         "float_feature", all_types_dataframe, response)
def test_basic_retrieve_online_multiple_featureset(client, cust_trans_df, driver_df):
    """
    Poll serving once per second until every requested feature, taken from
    two different feature sets, is returned with the value that was ingested
    (within FLOAT_TOLERANCE) for the first customer/driver row.
    """
    # Test retrieve with different variations of the string feature refs
    # ie feature set inference for feature refs without specified feature set
    feature_ref_df_mapping = [
        ("customer_transactions:daily_transactions", cust_trans_df),
        ("driver:rating", driver_df),
        ("total_transactions", cust_trans_df),
    ]

    def check_response(ingest_df, response, feature_ref):
        # Compare the served value for feature_ref with the ingested one.
        served = float(
            response.field_values[0].fields[feature_ref].float_val)

        # "feature_set:feature" -> column "feature"; a bare ref is the column.
        ref_parts = feature_ref.split(":")
        if len(ref_parts) > 1:
            _, column = ref_parts
        else:
            column = feature_ref

        ingested = float(ingest_df.iloc[0][column])
        return math.isclose(ingested, served, abs_tol=FLOAT_TOLERANCE)

    while True:
        time.sleep(1)

        lookup_row = GetOnlineFeaturesRequest.EntityRow(
            fields={
                "customer_id": Value(
                    int64_val=cust_trans_df.iloc[0]["customer_id"]),
                "driver_id": Value(
                    int64_val=driver_df.iloc[0]["driver_id"]),
            })
        requested_refs = [ref for ref, _ in feature_ref_df_mapping]
        response = client.get_online_features(
            entity_rows=[lookup_row],
            feature_refs=requested_refs,
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        outcomes = [
            check_response(df, response, ref)
            for ref, df in feature_ref_df_mapping
        ]
        if all(outcomes):
            return
Ejemplo n.º 11
0
    def test_get_online_features(self, mock_client, mocker):
        """
        Check that get_online_features hands back the (patched) serving
        stub's response: the first record must carry feature_1 == 1 and
        feature_9 == 9.
        """
        ROW_COUNT = 300

        mock_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel("")
        )

        # Nine int64 features, feature_N carrying the value N.
        fields = {
            "feature_set_1:1:feature_" + str(feature_num): ValueProto.Value(
                int64_val=feature_num
            )
            for feature_num in range(1, 10)
        }
        field_values = GetOnlineFeaturesResponse.FieldValues(fields=fields)

        # Same record repeated for every requested entity row.
        canned_response = GetOnlineFeaturesResponse()
        for _ in range(ROW_COUNT):
            canned_response.field_values.append(field_values)

        entity_rows = [
            GetOnlineFeaturesRequest.EntityRow(
                fields={"customer_id": ValueProto.Value(int64_val=row_number)}
            )
            for row_number in range(1, ROW_COUNT + 1)
        ]

        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetOnlineFeatures",
            return_value=canned_response,
        )

        response = mock_client.get_online_features(
            entity_rows=entity_rows,
            feature_ids=[
                "feature_set_1:1:feature_" + str(feature_num)
                for feature_num in range(1, 10)
            ],
        )  # type: GetOnlineFeaturesResponse

        first_fields = response.field_values[0].fields
        assert (
            first_fields["feature_set_1:1:feature_1"].int64_val == 1
            and first_fields["feature_set_1:1:feature_9"].int64_val == 9
        )
Ejemplo n.º 12
0
 def try_get_features():
     # One retrieval attempt for the first customer in cust_trans_df.
     # Returns (response, is_ok); is_ok is true only when
     # check_online_response passes for every ref in feature_refs.
     customer_key = Value(int64_val=cust_trans_df.iloc[0]["customer_id"])
     lookup_row = GetOnlineFeaturesRequest.EntityRow(
         fields={"customer_id": customer_key})
     response = client.get_online_features(
         entity_rows=[lookup_row],
         feature_refs=feature_refs,
     )  # type: GetOnlineFeaturesResponse
     outcomes = [
         check_online_response(ref, cust_trans_df, response)
         for ref in feature_refs
     ]
     return response, all(outcomes)
Ejemplo n.º 13
0
def test_all_types_retrieve_online_success(client, all_types_dataframe):
    """
    Poll serving once per second until the first element of the returned
    "float_list_feature" for the first user in all_types_dataframe matches
    the ingested value within FLOAT_TOLERANCE. A None response is treated as
    "not ready yet".
    """
    requested_refs = [
        "float_feature",
        "int64_feature",
        "int32_feature",
        "string_feature",
        "bytes_feature",
        "bool_feature",
        "double_feature",
        "float_list_feature",
        "int64_list_feature",
        "int32_list_feature",
        "string_list_feature",
        "bytes_list_feature",
        "double_list_feature",
    ]

    while True:
        time.sleep(1)

        user_key = Value(int64_val=all_types_dataframe.iloc[0]["user_id"])
        lookup_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"user_id": user_key})
        response = client.get_online_features(
            entity_rows=[lookup_row],
            feature_refs=requested_refs,
        )  # type: GetOnlineFeaturesResponse

        if response is None:
            continue

        # The served field is keyed with the project prefix.
        served_field = response.field_values[0].fields[
            PROJECT_NAME + "/float_list_feature"]
        returned_float_list = served_field.float_list_val.val
        sent_float_list = all_types_dataframe.iloc[0]["float_list_feature"]

        first_matches = math.isclose(
            returned_float_list[0], sent_float_list[0], abs_tol=FLOAT_TOLERANCE
        )
        if first_matches:
            return
Ejemplo n.º 14
0
    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
        default_project: Optional[str] = None,
    ) -> GetOnlineFeaturesResponse:
        """
        Retrieves the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references in the following format
                [project]/[feature_name]:[version]. Only the feature name
                is a required component in the reference.
                example:
                    ["my_project/my_feature_1:3",
                    "my_project3/my_feature_4:1",]
            entity_rows: List of GetOnlineFeaturesRequest.EntityRow where
                each row contains entities. Timestamp should not be set for
                online retrieval. All entity types within a feature set must
                be provided for each entity key.
            default_project: This project will be used if the project name is
                not provided in the feature reference. Note that when the
                client itself has a project set (self.project), that project
                takes precedence over this argument.

        Returns:
            The GetOnlineFeaturesResponse returned by the serving stub,
            holding the latest feature values for the provided entities.

        Raises:
            grpc.RpcError: If the serving call fails. The new error carries
                the original error's details and is chained to it.
        """
        self._connect_serving()

        try:
            response = self._serving_service_stub.GetOnlineFeatures(
                GetOnlineFeaturesRequest(
                    features=_build_feature_references(
                        feature_refs=feature_refs,
                        # The client-level project wins; the argument is only
                        # a fallback when the client has none.
                        default_project=self.project or default_project,
                    ),
                    entity_rows=entity_rows,
                )
            )
        except grpc.RpcError as e:
            # Chain explicitly so the original error (status code, traceback)
            # stays attached as the cause of the simplified one.
            raise grpc.RpcError(e.details()) from e

        return response
Ejemplo n.º 15
0
 def try_get_features():
     # One retrieval attempt across both feature sets for the first
     # customer/driver row. Returns (response, is_ok); is_ok is true only
     # when check_online_response passes for every (ref, dataframe) pair.
     feature_refs = [ref for ref, _ in feature_ref_df_mapping]
     lookup_row = GetOnlineFeaturesRequest.EntityRow(
         fields={
             "customer_id":
             Value(int64_val=cust_trans_df.iloc[0]["customer_id"]),
             "driver_id":
             Value(int64_val=driver_df.iloc[0]["driver_id"]),
         })
     response = client.get_online_features(
         entity_rows=[lookup_row],
         feature_refs=feature_refs,
     )  # type: GetOnlineFeaturesResponse
     outcomes = [
         check_online_response(ref, df, response)
         for ref, df in feature_ref_df_mapping
     ]
     return response, all(outcomes)
Ejemplo n.º 16
0
    def test_get_online_features(self, mocked_client, mocker):
        """
        Check that field names in the response returned by get_online_features
        no longer carry the project prefix that the (patched) serving stub
        includes in its response.
        """
        ROW_COUNT = 300

        def int_val(x):
            return ValueProto.Value(int64_val=x)

        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))

        # serving can return feature references with projects,
        # get_online_features() should strip the project part.
        served_record = GetOnlineFeaturesResponse.FieldValues(
            fields={
                "driver_project/driver:driver_id": int_val(1),
                "driver_project/driver_id": int_val(9),
            })

        canned_response = GetOnlineFeaturesResponse()
        for _ in range(ROW_COUNT):
            canned_response.field_values.append(served_record)

        entity_rows = [
            GetOnlineFeaturesRequest.EntityRow(
                fields={"customer_id": int_val(row_number)})
            for row_number in range(1, ROW_COUNT + 1)
        ]

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetOnlineFeatures",
            return_value=canned_response,
        )

        # NOTE: Feast Serving does not allow for feature references
        # that specify the same feature in the same request
        response = mocked_client.get_online_features(
            entity_rows=entity_rows,
            feature_refs=["driver:driver_id", "driver_id"],
            project="driver_project",
        )  # type: GetOnlineFeaturesResponse

        first_fields = response.field_values[0].fields
        assert (first_fields["driver:driver_id"].int64_val == 1
                and first_fields["driver_id"].int64_val == 9)
Ejemplo n.º 17
0
def _infer_online_entity_rows(
    entity_rows: List[Dict[str, Any]],
) -> List[GetOnlineFeaturesRequest.EntityRow]:
    """
    Builds a list of EntityRow protos from Python native type format passed by user.

    Args:
        entity_rows: A list of dictionaries where each key is an entity and each value is
            feast.types.Value or Python native form.

    Returns:
        A list of EntityRow protos parsed from args.
    """
    entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows)
    entity_row_list = []
    entity_type_map = dict()

    for entity in entity_rows_dicts:
        fields = {}
        for key, value in entity.items():
            # Allow for feast.types.Value
            if isinstance(value, Value):
                proto_value = value
            else:
                # Infer the specific type for this row
                current_dtype = python_type_to_feast_value_type(name=key,
                                                                value=value)

                if key not in entity_type_map:
                    entity_type_map[key] = current_dtype
                else:
                    if current_dtype != entity_type_map[key]:
                        raise TypeError(
                            f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
                        )
                proto_value = _python_value_to_proto_value(
                    current_dtype, value)
            fields[key] = proto_value
        entity_row_list.append(
            GetOnlineFeaturesRequest.EntityRow(fields=fields))
    return entity_row_list
Ejemplo n.º 18
0
    def test_get_online_features(self, mocked_client, auth_metadata, mocker):
        """
        Check that get_online_features converts dictionary entity rows into
        the expected request, passes the auth metadata to the (patched)
        serving stub, and returns field values together with their statuses,
        including a NULL_VALUE status for an unset value.
        """
        ROW_COUNT = 300
        present = GetOnlineFeaturesResponse.FieldStatus.PRESENT
        null_status = GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE

        def int_val(x):
            return ValueProto.Value(int64_val=x)

        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))

        # The request the client is expected to build from the arguments.
        expected_request = GetOnlineFeaturesRequest(project="driver_project")
        expected_request.features.extend([
            FeatureRefProto(feature_set="driver", name="age"),
            FeatureRefProto(name="rating"),
            FeatureRefProto(name="null_value"),
        ])

        canned_response = GetOnlineFeaturesResponse()
        entity_rows = []
        for driver_id in range(1, ROW_COUNT + 1):
            # Proto form goes into the expected request, dict form is what
            # the test passes to the client.
            expected_request.entity_rows.append(
                GetOnlineFeaturesRequest.EntityRow(
                    fields={"driver_id": int_val(driver_id)}))
            entity_rows.append({"driver_id": int_val(driver_id)})

            record = GetOnlineFeaturesResponse.FieldValues(
                fields={
                    "driver_id": int_val(driver_id),
                    "driver:age": int_val(1),
                    "rating": int_val(9),
                    "null_value": ValueProto.Value(),
                },
                statuses={
                    "driver_id": present,
                    "driver:age": present,
                    "rating": present,
                    "null_value": null_status,
                },
            )
            canned_response.field_values.append(record)

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetOnlineFeatures",
            return_value=canned_response,
        )

        got_response = mocked_client.get_online_features(
            entity_rows=entity_rows,
            feature_refs=["driver:age", "rating", "null_value"],
            project="driver_project",
        )  # type: GetOnlineFeaturesResponse

        stub_call = mocked_client._serving_service_stub.GetOnlineFeatures
        stub_call.assert_called_with(expected_request, metadata=auth_metadata)

        got_fields = got_response.field_values[0].fields
        got_statuses = got_response.field_values[0].statuses
        assert (got_fields["driver_id"] == int_val(1)
                and got_statuses["driver_id"] == present
                and got_fields["driver:age"] == int_val(1)
                and got_statuses["driver:age"] == present
                and got_fields["rating"] == int_val(9)
                and got_statuses["rating"] == present
                and got_fields["null_value"] == ValueProto.Value()
                and got_statuses["null_value"] == null_status)
Ejemplo n.º 19
0
def test_all_types(client):
    """Check that a feature set covering every value type can be served online.

    Looks up version 1 of the ``all_types`` feature set and registers it when
    the client cannot find it. Three rows containing one column per scalar
    and list value type are then ingested, and online serving is polled until
    the first element of the float list returned for the first row matches
    the value that was sent, within ``FLOAT_TOLERANCE``.

    Args:
        client: Client used to look up and apply the feature set and to
            request online features.

    Raises:
        Exception: If the feature set still cannot be retrieved after it has
            been applied and the post-registration wait has elapsed.
    """
    all_types_fs = client.get_feature_set(name="all_types", version=1)

    if all_types_fs is None:
        # Register new feature set if it doesn't exist
        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature",
                        dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature",
                        dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register feature set
        client.apply(all_types_fs)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(10)
        all_types_fs = client.get_feature_set(name="all_types", version=1)

        if all_types_fs is None:
            raise Exception(
                "Client cannot retrieve 'all_types_fs' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    all_types_df = pd.DataFrame({
        "datetime":
        [datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(3)],
        "user_id": [1001, 1002, 1003],
        "int32_feature": [np.int32(1), np.int32(2),
                          np.int32(3)],
        "int64_feature": [np.int64(1), np.int64(2),
                          np.int64(3)],
        # Plain Python floats: ``np.float`` was only an alias for the builtin
        # ``float`` and has been removed from NumPy (1.24+), so calling it
        # raises AttributeError on current releases.
        "float_feature": [0.1, 0.2, 0.3],
        "double_feature": [np.float64(0.1),
                           np.float64(0.2),
                           np.float64(0.3)],
        "string_feature": ["one", "two", "three"],
        "bytes_feature": [b"one", b"two", b"three"],
        "bool_feature": [True, False, False],
        "int32_list_feature": [
            np.array([1, 2, 3, 4], dtype=np.int32),
            np.array([1, 2, 3, 4], dtype=np.int32),
            np.array([1, 2, 3, 4], dtype=np.int32),
        ],
        "int64_list_feature": [
            np.array([1, 2, 3, 4], dtype=np.int64),
            np.array([1, 2, 3, 4], dtype=np.int64),
            np.array([1, 2, 3, 4], dtype=np.int64),
        ],
        "float_list_feature": [
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32),
        ],
        "double_list_feature": [
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64),
            np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64),
        ],
        "string_list_feature": [
            np.array(["one", "two", "three"]),
            np.array(["one", "two", "three"]),
            np.array(["one", "two", "three"]),
        ],
        "bytes_list_feature": [
            np.array([b"one", b"two", b"three"]),
            np.array([b"one", b"two", b"three"]),
            np.array([b"one", b"two", b"three"]),
        ],
        "bool_list_feature": [
            np.array([True, False, True]),
            np.array([True, False, True]),
            np.array([True, False, True]),
        ],
    })

    # Ingest the all-types rows
    all_types_fs.ingest(dataframe=all_types_df)
    time.sleep(3)

    # Poll serving for feature values until the correct values are returned.
    # NOTE: the loop itself has no upper bound; it only exits once the
    # returned value matches.
    while True:
        time.sleep(1)

        response = client.get_online_features(
            entity_rows=[
                GetOnlineFeaturesRequest.EntityRow(
                    fields={
                        "user_id": Value(
                            int64_val=all_types_df.iloc[0]["user_id"])
                    })
            ],
            feature_ids=[
                "all_types:1:float_feature",
                "all_types:1:int64_feature",
                "all_types:1:int32_feature",
                "all_types:1:string_feature",
                "all_types:1:bytes_feature",
                "all_types:1:bool_feature",
                "all_types:1:double_feature",
                "all_types:1:float_list_feature",
                "all_types:1:int64_list_feature",
                "all_types:1:int32_list_feature",
                "all_types:1:string_list_feature",
                "all_types:1:bytes_list_feature",
                "all_types:1:bool_list_feature",
                "all_types:1:double_list_feature",
            ],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: try again on the next iteration
        if response is None:
            continue

        returned_float_list = (
            response.field_values[0].fields["all_types:1:float_list_feature"].
            float_list_val.val)

        sent_float_list = all_types_df.iloc[0]["float_list_feature"]

        # TODO: Add tests for each value and type
        if math.isclose(returned_float_list[0],
                        sent_float_list[0],
                        abs_tol=FLOAT_TOLERANCE):
            break

        # Wait for values to appear in Serving
        time.sleep(1)
Ejemplo n.º 20
0
def test_basic(client):
    """Ingest customer transaction rows and wait until serving returns them.

    Ensures version 1 of the ``customer_transactions`` feature set exists
    (loading it from ``basic/cust_trans_fs.yaml`` and applying it when the
    client cannot find it), ingests five rows keyed by a random offset, and
    polls online serving until the ``daily_transactions`` value returned for
    the first row matches the value sent, within ``FLOAT_TOLERANCE``.

    Args:
        client: Client used to look up and apply the feature set and to
            request online features.

    Raises:
        Exception: If the feature set still cannot be retrieved after it has
            been applied and the post-registration wait has elapsed.
    """

    def _lookup_feature_set():
        # Single place for the (name, version) pair used before and after apply
        return client.get_feature_set(name="customer_transactions",
                                      version=1)

    feature_set = _lookup_feature_set()

    # TODO: Fix source handling in Feast Core to support true idempotent
    #  applies. In this case, applying a feature set without a source will
    #  create a new feature set every time.

    if feature_set is None:
        # Not registered yet: load the definition from file and apply it
        feature_set = FeatureSet.from_yaml("basic/cust_trans_fs.yaml")
        client.apply(feature_set)

        # Feast Core needs some time to fully commit the FeatureSet applied
        # when there is no existing job yet for the Featureset
        time.sleep(15)
        feature_set = _lookup_feature_set()

        if feature_set is None:
            raise Exception(
                "Client cannot retrieve 'customer_transactions' FeatureSet "
                "after registration. Either Feast Core does not save the "
                "FeatureSet correctly or the client needs to wait longer for FeatureSet "
                "to be committed.")

    row_count = 5
    key_offset = random.randint(1000, 100000)  # ensure a unique key space is used
    customer_data = pd.DataFrame({
        "datetime": [
            datetime.utcnow().replace(tzinfo=pytz.utc)
            for _ in range(row_count)
        ],
        "customer_id": list(range(key_offset, key_offset + row_count)),
        "daily_transactions": [np.random.rand() for _ in range(row_count)],
        "total_transactions": [512] * row_count,
    })

    # Ingest customer transaction data
    feature_set.ingest(dataframe=customer_data)

    # The first ingested row is the one every poll asks for and compares with
    first_row = customer_data.iloc[0]
    expected_daily = float(first_row["daily_transactions"])

    # Poll serving for feature values until the correct values are returned
    values_match = False
    while not values_match:
        time.sleep(1)

        entity_row = GetOnlineFeaturesRequest.EntityRow(
            fields={"customer_id": Value(int64_val=first_row["customer_id"])})
        response = client.get_online_features(
            entity_rows=[entity_row],
            feature_ids=[
                "customer_transactions:1:daily_transactions",
                "customer_transactions:1:total_transactions",
            ],
        )  # type: GetOnlineFeaturesResponse

        # No response yet: poll again
        if response is None:
            continue

        served_fields = response.field_values[0].fields
        served_daily = float(
            served_fields["customer_transactions:1:daily_transactions"].
            float_val)

        values_match = math.isclose(
            expected_daily,
            served_daily,
            abs_tol=FLOAT_TOLERANCE,
        )
Ejemplo n.º 21
0
    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
        project: Optional[str] = None,
        omit_entities: bool = False,
    ) -> GetOnlineFeaturesResponse:
        """
        Retrieves the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_set:feature" where "feature_set" & "feature" refer to
                the feature and feature set names respectively.
                Only the feature name is required.
            entity_rows: List of GetFeaturesRequest.EntityRow where each row
                contains entities. Timestamp should not be set for online
                retrieval. All entity types within a feature set must be
                provided for each entity key.
            project: Specifies the project which contain the FeatureSets
                which the requested features belong to. Falls back to
                ``self.project`` when None.
            omit_entities: If true will omit entity values in the returned feature data.

        Returns:
            GetOnlineFeaturesResponse containing the feature data in records.
            Each EntityRow provided will yield one record, which contains
            data fields with data value and field status metadata (if included).
            Feature reference keys in each record have their project part
            stripped; keys that match an entity name are left as they are.

        Raises:
            grpc.RpcError: If the GetOnlineFeatures call fails. The raised
                error carries the details string of the original error,
                which is attached as its cause.
        """
        request = GetOnlineFeaturesRequest(
            omit_entities_in_response=omit_entities,
            features=_build_feature_references(
                feature_ref_strs=feature_refs,
                project=project if project is not None else self.project,
            ),
            entity_rows=entity_rows,
        )

        # Only the RPC itself is guarded; the post-processing below is local.
        try:
            response = self._serving_service.GetOnlineFeatures(request)
        except grpc.RpcError as e:
            # Chain the original error so its status code and traceback
            # remain reachable through __cause__.
            raise grpc.RpcError(e.details()) from e

        entity_refs = {
            key
            for entity_row in entity_rows
            for key in entity_row.fields.keys()
        }

        def strip(ref: str) -> str:
            # strip the project part of the string feature references
            # returned from serving; entity names are passed through as-is
            if ref in entity_refs:
                return ref
            return repr(FeatureRef.from_str(ref, ignore_project=True))

        strip_field_values = []
        for field_value in response.field_values:
            # Build both maps key by key. Unpacking zip(*...) into three
            # names would raise ValueError for a record with no fields.
            stripped_fields = {}
            stripped_statuses = {}
            for key in field_value.fields.keys():
                stripped_key = strip(key)
                stripped_fields[stripped_key] = field_value.fields[key]
                stripped_statuses[stripped_key] = field_value.statuses[key]
            strip_field_values.append(
                GetOnlineFeaturesResponse.FieldValues(
                    fields=stripped_fields,
                    statuses=stripped_statuses,
                ))

        # Replace the records on the response in place with the stripped ones
        del response.field_values[:]
        response.field_values.extend(strip_field_values)

        return response
Ejemplo n.º 22
0
    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
        project: Optional[str] = None,
    ) -> GetOnlineFeaturesResponse:
        """
        Fetch the latest online feature values from Feast Serving.

        Args:
            feature_refs: Feature references to return for each entity, each
                written as "feature_set:feature" (feature set name, then
                feature name). Only the feature name is required.
            entity_rows: GetFeaturesRequest.EntityRow messages holding the
                entities to look up. Timestamp should not be set for online
                retrieval.
            project: Project containing the FeatureSets the requested
                features belong to. ``self.project`` is used when None.

        Returns:
            The GetOnlineFeaturesResponse from serving, with its field values
            rebuilt so that feature reference keys no longer carry the
            project part. Keys that match an entity name are kept unchanged.

        Raises:
            grpc.RpcError: If a grpc.RpcError is raised while the request is
                made or processed; the new error is built from the original
                error's details.
        """
        self._connect_serving()

        try:
            target_project = self.project if project is None else project
            request = GetOnlineFeaturesRequest(
                features=_build_feature_references(
                    feature_ref_strs=feature_refs,
                    project=target_project,
                ),
                entity_rows=entity_rows,
            )
            response = self._serving_service_stub.GetOnlineFeatures(request)

            # Names of every entity field sent in the request
            entity_names = {
                name
                for row in entity_rows
                for name in row.fields.keys()
            }

            def _without_project(ref_str):
                # Entities are returned untouched; feature references lose
                # their project part.
                if ref_str in entity_names:
                    return ref_str
                return repr(FeatureRef.from_str(ref_str, ignore_project=True))

            rebuilt_records = [
                GetOnlineFeaturesResponse.FieldValues(
                    fields={
                        _without_project(ref_str): value
                        for ref_str, value in record.fields.items()
                    })
                for record in response.field_values
            ]

            # Swap the rebuilt records into the response in place
            del response.field_values[:]
            response.field_values.extend(rebuilt_records)

        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return response