def _augment_response_with_on_demand_transforms( self, feature_refs: List[str], full_feature_names: bool, initial_response: OnlineResponse, result_rows: List[GetOnlineFeaturesResponse.FieldValues], ) -> OnlineResponse: all_on_demand_feature_views = { view.name: view for view in self._registry.list_on_demand_feature_views( project=self.project, allow_cache=True) } all_odfv_feature_names = all_on_demand_feature_views.keys() if len(all_on_demand_feature_views) == 0: return initial_response initial_response_df = initial_response.to_df() odfv_feature_refs = defaultdict(list) for feature_ref in feature_refs: view_name, feature_name = feature_ref.split(":") if view_name in all_odfv_feature_names: odfv_feature_refs[view_name].append(feature_name) # Apply on demand transformations for odfv_name, _feature_refs in odfv_feature_refs.items(): odfv = all_on_demand_feature_views[odfv_name] transformed_features_df = odfv.get_transformed_features_df( full_feature_names, initial_response_df) for row_idx in range(len(result_rows)): result_row = result_rows[row_idx] selected_subset = [ f for f in transformed_features_df.columns if f in _feature_refs ] for transformed_feature in selected_subset: transformed_feature_name = ( f"{odfv.name}__{transformed_feature}" if full_feature_names else transformed_feature) proto_value = python_value_to_proto_value( transformed_features_df[transformed_feature]. values[row_idx]) result_row.fields[transformed_feature_name].CopyFrom( proto_value) result_row.statuses[ transformed_feature_name] = GetOnlineFeaturesResponse.FieldStatus.PRESENT return OnlineResponse( GetOnlineFeaturesResponse(field_values=result_rows))
def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], project: Optional[str] = None, ) -> OnlineResponse: """ Retrieves the latest online feature data from Feast Serving. Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. project: Optionally specify the the project override. If specified, uses given project for retrieval. Overrides the projects specified in Feature References if also are specified. Returns: GetOnlineFeaturesResponse containing the feature data in records. Each EntityRow provided will yield one record, which contains data fields with data value and field status metadata (if included). Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = feast_client.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ if self._telemetry_enabled: if self._telemetry_counter["get_online_features"] % 1000 == 0: log_usage( "get_online_features", self._telemetry_id, datetime.utcnow(), self.version(sdk_only=True), ) self._telemetry_counter["get_online_features"] += 1 try: response = self._serving_service.GetOnlineFeaturesV2( GetOnlineFeaturesRequestV2( features=_build_feature_references( feature_ref_strs=feature_refs), entity_rows=_infer_online_entity_rows(entity_rows), project=project if project is not None else self.project, ), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) response = OnlineResponse(response) return response
def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], ) -> OnlineResponse: """ Retrieves the latest online feature data. Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. Returns: OnlineResponse containing the feature data in records. Examples: >>> from feast import FeatureStore >>> >>> store = FeatureStore(repo_path="...") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = store.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ response = self._get_online_features( feature_refs=feature_refs, entity_rows=_infer_online_entity_rows(entity_rows), project=self.config.project, ) return OnlineResponse(response)
def get_online_features( self, feature_refs: List[str], entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow, Dict[str, Any]]], project: Optional[str] = None, omit_entities: bool = False, ) -> OnlineResponse: """ Retrieves the latest online feature data from Feast Serving Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_set:feature" where "feature_set" & "feature" refer to the feature and feature set names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key is an entity and each value is feast.types.Value or Python native form. project: Optionally specify the the project override. If specified, uses given project for retrieval. Overrides the projects specified in Feature References if also are specified. omit_entities: If true will omit entity values in the returned feature data. Returns: GetOnlineFeaturesResponse containing the feature data in records. Each EntityRow provided will yield one record, which contains data fields with data value and field status metadata (if included). Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") >>> feature_refs = ["daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = feast_client.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'daily_transactions': [1.1,1.2], 'customer_id': [0,1]} """ try: response = self._serving_service.GetOnlineFeatures( GetOnlineFeaturesRequest( omit_entities_in_response=omit_entities, features=_build_feature_references( feature_ref_strs=feature_refs), entity_rows=_infer_online_entity_rows(entity_rows), project=project if project is not None else self.project, ), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) response = OnlineResponse(response) return response
def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], ) -> OnlineResponse: """ Retrieves the latest online feature data. Note: This method will download the full feature registry the first time it is run. If you are using a remote registry like GCS or S3 then that may take a few seconds. The registry remains cached up to a TTL duration (which can be set to infinitey). If the cached registry is stale (more time than the TTL has passed), then a new registry will be downloaded synchronously by this method. This download may introduce latency to online feature retrieval. In order to avoid synchronous downloads, please call refresh_registry() prior to the TTL being reached. Remember it is possible to set the cache TTL to infinity (cache forever). Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. Returns: OnlineResponse containing the feature data in records. Examples: >>> from feast import FeatureStore >>> >>> store = FeatureStore(repo_path="...") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = store.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ response = self._get_online_features( feature_refs=feature_refs, entity_rows=_infer_online_entity_rows(entity_rows), project=self.config.project, ) return OnlineResponse(response)
def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], ) -> OnlineResponse: """ Retrieves the latest online feature data. Note: This method will download the full feature registry the first time it is run. If you are using a remote registry like GCS or S3 then that may take a few seconds. The registry remains cached up to a TTL duration (which can be set to infinitey). If the cached registry is stale (more time than the TTL has passed), then a new registry will be downloaded synchronously by this method. This download may introduce latency to online feature retrieval. In order to avoid synchronous downloads, please call refresh_registry() prior to the TTL being reached. Remember it is possible to set the cache TTL to infinity (cache forever). Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. Returns: OnlineResponse containing the feature data in records. Examples: >>> from feast import FeatureStore >>> >>> store = FeatureStore(repo_path="...") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = store.get_online_features( >>> feature_refs, entity_rows) >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ provider = self._get_provider() entities = self.list_entities(allow_cache=True) entity_name_to_join_key_map = {} for entity in entities: entity_name_to_join_key_map[entity.name] = entity.join_key join_key_rows = [] for row in entity_rows: join_key_row = {} for entity_name, entity_value in row.items(): try: join_key = entity_name_to_join_key_map[entity_name] except KeyError: raise Exception( f"Entity {entity_name} does not exist in project {self.project}" ) join_key_row[join_key] = entity_value join_key_rows.append(join_key_row) entity_row_proto_list = _infer_online_entity_rows(join_key_rows) union_of_entity_keys = [] result_rows: List[GetOnlineFeaturesResponse.FieldValues] = [] for entity_row_proto in entity_row_proto_list: union_of_entity_keys.append(_entity_row_to_key(entity_row_proto)) result_rows.append(_entity_row_to_field_values(entity_row_proto)) all_feature_views = self._registry.list_feature_views( project=self.project, allow_cache=True) grouped_refs = _group_refs(feature_refs, all_feature_views) for table, requested_features in grouped_refs: entity_keys = _get_table_entity_keys(table, union_of_entity_keys, entity_name_to_join_key_map) read_rows = provider.online_read( project=self.project, table=table, entity_keys=entity_keys, ) for row_idx, read_row in enumerate(read_rows): row_ts, feature_data = read_row result_row = result_rows[row_idx] if feature_data is None: for feature_name in requested_features: feature_ref = f"{table.name}__{feature_name}" result_row.statuses[ feature_ref] = GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND else: for feature_name in feature_data: feature_ref = f"{table.name}__{feature_name}" if feature_name in requested_features: result_row.fields[feature_ref].CopyFrom( feature_data[feature_name]) result_row.statuses[ feature_ref] = GetOnlineFeaturesResponse.FieldStatus.PRESENT return OnlineResponse( GetOnlineFeaturesResponse(field_values=result_rows))
def get_online_features( self, features: Union[List[str], FeatureService], entity_rows: List[Dict[str, Any]], feature_refs: Optional[List[str]] = None, full_feature_names: bool = False, ) -> OnlineResponse: """ Retrieves the latest online feature data. Note: This method will download the full feature registry the first time it is run. If you are using a remote registry like GCS or S3 then that may take a few seconds. The registry remains cached up to a TTL duration (which can be set to infinity). If the cached registry is stale (more time than the TTL has passed), then a new registry will be downloaded synchronously by this method. This download may introduce latency to online feature retrieval. In order to avoid synchronous downloads, please call refresh_registry() prior to the TTL being reached. Remember it is possible to set the cache TTL to infinity (cache forever). Args: features: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. Returns: OnlineResponse containing the feature data in records. Raises: Exception: No entity with the specified name exists. Examples: Materialize all features into the online store over the interval from 3 hours ago to 10 minutes ago, and then retrieve these online features. >>> from feast import FeatureStore, RepoConfig >>> fs = FeatureStore(repo_path="feature_repo") >>> online_response = fs.get_online_features( ... features=[ ... "driver_hourly_stats:conv_rate", ... "driver_hourly_stats:acc_rate", ... "driver_hourly_stats:avg_daily_trips", ... ], ... entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}, {"driver_id": 1003}, {"driver_id": 1004}], ... ) >>> online_response_dict = online_response.to_dict() """ _feature_refs = self._get_features(features, feature_refs) provider = self._get_provider() entities = self.list_entities(allow_cache=True) entity_name_to_join_key_map = {} for entity in entities: entity_name_to_join_key_map[entity.name] = entity.join_key join_key_rows = [] for row in entity_rows: join_key_row = {} for entity_name, entity_value in row.items(): try: join_key = entity_name_to_join_key_map[entity_name] except KeyError: raise EntityNotFoundException(entity_name, self.project) join_key_row[join_key] = entity_value join_key_rows.append(join_key_row) entity_row_proto_list = _infer_online_entity_rows(join_key_rows) union_of_entity_keys = [] result_rows: List[GetOnlineFeaturesResponse.FieldValues] = [] for entity_row_proto in entity_row_proto_list: union_of_entity_keys.append(_entity_row_to_key(entity_row_proto)) result_rows.append(_entity_row_to_field_values(entity_row_proto)) all_feature_views = self._registry.list_feature_views( project=self.project, allow_cache=True) _validate_feature_refs(_feature_refs, full_feature_names) grouped_refs = _group_feature_refs(_feature_refs, all_feature_views) for table, requested_features in grouped_refs: entity_keys = _get_table_entity_keys(table, union_of_entity_keys, entity_name_to_join_key_map) read_rows = provider.online_read( config=self.config, table=table, entity_keys=entity_keys, requested_features=requested_features, ) for row_idx, read_row in enumerate(read_rows): row_ts, feature_data = read_row result_row = result_rows[row_idx] if feature_data is None: for feature_name in requested_features: feature_ref = (f"{table.name}__{feature_name}" if full_feature_names else feature_name) result_row.statuses[ feature_ref] = GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND else: for feature_name in feature_data: feature_ref = (f"{table.name}__{feature_name}" if full_feature_names else feature_name) if feature_name in requested_features: result_row.fields[feature_ref].CopyFrom( feature_data[feature_name]) result_row.statuses[ feature_ref] = GetOnlineFeaturesResponse.FieldStatus.PRESENT return OnlineResponse( GetOnlineFeaturesResponse(field_values=result_rows))
def get_online_features( self, features: Union[List[str], FeatureService], entity_rows: List[Dict[str, Any]], feature_refs: Optional[List[str]] = None, full_feature_names: bool = False, ) -> OnlineResponse: """ Retrieves the latest online feature data. Note: This method will download the full feature registry the first time it is run. If you are using a remote registry like GCS or S3 then that may take a few seconds. The registry remains cached up to a TTL duration (which can be set to infinity). If the cached registry is stale (more time than the TTL has passed), then a new registry will be downloaded synchronously by this method. This download may introduce latency to online feature retrieval. In order to avoid synchronous downloads, please call refresh_registry() prior to the TTL being reached. Remember it is possible to set the cache TTL to infinity (cache forever). Args: features: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. Returns: OnlineResponse containing the feature data in records. Raises: Exception: No entity with the specified name exists. Examples: Materialize all features into the online store over the interval from 3 hours ago to 10 minutes ago, and then retrieve these online features. >>> from feast import FeatureStore, RepoConfig >>> fs = FeatureStore(repo_path="feature_repo") >>> online_response = fs.get_online_features( ... features=[ ... "driver_hourly_stats:conv_rate", ... "driver_hourly_stats:acc_rate", ... "driver_hourly_stats:avg_daily_trips", ... ], ... entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}, {"driver_id": 1003}, {"driver_id": 1004}], ... ) >>> online_response_dict = online_response.to_dict() """ _feature_refs = self._get_features(features, feature_refs) all_feature_views = self._list_feature_views(allow_cache=True, hide_dummy_entity=False) all_on_demand_feature_views = self._registry.list_on_demand_feature_views( project=self.project, allow_cache=True) _validate_feature_refs(_feature_refs, full_feature_names) grouped_refs, grouped_odfv_refs = _group_feature_refs( _feature_refs, all_feature_views, all_on_demand_feature_views) if len(grouped_odfv_refs) > 0: log_event(UsageEvent.GET_ONLINE_FEATURES_WITH_ODFV) feature_views = list(view for view, _ in grouped_refs) entityless_case = DUMMY_ENTITY_NAME in [ entity_name for feature_view in feature_views for entity_name in feature_view.entities ] provider = self._get_provider() entities = self._list_entities(allow_cache=True, hide_dummy_entity=False) entity_name_to_join_key_map = {} for entity in entities: entity_name_to_join_key_map[entity.name] = entity.join_key needed_request_data_features = self._get_needed_request_data_features( grouped_odfv_refs) join_key_rows = [] request_data_features: Dict[str, List[Any]] = {} # Entity rows may be either entities or request data. for row in entity_rows: join_key_row = {} for entity_name, entity_value in row.items(): # Found request data if entity_name in needed_request_data_features: if entity_name not in request_data_features: request_data_features[entity_name] = [] request_data_features[entity_name].append(entity_value) continue try: join_key = entity_name_to_join_key_map[entity_name] except KeyError: raise EntityNotFoundException(entity_name, self.project) join_key_row[join_key] = entity_value if entityless_case: join_key_row[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL if len(join_key_row) > 0: # May be empty if this entity row was request data join_key_rows.append(join_key_row) if len(needed_request_data_features) != len( request_data_features.keys()): raise RequestDataNotFoundInEntityRowsException( feature_names=needed_request_data_features) entity_row_proto_list = _infer_online_entity_rows(join_key_rows) union_of_entity_keys: List[EntityKeyProto] = [] result_rows: List[GetOnlineFeaturesResponse.FieldValues] = [] for entity_row_proto in entity_row_proto_list: # Create a list of entity keys to filter down for each feature view at lookup time. union_of_entity_keys.append(_entity_row_to_key(entity_row_proto)) # Also create entity values to append to the result result_rows.append(_entity_row_to_field_values(entity_row_proto)) # Add more feature values to the existing result rows for the request data features for feature_name, feature_values in request_data_features.items(): for row_idx, feature_value in enumerate(feature_values): result_row = result_rows[row_idx] result_row.fields[feature_name].CopyFrom( python_value_to_proto_value(feature_value)) result_row.statuses[ feature_name] = GetOnlineFeaturesResponse.FieldStatus.PRESENT for table, requested_features in grouped_refs: self._populate_result_rows_from_feature_view( entity_name_to_join_key_map, full_feature_names, provider, requested_features, result_rows, table, union_of_entity_keys, ) initial_response = OnlineResponse( GetOnlineFeaturesResponse(field_values=result_rows)) return self._augment_response_with_on_demand_transforms( _feature_refs, full_feature_names, initial_response, result_rows)