def registry_dump_command():
    """
    Print contents of the metadata registry
    """
    # Resolve the working directory once so that the repo check, the config
    # load and the dump itself are guaranteed to target the same directory.
    repo = Path.cwd()
    cli_check_repo(repo)
    repo_config = load_repo_config(repo)
    # Usage telemetry is recorded only after the repo config loaded successfully.
    tele = Telemetry()
    tele.log("registry-dump")
    registry_dump(repo_config, repo_path=repo)
def teardown_command():
    """
    Tear down deployed feature store infrastructure
    """
    # Resolve the working directory once so that the repo check, the config
    # load and the teardown are guaranteed to target the same directory.
    repo = Path.cwd()
    cli_check_repo(repo)
    repo_config = load_repo_config(repo)
    # Usage telemetry is recorded only after the repo config loaded successfully.
    tele = Telemetry()
    tele.log("teardown")
    teardown(repo_config, repo)
def init_command(project_directory, minimal: bool, template: str):
    """Create a new Feast repository"""
    # Fall back to a generated project name when no (or an empty) directory
    # was supplied.
    target_directory = project_directory or generate_project_name()
    # The minimal flag takes precedence over any explicitly requested template.
    chosen_template = "minimal" if minimal else template
    Telemetry().log("init")
    init_repo(target_directory, chosen_template)
def registry_dump_command(ctx: click.Context):
    """
    Print contents of the metadata registry
    """
    # The repository location comes from the click context ("CHDIR" entry)
    # instead of the process working directory.
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    config = load_repo_config(repo_path)
    Telemetry().log("registry-dump")
    registry_dump(config, repo_path=repo_path)
def teardown_command(ctx: click.Context):
    """
    Tear down deployed feature store infrastructure
    """
    # The repository location comes from the click context ("CHDIR" entry)
    # instead of the process working directory.
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    config = load_repo_config(repo_path)
    Telemetry().log("teardown")
    teardown(config, repo_path)
def apply_total_command():
    """
    Create or update a feature store deployment
    """
    # Resolve the working directory once so that the repo check, the config
    # load and the apply are guaranteed to target the same directory.
    repo = Path.cwd()
    cli_check_repo(repo)
    repo_config = load_repo_config(repo)
    # Usage telemetry is recorded only after the repo config loaded successfully.
    tele = Telemetry()
    tele.log("apply")
    try:
        apply_total(repo_config, repo)
    except FeastProviderLoginError as e:
        # Provider login failures are reported as a plain message rather
        # than propagated as a traceback.
        print(str(e))
def apply_total_command(ctx: click.Context):
    """
    Create or update a feature store deployment
    """
    # The repository location comes from the click context ("CHDIR" entry)
    # instead of the process working directory.
    repo_path = ctx.obj["CHDIR"]
    cli_check_repo(repo_path)
    config = load_repo_config(repo_path)
    Telemetry().log("apply")
    try:
        apply_total(config, repo_path)
    except FeastProviderLoginError as login_error:
        # Provider login failures are printed instead of being propagated.
        print(str(login_error))
class FeatureStore:
    """
    A FeatureStore object is used to define, create, and retrieve features.
    """

    config: RepoConfig
    repo_path: Optional[str]
    _registry: Registry

    def __init__(
        self, repo_path: Optional[str] = None, config: Optional[RepoConfig] = None,
    ):
        """Create a feature store from a repo path, an explicit config, or defaults.

        Args:
            repo_path: Path of a feature repository whose configuration is loaded
                with load_repo_config(). Mutually exclusive with ``config``.
            config: An explicit RepoConfig. Mutually exclusive with ``repo_path``.

        Raises:
            ValueError: If both ``repo_path`` and ``config`` are provided.
        """
        self.repo_path = repo_path
        if repo_path is not None and config is not None:
            raise ValueError("You cannot specify both repo_path and config")
        if config is not None:
            self.config = config
        elif repo_path is not None:
            self.config = load_repo_config(Path(repo_path))
        else:
            # Neither argument given: fall back to a local, file-backed setup.
            self.config = RepoConfig(
                registry="./registry.db",
                project="default",
                provider="local",
                online_store=OnlineStoreConfig(
                    local=LocalOnlineStoreConfig(path="online_store.db")
                ),
            )

        self._registry = self._create_registry()
        self._tele = Telemetry()

    def _create_registry(self) -> Registry:
        """Build a Registry from the registry settings of the current config."""
        registry_config = self.config.get_registry_config()
        return Registry(
            registry_path=registry_config.path,
            cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
        )

    def version(self) -> str:
        """Returns the version of the current Feast SDK/CLI"""
        return get_version()

    @property
    def project(self) -> str:
        """Name of the project taken from the active configuration."""
        return self.config.project

    def _get_provider(self) -> Provider:
        """Return the provider resolved from the active configuration."""
        return get_provider(self.config)

    def refresh_registry(self):
        """Fetches and caches a copy of the feature registry in memory.

        Explicitly calling this method allows for direct control of the state of the registry cache. Every time this
        method is called the complete registry state will be retrieved from the remote registry store backend
        (e.g., GCS, S3), and the cache timer will be reset. If refresh_registry() is run before get_online_features()
        is called, then get_online_features() will use the cached registry instead of retrieving (and caching) the
        registry itself.

        Additionally, the TTL for the registry cache can be set to infinity (by setting it to 0), which means that
        refresh_registry() will become the only way to update the cached registry. If the TTL is set to a value
        greater than 0, then once the cache becomes stale (more time than the TTL has passed), a new cache will be
        downloaded synchronously, which may increase latencies if the triggering method is get_online_features()
        """
        self._tele.log("refresh_registry")

        # A fresh Registry object is created before refreshing, replacing the
        # previous one held by this store.
        self._registry = self._create_registry()
        self._registry.refresh()

    def list_entities(self, allow_cache: bool = False) -> List[Entity]:
        """
        Retrieve a list of entities from the registry

        Args:
            allow_cache (bool): Whether to allow returning entities from a cached registry

        Returns:
            List of entities
        """
        self._tele.log("list_entities")

        return self._registry.list_entities(self.project, allow_cache=allow_cache)

    def list_feature_views(self) -> List[FeatureView]:
        """
        Retrieve a list of feature views from the registry

        Returns:
            List of feature views
        """
        self._tele.log("list_feature_views")

        return self._registry.list_feature_views(self.project)

    def get_entity(self, name: str) -> Entity:
        """
        Retrieves an entity.

        Args:
            name: Name of entity

        Returns:
            Returns either the specified entity, or raises an exception if
            none is found
        """
        self._tele.log("get_entity")

        return self._registry.get_entity(name, self.project)

    def get_feature_view(self, name: str) -> FeatureView:
        """
        Retrieves a feature view.

        Args:
            name: Name of feature view

        Returns:
            Returns either the specified feature view, or raises an exception if
            none is found
        """
        self._tele.log("get_feature_view")

        return self._registry.get_feature_view(name, self.project)

    def delete_feature_view(self, name: str):
        """
        Deletes a feature view or raises an exception if not found.

        Args:
            name: Name of feature view
        """
        self._tele.log("delete_feature_view")

        return self._registry.delete_feature_view(name, self.project)

    def apply(self, objects: List[Union[FeatureView, Entity]]):
        """Register objects to metadata store and update related infrastructure.

        The apply method registers one or more definitions (e.g., Entity, FeatureView) and registers or updates these
        objects in the Feast registry. Once the registry has been updated, the apply method will update related
        infrastructure (e.g., create tables in an online store) in order to reflect these new definitions. All
        operations are idempotent, meaning they can safely be rerun.

        Args:
            objects (List[Union[FeatureView, Entity]]): A list of FeatureView or Entity objects that should be
                registered

        Raises:
            ValueError: If an object is neither a FeatureView nor an Entity.

        Examples:
            Register a single Entity and FeatureView.
            >>> from feast.feature_store import FeatureStore
            >>> from feast import Entity, FeatureView, Feature, ValueType, FileSource
            >>> from datetime import timedelta
            >>>
            >>> fs = FeatureStore()
            >>> customer_entity = Entity(name="customer", value_type=ValueType.INT64, description="customer entity")
            >>> customer_feature_view = FeatureView(
            >>>     name="customer_fv",
            >>>     entities=["customer"],
            >>>     features=[Feature(name="age", dtype=ValueType.INT64)],
            >>>     input=FileSource(path="file.parquet", event_timestamp_column="timestamp"),
            >>>     ttl=timedelta(days=1)
            >>> )
            >>> fs.apply([customer_entity, customer_feature_view])
        """
        self._tele.log("apply")

        # TODO: Add locking
        # TODO: Optimize by only making a single call (read/write)

        # Only feature views are handed to the provider for the infra update.
        views_to_update = []
        for ob in objects:
            if isinstance(ob, FeatureView):
                self._registry.apply_feature_view(ob, project=self.config.project)
                views_to_update.append(ob)
            elif isinstance(ob, Entity):
                self._registry.apply_entity(ob, project=self.config.project)
            else:
                raise ValueError(
                    f"Unknown object type ({type(ob)}) provided as part of apply() call"
                )
        self._get_provider().update_infra(
            project=self.config.project,
            tables_to_delete=[],
            tables_to_keep=views_to_update,
            partial=True,
        )

    def get_historical_features(
        self, entity_df: Union[pd.DataFrame, str], feature_refs: List[str],
    ) -> RetrievalJob:
        """Enrich an entity dataframe with historical feature values for either training or batch scoring.

        This method joins historical feature data from one or more feature views to an entity dataframe by using a time
        travel join.

        Each feature view is joined to the entity dataframe using all entities configured for the respective feature
        view. All configured entities must be available in the entity dataframe. Therefore, the entity dataframe must
        contain all entities found in all feature views, but the individual feature views can have different entities.

        Time travel is based on the configured TTL for each feature view. A shorter TTL will limit the
        amount of scanning that will be done in order to find feature data for a specific entity key. Setting a short
        TTL may result in null values being returned.

        Args:
            entity_df (Union[pd.DataFrame, str]): An entity dataframe is a collection of rows containing all entity
                columns (e.g., customer_id, driver_id) on which features need to be joined, as well as a event_timestamp
                column used to ensure point-in-time correctness. Either a Pandas DataFrame can be provided or a string
                SQL query. The query must be of a format supported by the configured offline store (e.g., BigQuery)
            feature_refs: A list of features that should be retrieved from the offline store. Feature references are of
                the format "feature_view:feature", e.g., "customer_fv:daily_transactions".

        Returns:
            RetrievalJob which can be used to materialize the results.

        Examples:
            Retrieve historical features using a BigQuery SQL entity dataframe
            >>> from feast.feature_store import FeatureStore
            >>>
            >>> fs = FeatureStore(config=RepoConfig(provider="gcp"))
            >>> retrieval_job = fs.get_historical_features(
            >>>     entity_df="SELECT event_timestamp, order_id, customer_id from gcp_project.my_ds.customer_orders",
            >>>     feature_refs=["customer:age", "customer:avg_orders_1d", "customer:avg_orders_7d"]
            >>> )
            >>> feature_data = retrieval_job.to_df()
            >>> model.fit(feature_data) # insert your modeling framework here.
        """
        self._tele.log("get_historical_features")

        all_feature_views = self._registry.list_feature_views(
            project=self.config.project
        )
        feature_views = _get_requested_feature_views(feature_refs, all_feature_views)
        provider = self._get_provider()
        job = provider.get_historical_features(
            self.config,
            feature_views,
            feature_refs,
            entity_df,
            self._registry,
            self.project,
        )
        return job

    def _get_feature_views_to_materialize(
        self, feature_views: Optional[List[str]]
    ) -> List[FeatureView]:
        """Resolve feature view names to objects; None selects every view in the project."""
        if feature_views is None:
            return self._registry.list_feature_views(self.config.project)
        return [
            self._registry.get_feature_view(name, self.config.project)
            for name in feature_views
        ]

    def materialize_incremental(
        self, end_date: datetime, feature_views: Optional[List[str]] = None,
    ) -> None:
        """
        Materialize incremental new data from the offline store into the online store.

        This method loads incremental new feature data up to the specified end time from either
        the specified feature views, or all feature views if none are specified,
        into the online store where it is available for online serving. The start time of
        the interval materialized is either the most recent end time of a prior materialization or
        (now - ttl) if no such prior materialization exists.

        Args:
            end_date (datetime): End date for time range of data to materialize into the online store
            feature_views (List[str]): Optional list of feature view names. If selected, will only run
                materialization for the specified feature views.

        Raises:
            Exception: If a feature view has neither a previous materialization
                end time nor a ttl from which a start time can be derived.

        Examples:
            Materialize all features into the online store up to 5 minutes ago.
            >>> from datetime import datetime, timedelta
            >>> from feast.feature_store import FeatureStore
            >>>
            >>> fs = FeatureStore(config=RepoConfig(provider="gcp", registry="gs://my-fs/", project="my_fs_proj"))
            >>> fs.materialize_incremental(end_date=datetime.utcnow() - timedelta(minutes=5))
        """
        self._tele.log("materialize_incremental")

        feature_views_to_materialize = self._get_feature_views_to_materialize(
            feature_views
        )
        # The provider does not depend on the feature view, so resolve it once.
        provider = self._get_provider()

        # TODO paging large loads
        for feature_view in feature_views_to_materialize:
            start_date = feature_view.most_recent_end_time
            if start_date is None:
                if feature_view.ttl is None:
                    raise Exception(
                        f"No start time found for feature view {feature_view.name}. materialize_incremental() requires either a ttl to be set or for materialize() to have been run at least once."
                    )
                # No prior materialization: look back one ttl from now.
                start_date = datetime.utcnow() - feature_view.ttl
            provider.materialize_single_feature_view(
                feature_view, start_date, end_date, self._registry, self.project
            )

    def materialize(
        self,
        start_date: datetime,
        end_date: datetime,
        feature_views: Optional[List[str]] = None,
    ) -> None:
        """
        Materialize data from the offline store into the online store.

        This method loads feature data in the specified interval from either
        the specified feature views, or all feature views if none are specified,
        into the online store where it is available for online serving.

        Args:
            start_date (datetime): Start date for time range of data to materialize into the online store
            end_date (datetime): End date for time range of data to materialize into the online store
            feature_views (List[str]): Optional list of feature view names. If selected, will only run
                materialization for the specified feature views.

        Examples:
            Materialize all features into the online store over the interval
            from 3 hours ago to 10 minutes ago.
            >>> from datetime import datetime, timedelta
            >>> from feast.feature_store import FeatureStore
            >>>
            >>> fs = FeatureStore(config=RepoConfig(provider="gcp"))
            >>> fs.materialize(
            >>>   start_date=datetime.utcnow() - timedelta(hours=3), end_date=datetime.utcnow() - timedelta(minutes=10)
            >>> )
        """
        self._tele.log("materialize")

        feature_views_to_materialize = self._get_feature_views_to_materialize(
            feature_views
        )
        # The provider does not depend on the feature view, so resolve it once.
        provider = self._get_provider()

        # TODO paging large loads
        for feature_view in feature_views_to_materialize:
            provider.materialize_single_feature_view(
                feature_view, start_date, end_date, self._registry, self.project
            )

    def get_online_features(
        self, feature_refs: List[str], entity_rows: List[Dict[str, Any]],
    ) -> OnlineResponse:
        """
        Retrieves the latest online feature data.

        Note: This method will download the full feature registry the first time it is run. If you are using a
        remote registry like GCS or S3 then that may take a few seconds. The registry remains cached up to a TTL
        duration (which can be set to infinity). If the cached registry is stale (more time than the TTL has
        passed), then a new registry will be downloaded synchronously by this method. This download may
        introduce latency to online feature retrieval. In order to avoid synchronous downloads, please call
        refresh_registry() prior to the TTL being reached. Remember it is possible to set the cache TTL to
        infinity (cache forever).

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair.

        Returns:
            OnlineResponse containing the feature data in records.

        Raises:
            Exception: If an entity name used in ``entity_rows`` is not among the
                entities listed for this project.

        Examples:
            >>> from feast import FeatureStore
            >>>
            >>> store = FeatureStore(repo_path="...")
            >>> feature_refs = ["sales:daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = store.get_online_features(
            >>>     feature_refs, entity_rows)
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]}
        """
        self._tele.log("get_online_features")

        provider = self._get_provider()
        entities = self.list_entities(allow_cache=True)
        entity_name_to_join_key_map = {
            entity.name: entity.join_key for entity in entities
        }

        # Re-key every input row from entity names to the entities' join keys.
        join_key_rows = []
        for row in entity_rows:
            join_key_row = {}
            for entity_name, entity_value in row.items():
                try:
                    join_key = entity_name_to_join_key_map[entity_name]
                except KeyError:
                    # The KeyError is an implementation detail of the lookup;
                    # suppress it so only the descriptive error is reported.
                    raise Exception(
                        f"Entity {entity_name} does not exist in project {self.project}"
                    ) from None
                join_key_row[join_key] = entity_value
            join_key_rows.append(join_key_row)

        entity_row_proto_list = _infer_online_entity_rows(join_key_rows)

        union_of_entity_keys = []
        result_rows: List[GetOnlineFeaturesResponse.FieldValues] = []

        for entity_row_proto in entity_row_proto_list:
            union_of_entity_keys.append(_entity_row_to_key(entity_row_proto))
            result_rows.append(_entity_row_to_field_values(entity_row_proto))

        all_feature_views = self._registry.list_feature_views(
            project=self.config.project, allow_cache=True
        )

        grouped_refs = _group_refs(feature_refs, all_feature_views)
        for table, requested_features in grouped_refs:
            entity_keys = _get_table_entity_keys(
                table, union_of_entity_keys, entity_name_to_join_key_map
            )
            read_rows = provider.online_read(
                project=self.project, table=table, entity_keys=entity_keys,
            )
            # Read rows are matched to result rows by position.
            for row_idx, (_row_ts, feature_data) in enumerate(read_rows):
                result_row = result_rows[row_idx]

                if feature_data is None:
                    # Nothing stored for this entity key: flag every requested
                    # feature of this table as not found.
                    for feature_name in requested_features:
                        # Response fields are keyed "<view name>__<feature name>".
                        feature_ref = f"{table.name}__{feature_name}"
                        result_row.statuses[
                            feature_ref
                        ] = GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND
                else:
                    for feature_name in feature_data:
                        feature_ref = f"{table.name}__{feature_name}"
                        # Stored features that were not requested are skipped.
                        if feature_name in requested_features:
                            result_row.fields[feature_ref].CopyFrom(
                                feature_data[feature_name]
                            )
                            result_row.statuses[
                                feature_ref
                            ] = GetOnlineFeaturesResponse.FieldStatus.PRESENT

        return OnlineResponse(GetOnlineFeaturesResponse(field_values=result_rows))
class Client: """ Feast Client: Used for creating, managing, and retrieving features. """ def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs): """ The Feast Client should be initialized with at least one service url Please see constants.py for configuration options. Commonly used options or arguments include: core_url: Feast Core URL. Used to manage features serving_url: Feast Serving URL. Used to retrieve features project: Sets the active project. This field is optional. core_secure: Use client-side SSL/TLS for Core gRPC API serving_secure: Use client-side SSL/TLS for Serving gRPC API enable_auth: Enable authentication and authorization auth_provider: Authentication provider – "google" or "oauth" if auth_provider is "oauth", the following fields are mandatory – oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url Args: options: Configuration options to initialize client with **kwargs: Additional keyword arguments that will be used as configuration options along with "options" """ if options is None: options = dict() self._config = Config(options={**options, **kwargs}) self._core_service_stub: Optional[CoreServiceStub] = None self._serving_service_stub: Optional[ServingServiceStub] = None self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None self._registry_impl: Optional[Registry] = None # Configure Auth Metadata Plugin if auth is enabled if self._config.getboolean(opt.ENABLE_AUTH): self._auth_metadata = feast_auth.get_auth_metadata_plugin( self._config) self._tele = Telemetry() @property def config(self) -> Config: return self._config @property def _core_service(self): """ Creates or returns the gRPC Feast Core Service Stub Returns: CoreServiceStub """ if not self._core_service_stub: channel = create_grpc_channel( url=self._config.get(opt.CORE_URL), enable_ssl=self._config.getboolean(opt.CORE_ENABLE_SSL), enable_auth=self._config.getboolean(opt.ENABLE_AUTH), 
ssl_server_cert_path=self._config.get( opt.CORE_SERVER_SSL_CERT), auth_metadata_plugin=self._auth_metadata, timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), ) self._core_service_stub = CoreServiceStub(channel) return self._core_service_stub @property def _use_object_store_registry(self) -> bool: return self._config.exists(opt.REGISTRY_PATH) @property def _registry(self): if self._registry_impl is None: self._registry_impl = Registry(self._config.get(opt.REGISTRY_PATH)) return self._registry_impl @property def _serving_service(self): """ Creates or returns the gRPC Feast Serving Service Stub. If both `opentracing` and `grpcio-opentracing` are installed, an opentracing interceptor will be instantiated based on the global tracer. Returns: ServingServiceStub """ if not self._serving_service_stub: channel = create_grpc_channel( url=self._config.get(opt.SERVING_URL), enable_ssl=self._config.getboolean(opt.SERVING_ENABLE_SSL), enable_auth=self._config.getboolean(opt.ENABLE_AUTH), ssl_server_cert_path=self._config.get( opt.SERVING_SERVER_SSL_CERT), auth_metadata_plugin=self._auth_metadata, timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), ) try: import opentracing from grpc_opentracing import open_tracing_client_interceptor from grpc_opentracing.grpcext import intercept_channel interceptor = open_tracing_client_interceptor( opentracing.global_tracer()) channel = intercept_channel(channel, interceptor) except ImportError: pass self._serving_service_stub = ServingServiceStub(channel) return self._serving_service_stub def _extra_grpc_params(self) -> Dict[str, Any]: return dict( timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) @property def core_url(self) -> str: """ Retrieve Feast Core URL Returns: Feast Core URL string """ return self._config.get(opt.CORE_URL) @core_url.setter def core_url(self, value: str): """ Set the Feast Core URL Args: value: Feast Core URL """ self._config.set(opt.CORE_URL, value) 
@property def serving_url(self) -> str: """ Retrieve Feast Serving URL Returns: Feast Serving URL string """ return self._config.get(opt.SERVING_URL) @serving_url.setter def serving_url(self, value: str): """ Set the Feast Serving URL Args: value: Feast Serving URL """ self._config.set(opt.SERVING_URL, value) @property def job_service_url(self) -> str: """ Retrieve Feast Job Service URL Returns: Feast Job Service URL string """ return self._config.get(opt.JOB_SERVICE_URL) @job_service_url.setter def job_service_url(self, value: str): """ Set the Feast Job Service URL Args: value: Feast Job Service URL """ self._config.set(opt.JOB_SERVICE_URL, value) @property def core_secure(self) -> bool: """ Retrieve Feast Core client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(opt.CORE_ENABLE_SSL) @core_secure.setter def core_secure(self, value: bool): """ Set the Feast Core client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(opt.CORE_ENABLE_SSL, value) @property def serving_secure(self) -> bool: """ Retrieve Feast Serving client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(opt.SERVING_ENABLE_SSL) @serving_secure.setter def serving_secure(self, value: bool): """ Set the Feast Serving client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(opt.SERVING_ENABLE_SSL, value) @property def job_service_secure(self) -> bool: """ Retrieve Feast Job Service client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL) @job_service_secure.setter def job_service_secure(self, value: bool): """ Set the Feast Job Service client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(opt.JOB_SERVICE_ENABLE_SSL, value) def version(self, sdk_only=False): """ Returns version information 
from Feast Core and Feast Serving """ import pkg_resources try: sdk_version = pkg_resources.get_distribution("feast").version except pkg_resources.DistributionNotFound: sdk_version = "local build" if sdk_only: return sdk_version result = { "sdk": { "version": sdk_version }, "serving": "not configured", "core": "not configured", } if self.serving_url: serving_version = self._serving_service.GetFeastServingInfo( GetFeastServingInfoRequest(), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ).version result["serving"] = { "url": self.serving_url, "version": serving_version } if not self._use_object_store_registry and self.core_url: core_version = self._core_service.GetFeastCoreVersion( GetFeastCoreVersionRequest(), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ).version result["core"] = {"url": self.core_url, "version": core_version} return result @property def project(self) -> str: """ Retrieve currently active project Returns: Project name """ if not self._config.get(opt.PROJECT): raise ValueError("No project has been configured.") return self._config.get(opt.PROJECT) def set_project(self, project: Optional[str] = None): """ Set currently active Feast project Args: project: Project to set as active. If unset, will reset to the default project. 
""" if project is None: project = opt().PROJECT self._config.set(opt.PROJECT, project) def list_projects(self) -> List[str]: """ List all active Feast projects Returns: List of project names """ if self._use_object_store_registry: raise NotImplementedError( "Projects are not implemented for object store registry.") else: response = self._core_service.ListProjects( ListProjectsRequest(), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ListProjectsResponse return list(response.projects) def create_project(self, project: str): """ Creates a Feast project Args: project: Name of project """ if self._use_object_store_registry: raise NotImplementedError( "Projects are not implemented for object store registry.") else: self._core_service.CreateProject( CreateProjectRequest(name=project), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: CreateProjectResponse def archive_project(self, project): """ Archives a project. Project will still continue to function for ingestion and retrieval, but will be in a read-only state. It will also not be visible from the Core API for management purposes. Args: project: Name of project to archive """ if self._use_object_store_registry: raise NotImplementedError( "Projects are not implemented for object store registry.") else: try: self._core_service.ArchiveProject( ArchiveProjectRequest(name=project), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ArchiveProjectResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # revert to the default project if self._project == project: self._project = opt().PROJECT def apply( self, objects: Union[List[Union[Entity, FeatureTable]], Entity, FeatureTable], project: str = None, ): """ Idempotently registers entities and feature tables with Feast Core. Either a single entity or feature table or a list can be provided. 
Args: objects: List of entities and/or feature tables that will be registered Examples: >>> from feast import Client >>> from feast.entity import Entity >>> from feast.value_type import ValueType >>> >>> feast_client = Client(core_url="localhost:6565") >>> entity = Entity( >>> name="driver_entity", >>> description="Driver entity for car rides", >>> value_type=ValueType.STRING, >>> labels={ >>> "key": "val" >>> } >>> ) >>> feast_client.apply(entity) """ self._tele.log("apply") if project is None: project = self.project if not isinstance(objects, list): objects = [objects] for obj in objects: if isinstance(obj, Entity): self._apply_entity(project, obj) # type: ignore elif isinstance(obj, FeatureTable): self._apply_feature_table(project, obj) # type: ignore else: raise ValueError( f"Could not determine object type to apply {obj} with type {type(obj)}. Type must be Entity or FeatureTable." ) def apply_entity(self, entities: Union[List[Entity], Entity], project: str = None): """ Deprecated. Please see apply(). """ warnings.warn( "The method apply_entity() is being deprecated. Please use apply() instead. 
Feast 0.10 and onwards will not support apply_entity().", DeprecationWarning, ) if project is None: project = self.project if not isinstance(entities, list): entities = [entities] for entity in entities: if isinstance(entity, Entity): self._apply_entity(project, entity) # type: ignore continue raise ValueError( f"Could not determine entity type to apply {entity}") def _apply_entity(self, project: str, entity: Entity): """ Registers a single entity with Feast Args: entity: Entity that will be registered """ if self._use_object_store_registry: return self._registry.apply_entity(entity, project) else: entity.is_valid() entity_proto = entity.to_spec_proto() # Convert the entity to a request and send to Feast Core try: apply_entity_response = self._core_service.ApplyEntity( ApplyEntityRequest(project=project, spec=entity_proto), # type: ignore timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ApplyEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned entity applied_entity = Entity.from_proto(apply_entity_response.entity) # Deep copy from the returned entity to the local entity entity._update_from_entity(applied_entity) def list_entities(self, project: str = None, labels: Dict[str, str] = dict()) -> List[Entity]: """ Retrieve a list of entities from Feast Core Args: project: Filter entities based on project name labels: User-defined labels that these entities are associated with Returns: List of entities """ if project is None: project = self.project if self._use_object_store_registry: return self._registry.list_entities(project) else: filter = ListEntitiesRequest.Filter(project=project, labels=labels) # Get latest entities from Feast Core entity_protos = self._core_service.ListEntities( ListEntitiesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListEntitiesResponse # Extract entities and return entities = [] for entity_proto in entity_protos.entities: 
entity = Entity.from_proto(entity_proto) entity._client = self entities.append(entity) return entities def get_entity(self, name: str, project: str = None) -> Entity: """ Retrieves an entity. Args: project: Feast project that this entity belongs to name: Name of entity Returns: Returns either the specified entity, or raises an exception if none is found """ self._tele.log("get_entity") if project is None: project = self.project if self._use_object_store_registry: return self._registry.get_entity(name, project) else: try: get_entity_response = self._core_service.GetEntity( GetEntityRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) # type: GetEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) entity = Entity.from_proto(get_entity_response.entity) return entity def apply_feature_table( self, feature_tables: Union[List[FeatureTable], FeatureTable], project: str = None, ): """ Deprecated. Please see apply(). """ warnings.warn( "The method apply_feature_table() is being deprecated. Please use apply() instead. 
Feast 0.10 and onwards will not support apply_feature_table().", DeprecationWarning, ) if project is None: project = self.project if not isinstance(feature_tables, list): feature_tables = [feature_tables] for feature_table in feature_tables: if isinstance(feature_table, FeatureTable): self._apply_feature_table(project, feature_table) # type: ignore continue raise ValueError( f"Could not determine feature table type to apply {feature_table}" ) def _apply_feature_table(self, project: str, feature_table: FeatureTable): """ Registers a single feature table with Feast Args: feature_table: Feature table that will be registered """ if self._use_object_store_registry: return self._registry.apply_feature_table(feature_table, project) else: feature_table.is_valid() feature_table_proto = feature_table.to_spec_proto() # Convert the feature table to a request and send to Feast Core try: apply_feature_table_response = self._core_service.ApplyFeatureTable( ApplyFeatureTableRequest( project=project, table_spec=feature_table_proto), # type: ignore timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ApplyFeatureTableResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned feature table applied_feature_table = FeatureTable.from_proto( apply_feature_table_response.table) # Deep copy from the returned feature table to the local entity feature_table._update_from_feature_table(applied_feature_table) def list_feature_tables( self, project: str = None, labels: Dict[str, str] = dict() ) -> List[FeatureTable]: """ Retrieve a list of feature tables from Feast Core Args: project: Filter feature tables based on project name Returns: List of feature tables """ if project is None: project = self.project if self._use_object_store_registry: return self._registry.list_feature_tables(project) else: filter = ListFeatureTablesRequest.Filter(project=project, labels=labels) # Get latest feature tables from Feast Core 
feature_table_protos = self._core_service.ListFeatureTables( ListFeatureTablesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListFeatureTablesResponse # Extract feature tables and return feature_tables = [] for feature_table_proto in feature_table_protos.tables: feature_table = FeatureTable.from_proto(feature_table_proto) feature_table._client = self feature_tables.append(feature_table) return feature_tables def get_feature_table(self, name: str, project: str = None) -> FeatureTable: """ Retrieves a feature table. Args: project: Feast project that this feature table belongs to name: Name of feature table Returns: Returns either the specified feature table, or raises an exception if none is found """ self._tele.log("get_feature_table") if project is None: project = self.project if self._use_object_store_registry: return self._registry.get_feature_table(name, project) else: try: get_feature_table_response = self._core_service.GetFeatureTable( GetFeatureTableRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) # type: GetFeatureTableResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) return FeatureTable.from_proto(get_feature_table_response.table) def delete_feature_table(self, name: str, project: str = None) -> None: """ Deletes a feature table. 
Args: project: Feast project that this feature table belongs to name: Name of feature table """ if project is None: project = self.project if self._use_object_store_registry: return self._registry.delete_feature_table(name, project) else: try: self._core_service.DeleteFeatureTable( DeleteFeatureTableRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) def list_features_by_ref( self, project: str = None, entities: List[str] = list(), labels: Dict[str, str] = dict(), ) -> Dict[FeatureRef, Feature]: """ Retrieve a dictionary of feature reference to feature from Feast Core based on filters provided. Args: project: Feast project that these features belongs to entities: Feast entity that these features are associated with labels: Feast labels that these features are associated with Returns: Dictionary of <feature references: features> Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565") >>> features = feast_client.list_features(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"}) >>> print(features) """ if self._use_object_store_registry: raise NotImplementedError( "This function is not implemented for object store registry.") else: if project is None: project = self.project filter = ListFeaturesRequest.Filter(project=project, entities=entities, labels=labels) feature_protos = self._core_service.ListFeatures( ListFeaturesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListFeaturesResponse # Extract features and return features_dict = {} for ref_str, feature_proto in feature_protos.features.items(): feature_ref = FeatureRef.from_str(ref_str) feature = Feature.from_proto(feature_proto) features_dict[feature_ref] = feature return features_dict def ingest( self, feature_table: Union[str, FeatureTable], source: Union[pd.DataFrame, str], project: str = None, chunk_size: int = 10000, 
max_workers: int = max(CPU_COUNT - 1, 1), timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT), ) -> None: """ Batch load feature data into a FeatureTable. Args: feature_table (typing.Union[str, feast.feature_table.FeatureTable]): FeatureTable object or the string name of the feature table source (typing.Union[pd.DataFrame, str]): Either a file path or Pandas Dataframe to ingest into Feast Files that are currently supported: * parquet * csv * json project: Feast project to locate FeatureTable chunk_size (int): Amount of rows to load and ingest at a time. max_workers (int): Number of worker processes to use to encode values. timeout (int): Timeout in seconds to wait for completion. Examples: >>> from feast import Client >>> >>> client = Client(core_url="localhost:6565") >>> ft_df = pd.DataFrame( >>> { >>> "datetime": [pd.datetime.now()], >>> "driver": [1001], >>> "rating": [4.3], >>> } >>> ) >>> client.set_project("project1") >>> >>> driver_ft = client.get_feature_table("driver") >>> client.ingest(driver_ft, ft_df) """ self._tele.log("ingest") if project is None: project = self.project if isinstance(feature_table, str): name = feature_table if isinstance(feature_table, FeatureTable): name = feature_table.name fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( name, project) if fetched_feature_table is not None: feature_table = fetched_feature_table else: raise Exception(f"FeatureTable, {name} cannot be found.") # Check 1) Only parquet file format for FeatureTable batch source is supported if (feature_table.batch_source and issubclass(type(feature_table.batch_source), FileSource) and isinstance( type(feature_table.batch_source.file_options.file_format), ParquetFormat)): raise Exception( f"No suitable batch source found for FeatureTable, {name}." f"Only BATCH_FILE source with parquet format is supported for batch ingestion." 
) pyarrow_table, column_names = _read_table_from_source(source) # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table _check_field_mappings( column_names, name, feature_table.batch_source.event_timestamp_column, feature_table.batch_source.field_mapping, ) dir_path = None with_partitions = False if (issubclass(type(feature_table.batch_source), FileSource) and feature_table.batch_source.date_partition_column): with_partitions = True dest_path = _write_partitioned_table_from_source( column_names, pyarrow_table, feature_table.batch_source.date_partition_column, feature_table.batch_source.event_timestamp_column, ) else: dir_path, dest_path = _write_non_partitioned_table_from_source( column_names, pyarrow_table, chunk_size, max_workers, ) try: if issubclass(type(feature_table.batch_source), FileSource): file_url = feature_table.batch_source.file_options.file_url.rstrip( "*") _upload_to_file_source(file_url, with_partitions, dest_path, self._config) if issubclass(type(feature_table.batch_source), BigQuerySource): bq_table_ref = feature_table.batch_source.bigquery_options.table_ref feature_table_timestamp_column = ( feature_table.batch_source.event_timestamp_column) _upload_to_bq_source(bq_table_ref, feature_table_timestamp_column, dest_path) finally: # Remove parquet file(s) that were created earlier print("Removing temporary file(s)...") if dir_path: shutil.rmtree(dir_path) print( "Data has been successfully ingested into FeatureTable batch source." ) def _get_grpc_metadata(self): """ Returns a metadata tuple to attach to gRPC requests. This is primarily used when authentication is enabled but SSL/TLS is disabled. 
Returns: Tuple of metadata to attach to each gRPC call """ if self._config.getboolean(opt.ENABLE_AUTH) and self._auth_metadata: return self._auth_metadata.get_signed_meta() return () def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], project: Optional[str] = None, ) -> OnlineResponse: """ Retrieves the latest online feature data from Feast Serving. Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. project: Optionally specify the the project override. If specified, uses given project for retrieval. Overrides the projects specified in Feature References if also are specified. Returns: GetOnlineFeaturesResponse containing the feature data in records. Each EntityRow provided will yield one record, which contains data fields with data value and field status metadata (if included). 
Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = feast_client.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ self._tele.log("get_online_features") try: response = self._serving_service.GetOnlineFeaturesV2( GetOnlineFeaturesRequestV2( features=_build_feature_references( feature_ref_strs=feature_refs), entity_rows=_infer_online_entity_rows(entity_rows), project=project if project is not None else self.project, ), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) response = OnlineResponse(response) return response