def test_set_value(self):
    """ Setting a value and reading it back with getint casts it correctly """
    fd, path = mkstemp()
    os.close(fd)  # mkstemp returns an open fd; close it so it is not leaked
    config = Config(path=path)
    config.set("my_val", 1)
    assert config.getint("my_val") == 1
def test_type_casting_of_defaults(self):
    """ Default values are cast to the expected Python types """
    fd, path = mkstemp()
    os.close(fd)  # mkstemp returns an open fd; close it so it is not leaked
    config = Config(path=path)
    assert isinstance(config.getboolean("enable_auth"), bool)
    assert isinstance(config.getint("DATAPROC_EXECUTOR_INSTANCES"), int)
    assert isinstance(config.getfloat("DATAPROC_EXECUTOR_INSTANCES"), float)
def test_type_casting(self):
    """ Test type casting of strings to other types """
    fd, path = mkstemp()
    os.close(fd)  # mkstemp returns an open fd; close it so it is not leaked
    os.environ["FEAST_INT_VAR"] = "1"
    os.environ["FEAST_FLOAT_VAR"] = "1.0"
    os.environ["FEAST_BOOLEAN_VAR"] = "True"
    try:
        config = Config(path=path)
        assert config.getint("INT_VAR") == 1
        assert config.getfloat("FLOAT_VAR") == 1.0
        assert config.getboolean("BOOLEAN_VAR") is True
    finally:
        # Clean up so these variables do not leak into other tests
        for var in ("FEAST_INT_VAR", "FEAST_FLOAT_VAR", "FEAST_BOOLEAN_VAR"):
            os.environ.pop(var, None)
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """

    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        The Feast Client should be initialized with at least one service url

        Please see constants.py for configuration options. Commonly used options
        or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret,
            oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """
        if options is None:
            options = dict()
        self._config = Config(options={**options, **kwargs})

        # Service stubs are created lazily by the _core_service /
        # _serving_service properties on first use.
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(self._config)

    @property
    def _core_service(self):
        """
        Creates or returns the gRPC Feast Core Service Stub

        Returns: CoreServiceStub
        """
        if not self._core_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(CONFIG_CORE_URL_KEY),
                enable_ssl=self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
                enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY),
                ssl_server_cert_path=self._config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            self._core_service_stub = CoreServiceStub(channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Creates or returns the gRPC Feast Serving Service Stub

        Returns: ServingServiceStub
        """
        if not self._serving_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(CONFIG_SERVING_URL_KEY),
                enable_ssl=self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY),
                enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY),
                ssl_server_cert_path=self._config.get(
                    CONFIG_SERVING_SERVER_SSL_CERT_KEY
                ),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            self._serving_service_stub = ServingServiceStub(channel)
        return self._serving_service_stub

    @property
    def core_url(self) -> str:
        """
        Retrieve Feast Core URL

        Returns:
            Feast Core URL string
        """
        return self._config.get(CONFIG_CORE_URL_KEY)

    @core_url.setter
    def core_url(self, value: str):
        """
        Set the Feast Core URL

        Args:
            value: Feast Core URL
        """
        self._config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Retrieve Serving Core URL

        Returns:
            Feast Serving URL string
        """
        return self._config.get(CONFIG_SERVING_URL_KEY)

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Set the Feast Serving URL

        Args:
            value: Feast Serving URL
        """
        self._config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Retrieve Feast Core client-side SSL/TLS setting

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY)

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Set the Feast Core client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(CONFIG_CORE_ENABLE_SSL_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Retrieve Feast Serving client-side SSL/TLS setting

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY)

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Set the Feast Serving client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(CONFIG_SERVING_ENABLE_SSL_KEY, value)

    def version(self):
        """
        Returns version information from Feast Core and Feast Serving
        """
        import pkg_resources

        result = {
            "sdk": {"version": pkg_resources.get_distribution("feast").version},
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_version = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            ).version
            result["serving"] = {"url": self.serving_url, "version": serving_version}

        if self.core_url:
            core_version = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            ).version
            result["core"] = {"url": self.core_url, "version": core_version}

        return result

    @property
    def project(self) -> Union[str, None]:
        """
        Retrieve currently active project

        Returns:
            Project name

        Raises:
            ValueError: If no project has been configured.
        """
        if not self._config.get(CONFIG_PROJECT_KEY):
            raise ValueError("No project has been configured.")
        return self._config.get(CONFIG_PROJECT_KEY)

    def set_project(self, project: Optional[str] = None):
        """
        Set currently active Feast project

        Args:
            project: Project to set as active. If unset, will reset to the
                default project.
        """
        if project is None:
            project = FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
        self._config.set(CONFIG_PROJECT_KEY, project)

    def list_projects(self) -> List[str]:
        """
        List all active Feast projects

        Returns:
            List of project names
        """
        response = self._core_service.ListProjects(
            ListProjectsRequest(),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        return list(response.projects)

    def create_project(self, project: str):
        """
        Creates a Feast project

        Args:
            project: Name of project
        """
        self._core_service.CreateProject(
            CreateProjectRequest(name=project),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        Args:
            project: Name of project to archive
        """
        try:
            # Use the lazy _core_service property (the raw stub may still be
            # None if no other call has been made on this client yet).
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Revert to the default project if the archived project was active.
        # The active project lives in self._config, not in an attribute.
        if self._config.get(CONFIG_PROJECT_KEY) == project:
            self._config.set(
                CONFIG_PROJECT_KEY, FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
            )

    def apply_entity(
        self, entities: Union[List[Entity], Entity], project: Optional[str] = None
    ):
        """
        Idempotently registers entities with Feast Core. Either a single
        entity or a list can be provided.

        Args:
            entities: List of entities that will be registered
            project: Feast project to register the entities under; defaults
                to the currently active project.

        Raises:
            ValueError: If any element is not an Entity.

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            >>>     name="driver_entity",
            >>>     description="Driver entity for car rides",
            >>>     value_type=ValueType.STRING,
            >>>     labels={
            >>>         "key": "val"
            >>>     }
            >>> )
            >>> feast_client.apply_entity(entity)
        """
        if project is None:
            project = self.project
        if not isinstance(entities, list):
            entities = [entities]
        for entity in entities:
            if isinstance(entity, Entity):
                self._apply_entity(project, entity)  # type: ignore
                continue
            raise ValueError(f"Could not determine entity type to apply {entity}")

    def _apply_entity(self, project: str, entity: Entity):
        """
        Registers a single entity with Feast

        Args:
            project: Feast project to register the entity under
            entity: Entity that will be registered
        """
        entity.is_valid()
        entity_proto = entity.to_spec_proto()

        # Convert the entity to a request and send to Feast Core
        try:
            apply_entity_response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project, spec=entity_proto),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Extract the returned entity
        applied_entity = Entity.from_proto(apply_entity_response.entity)

        # Deep copy from the returned entity to the local entity
        entity._update_from_entity(applied_entity)

    def list_entities(
        self, project: Optional[str] = None, labels: Optional[Dict[str, str]] = None
    ) -> List[Entity]:
        """
        Retrieve a list of entities from Feast Core

        Args:
            project: Filter entities based on project name
            labels: User-defined labels that these entities are associated with

        Returns:
            List of entities
        """
        if project is None:
            project = self.project
        # Avoid a mutable default argument; None means "no label filter".
        if labels is None:
            labels = dict()
        filter = ListEntitiesRequest.Filter(project=project, labels=labels)

        # Get latest entities from Feast Core
        entity_protos = self._core_service.ListEntities(
            ListEntitiesRequest(filter=filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        # Extract entities and return
        entities = []
        for entity_proto in entity_protos.entities:
            entity = Entity.from_proto(entity_proto)
            entity._client = self
            entities.append(entity)
        return entities

    def get_entity(self, name: str, project: Optional[str] = None) -> Entity:
        """
        Retrieves an entity.

        Args:
            project: Feast project that this entity belongs to
            name: Name of entity

        Returns:
            Returns either the specified entity, or raises an exception if
            none is found
        """
        if project is None:
            project = self.project
        try:
            get_entity_response = self._core_service.GetEntity(
                GetEntityRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        entity = Entity.from_proto(get_entity_response.entity)
        return entity

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: Optional[str] = None,
    ):
        """
        Idempotently registers feature tables with Feast Core. Either a
        single feature table or a list can be provided.

        Args:
            feature_tables: List of feature tables that will be registered
            project: Feast project to register the tables under; defaults
                to the currently active project.

        Raises:
            ValueError: If any element is not a FeatureTable.
        """
        if project is None:
            project = self.project
        if not isinstance(feature_tables, list):
            feature_tables = [feature_tables]
        for feature_table in feature_tables:
            if isinstance(feature_table, FeatureTable):
                self._apply_feature_table(project, feature_table)  # type: ignore
                continue
            raise ValueError(
                f"Could not determine feature table type to apply {feature_table}"
            )

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Registers a single feature table with Feast

        Args:
            project: Feast project to register the table under
            feature_table: Feature table that will be registered
        """
        feature_table.is_valid()
        feature_table_proto = feature_table.to_spec_proto()

        # Convert the feature table to a request and send to Feast Core
        try:
            apply_feature_table_response = self._core_service.ApplyFeatureTable(
                ApplyFeatureTableRequest(project=project, table_spec=feature_table_proto),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Extract the returned feature table
        applied_feature_table = FeatureTable.from_proto(
            apply_feature_table_response.table
        )

        # Deep copy from the returned feature table to the local entity
        feature_table._update_from_feature_table(applied_feature_table)

    def list_feature_tables(
        self, project: Optional[str] = None, labels: Optional[Dict[str, str]] = None
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from Feast Core

        Args:
            project: Filter feature tables based on project name
            labels: User-defined labels that these tables are associated with

        Returns:
            List of feature tables
        """
        if project is None:
            project = self.project
        # Avoid a mutable default argument; None means "no label filter".
        if labels is None:
            labels = dict()
        filter = ListFeatureTablesRequest.Filter(project=project, labels=labels)

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables and return
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables

    def get_feature_table(
        self, name: str, project: Optional[str] = None
    ) -> FeatureTable:
        """
        Retrieves a feature table.

        Args:
            project: Feast project that this feature table belongs to
            name: Name of feature table

        Returns:
            Returns either the specified feature table, or raises an
            exception if none is found
        """
        if project is None:
            project = self.project
        try:
            get_feature_table_response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        return FeatureTable.from_proto(get_feature_table_response.table)

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: Optional[str] = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = BATCH_INGESTION_PRODUCTION_TIMEOUT,
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table
            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json
            project: Feast project to locate FeatureTable
            chunk_size (int): Amount of rows to load and ingest at a time.
            max_workers (int): Number of worker processes to use to encode values.
            timeout (int): Timeout in seconds to wait for completion.

        Raises:
            Exception: If the feature table cannot be found, or its batch
                source is a non-parquet file source.

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            >>>     {
            >>>         "datetime": [pd.datetime.now()],
            >>>         "driver": [1001],
            >>>         "rating": [4.3],
            >>>     }
            >>> )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """
        if project is None:
            project = self.project

        # Accept either a FeatureTable or its name; in both cases re-fetch the
        # registered definition so ingestion uses the server-side state.
        # (Previously a plain string left `name` unbound and was never resolved.)
        if isinstance(feature_table, FeatureTable):
            name = feature_table.name
        else:
            name = feature_table
        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project
        )
        if fetched_feature_table is not None:
            feature_table = fetched_feature_table
        else:
            raise Exception(f"FeatureTable, {name} cannot be found.")

        # Check 1) Only parquet file format for FeatureTable batch source is supported
        if (
            feature_table.batch_source
            and issubclass(type(feature_table.batch_source), FileSource)
            and "".join(
                feature_table.batch_source.file_options.file_format.split()
            ).lower()
            != "parquet"
        ):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}."
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            feature_table.batch_source.timestamp_column,
            feature_table.batch_source.field_mapping,
        )

        dir_path = None
        with_partitions = False
        if (
            issubclass(type(feature_table.batch_source), FileSource)
            and feature_table.batch_source.date_partition_column
        ):
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                feature_table.batch_source.date_partition_column,
                feature_table.batch_source.timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                chunk_size,
                max_workers,
            )

        try:
            if issubclass(type(feature_table.batch_source), FileSource):
                # Strip the trailing character from the file url — presumably
                # a wildcard suffix; TODO confirm against FileSource callers.
                file_url = feature_table.batch_source.file_options.file_url[:-1]
                _upload_to_file_source(file_url, with_partitions, dest_path)
            if issubclass(type(feature_table.batch_source), BigQuerySource):
                bq_table_ref = feature_table.batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    feature_table.batch_source.timestamp_column
                )
                _upload_to_bq_source(
                    bq_table_ref, feature_table_timestamp_column, dest_path
                )
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print("Data has been successfully ingested into FeatureTable batch source.")

    def _get_grpc_metadata(self):
        """
        Returns a metadata tuple to attach to gRPC requests. This is primarily
        used when authentication is enabled but SSL/TLS is disabled.

        Returns: Tuple of metadata to attach to each gRPC call
        """
        if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata:
            return self._auth_metadata.get_signed_meta()
        return ()
class Client: """ Feast Client: Used for creating, managing, and retrieving features. """ def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs): """ The Feast Client should be initialized with at least one service url Args: core_url: Feast Core URL. Used to manage features serving_url: Feast Serving URL. Used to retrieve features project: Sets the active project. This field is optional. core_secure: Use client-side SSL/TLS for Core gRPC API serving_secure: Use client-side SSL/TLS for Serving gRPC API options: Configuration options to initialize client with **kwargs: Additional keyword arguments that will be used as configuration options along with "options" """ if options is None: options = dict() self._config = Config(options={**options, **kwargs}) self.__core_channel: grpc.Channel = None self.__serving_channel: grpc.Channel = None self._core_service_stub: CoreServiceStub = None self._serving_service_stub: ServingServiceStub = None @property def core_url(self) -> str: """ Retrieve Feast Core URL Returns: Feast Core URL string """ return self._config.get(CONFIG_CORE_URL_KEY) @core_url.setter def core_url(self, value: str): """ Set the Feast Core URL Args: value: Feast Core URL """ self._config.set(CONFIG_CORE_URL_KEY, value) @property def serving_url(self) -> str: """ Retrieve Serving Core URL Returns: Feast Serving URL string """ return self._config.get(CONFIG_SERVING_URL_KEY) @serving_url.setter def serving_url(self, value: str): """ Set the Feast Serving URL Args: value: Feast Serving URL """ self._config.set(CONFIG_SERVING_URL_KEY, value) @property def core_secure(self) -> bool: """ Retrieve Feast Core client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(CONFIG_CORE_SECURE_KEY) @core_secure.setter def core_secure(self, value: bool): """ Set the Feast Core client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(CONFIG_CORE_SECURE_KEY, value) 
@property def serving_secure(self) -> bool: """ Retrieve Feast Serving client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(CONFIG_SERVING_SECURE_KEY) @serving_secure.setter def serving_secure(self, value: bool): """ Set the Feast Serving client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(CONFIG_SERVING_SECURE_KEY, value) def version(self): """ Returns version information from Feast Core and Feast Serving """ result = {} if self.serving_url: self._connect_serving() serving_version = self._serving_service_stub.GetFeastServingInfo( GetFeastServingInfoRequest(), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ).version result["serving"] = { "url": self.serving_url, "version": serving_version } if self.core_url: self._connect_core() core_version = self._core_service_stub.GetFeastCoreVersion( GetFeastCoreVersionRequest(), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ).version result["core"] = {"url": self.core_url, "version": core_version} return result def _connect_core(self, skip_if_connected: bool = True): """ Connect to Core API Args: skip_if_connected: Do not attempt to connect if already connected """ if skip_if_connected and self._core_service_stub: return if not self.core_url: raise ValueError("Please set Feast Core URL.") if self.__core_channel is None: if self.core_secure or self.core_url.endswith(":443"): self.__core_channel = grpc.secure_channel( self.core_url, grpc.ssl_channel_credentials()) else: self.__core_channel = grpc.insecure_channel(self.core_url) try: grpc.channel_ready_future( self.__core_channel).result(timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)) except grpc.FutureTimeoutError: raise ConnectionError( f"Connection timed out while attempting to connect to Feast " f"Core gRPC server {self.core_url} ") else: self._core_service_stub = 
CoreServiceStub(self.__core_channel) def _connect_serving(self, skip_if_connected=True): """ Connect to Serving API Args: skip_if_connected: Do not attempt to connect if already connected """ if skip_if_connected and self._serving_service_stub: return if not self.serving_url: raise ValueError("Please set Feast Serving URL.") if self.__serving_channel is None: if self.serving_secure or self.serving_url.endswith(":443"): self.__serving_channel = grpc.secure_channel( self.serving_url, grpc.ssl_channel_credentials()) else: self.__serving_channel = grpc.insecure_channel( self.serving_url) try: grpc.channel_ready_future( self.__serving_channel).result(timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)) except grpc.FutureTimeoutError: raise ConnectionError( f"Connection timed out while attempting to connect to Feast " f"Serving gRPC server {self.serving_url} ") else: self._serving_service_stub = ServingServiceStub( self.__serving_channel) @property def project(self) -> Union[str, None]: """ Retrieve currently active project Returns: Project name """ return self._config.get(CONFIG_PROJECT_KEY) def set_project(self, project: str): """ Set currently active Feast project Args: project: Project to set as active """ self._config.set(CONFIG_PROJECT_KEY, project) def list_projects(self) -> List[str]: """ List all active Feast projects Returns: List of project names """ self._connect_core() response = self._core_service_stub.ListProjects( ListProjectsRequest(), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ) # type: ListProjectsResponse return list(response.projects) def create_project(self, project: str): """ Creates a Feast project Args: project: Name of project """ self._connect_core() self._core_service_stub.CreateProject( CreateProjectRequest(name=project), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ) # type: CreateProjectResponse def archive_project(self, project): """ Archives a project. 
Project will still continue to function for ingestion and retrieval, but will be in a read-only state. It will also not be visible from the Core API for management purposes. Args: project: Name of project to archive """ self._connect_core() self._core_service_stub.ArchiveProject( ArchiveProjectRequest(name=project), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ) # type: ArchiveProjectResponse if self._project == project: self._project = "" def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]): """ Idempotently registers feature set(s) with Feast Core. Either a single feature set or a list can be provided. Args: feature_sets: List of feature sets that will be registered """ if not isinstance(feature_sets, list): feature_sets = [feature_sets] for feature_set in feature_sets: if isinstance(feature_set, FeatureSet): self._apply_feature_set(feature_set) continue raise ValueError( f"Could not determine feature set type to apply {feature_set}") def _apply_feature_set(self, feature_set: FeatureSet): """ Registers a single feature set with Feast Args: feature_set: Feature set that will be registered """ self._connect_core() feature_set.is_valid() feature_set_proto = feature_set.to_proto() if len(feature_set_proto.spec.project) == 0: if self.project is None: raise ValueError( f"No project found in feature set {feature_set.name}. 
" f"Please set the project within the feature set or within " f"your Feast Client.") else: feature_set_proto.spec.project = self.project # Convert the feature set to a request and send to Feast Core try: apply_fs_response = self._core_service_stub.ApplyFeatureSet( ApplyFeatureSetRequest(feature_set=feature_set_proto), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ) # type: ApplyFeatureSetResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned feature set applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set) # If the feature set has changed, update the local copy if apply_fs_response.status == ApplyFeatureSetResponse.Status.CREATED: print( f'Feature set updated/created: "{applied_fs.name}:{applied_fs.version}"' ) # If no change has been applied, do nothing if apply_fs_response.status == ApplyFeatureSetResponse.Status.NO_CHANGE: print(f"No change detected or applied: {feature_set.name}") # Deep copy from the returned feature set to the local feature set feature_set._update_from_feature_set(applied_fs) def list_feature_sets(self, project: str = None, name: str = None, version: str = None) -> List[FeatureSet]: """ Retrieve a list of feature sets from Feast Core Args: project: Filter feature sets based on project name name: Filter feature sets based on feature set name version: Filter feature sets based on version numbf, Returns: List of feature sets """ self._connect_core() if project is None: if self.project is not None: project = self.project else: project = "*" if name is None: name = "*" if version is None: version = "*" filter = ListFeatureSetsRequest.Filter(project=project, feature_set_name=name, feature_set_version=version) # Get latest feature sets from Feast Core feature_set_protos = self._core_service_stub.ListFeatureSets( ListFeatureSetsRequest( filter=filter)) # type: ListFeatureSetsResponse # Extract feature sets and return feature_sets = [] for feature_set_proto in 
feature_set_protos.feature_sets: feature_set = FeatureSet.from_proto(feature_set_proto) feature_set._client = self feature_sets.append(feature_set) return feature_sets def get_feature_set(self, name: str, version: int = None, project: str = None) -> Union[FeatureSet, None]: """ Retrieves a feature set. If no version is specified then the latest version will be returned. Args: project: Feast project that this feature set belongs to name: Name of feature set version: Version of feature set Returns: Returns either the specified feature set, or raises an exception if none is found """ self._connect_core() if project is None: if self.project is not None: project = self.project else: raise ValueError("No project has been configured.") if version is None: version = 0 try: get_feature_set_response = self._core_service_stub.GetFeatureSet( GetFeatureSetRequest( project=project, name=name.strip(), version=int(version))) # type: GetFeatureSetResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) return FeatureSet.from_proto(get_feature_set_response.feature_set) def list_entities(self) -> Dict[str, Entity]: """ Returns a dictionary of entities across all feature sets Returns: Dictionary of entities, indexed by name """ entities_dict = OrderedDict() for fs in self.list_feature_sets(): for entity in fs.entities: entities_dict[entity.name] = entity return entities_dict def get_batch_features( self, feature_refs: List[str], entity_rows: Union[pd.DataFrame, str], default_project: str = None, ) -> RetrievalJob: """ Retrieves historical features from a Feast Serving deployment. Args: feature_refs (List[str]): List of feature references that will be returned for each entity. Each feature reference should have the following format "project/feature:version". entity_rows (Union[pd.DataFrame, str]): Pandas dataframe containing entities and a 'datetime' column. Each entity in a feature set must be present as a column in this dataframe. 
The datetime column must contain timestamps in datetime64 format. default_project: Default project where feature values will be found. Returns: feast.job.RetrievalJob: Returns a retrival job object that can be used to monitor retrieval progress asynchronously, and can be used to materialize the results. Examples: >>> from feast import Client >>> from datetime import datetime >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") >>> feature_refs = ["my_project/bookings_7d:1", "booking_14d"] >>> entity_rows = pd.DataFrame( >>> { >>> "datetime": [pd.datetime.now() for _ in range(3)], >>> "customer": [1001, 1002, 1003], >>> } >>> ) >>> feature_retrieval_job = feast_client.get_batch_features( >>> feature_refs, entity_rows, default_project="my_project") >>> df = feature_retrieval_job.to_dataframe() >>> print(df) """ self._connect_serving() feature_references = _build_feature_references( feature_refs=feature_refs, default_project=default_project) # Retrieve serving information to determine store type and # staging location serving_info = self._serving_service_stub.GetFeastServingInfo( GetFeastServingInfoRequest(), timeout=self._config.getint( CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), ) # type: GetFeastServingInfoResponse if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH: raise Exception( f'You are connected to a store "{self._serving_url}" which ' f"does not support batch retrieval ") if isinstance(entity_rows, pd.DataFrame): # Pandas DataFrame detected # Remove timezone from datetime column if isinstance(entity_rows["datetime"].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype): entity_rows["datetime"] = pd.DatetimeIndex( entity_rows["datetime"]).tz_localize(None) elif isinstance(entity_rows, str): # String based source if not entity_rows.endswith((".avro", "*")): raise Exception( f"Only .avro and wildcard paths are accepted as entity_rows" ) else: raise Exception(f"Only pandas.DataFrame and str types are allowed" f" as 
entity_rows, but got {type(entity_rows)}.") # Export and upload entity row DataFrame to staging location # provided by Feast staged_files = export_source_to_staging_location( entity_rows, serving_info.job_staging_location) # type: List[str] request = GetBatchFeaturesRequest( features=feature_references, dataset_source=DatasetSource(file_source=DatasetSource.FileSource( file_uris=staged_files, data_format=DataFormat.DATA_FORMAT_AVRO)), ) # Retrieve Feast Job object to manage life cycle of retrieval response = self._serving_service_stub.GetBatchFeatures(request) return RetrievalJob(response.job, self._serving_service_stub) def get_online_features( self, feature_refs: List[str], entity_rows: List[GetOnlineFeaturesRequest.EntityRow], default_project: Optional[str] = None, ) -> GetOnlineFeaturesResponse: """ Retrieves the latest online feature data from Feast Serving Args: feature_refs: List of feature references in the following format [project]/[feature_name]:[version]. Only the feature name is a required component in the reference. example: ["my_project/my_feature_1:3", "my_project3/my_feature_4:1",] entity_rows: List of GetFeaturesRequest.EntityRow where each row contains entities. Timestamp should not be set for online retrieval. All entity types within a feature default_project: This project will be used if the project name is not provided in the feature reference Returns: Returns a list of maps where each item in the list contains the latest feature values for the provided entities """ self._connect_serving() return self._serving_service_stub.GetOnlineFeatures( GetOnlineFeaturesRequest( features=_build_feature_references( feature_refs=feature_refs, default_project=(default_project if not self.project else self.project), ), entity_rows=entity_rows, )) def list_ingest_jobs( self, job_id: str = None, feature_set_ref: FeatureSetRef = None, store_name: str = None, ): """ List the ingestion jobs currently registered in Feast, with optional filters. 
Provides detailed metadata about each ingestion job. Args: job_id: Select specific ingestion job with the given job_id feature_set_ref: Filter ingestion jobs by target feature set (via reference) store_name: Filter ingestion jobs by target feast store's name Returns: List of IngestJobs matching the given filters """ self._connect_core() # construct list request feature_set_ref = None list_filter = ListIngestionJobsRequest.Filter( id=job_id, feature_set_reference=feature_set_ref, store_name=store_name, ) request = ListIngestionJobsRequest(filter=list_filter) # make list request & unpack response response = self._core_service_stub.ListIngestionJobs(request) ingest_jobs = [ IngestJob(proto, self._core_service_stub) for proto in response.jobs ] return ingest_jobs def restart_ingest_job(self, job: IngestJob): """ Restart ingestion job currently registered in Feast. NOTE: Data might be lost during the restart for some job runners. Does not support stopping a job in a transitional (ie pending, suspending, aborting), terminal state (ie suspended or aborted) or unknown status Args: job: IngestJob to restart """ self._connect_core() request = RestartIngestionJobRequest(id=job.id) try: self._core_service_stub.RestartIngestionJob(request) except grpc.RpcError as e: raise grpc.RpcError(e.details()) def stop_ingest_job(self, job: IngestJob): """ Stop ingestion job currently resgistered in Feast Does nothing if the target job if already in a terminal state (ie suspended or aborted). 
Does not support stopping a job in a transitional (ie pending, suspending, aborting) or in a unknown status Args: job: IngestJob to restart """ self._connect_core() request = StopIngestionJobRequest(id=job.id) try: self._core_service_stub.StopIngestionJob(request) except grpc.RpcError as e: raise grpc.RpcError(e.details()) def ingest( self, feature_set: Union[str, FeatureSet], source: Union[pd.DataFrame, str], chunk_size: int = 10000, version: int = None, force_update: bool = False, max_workers: int = max(CPU_COUNT - 1, 1), disable_progress_bar: bool = False, timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT, ) -> None: """ Loads feature data into Feast for a specific feature set. Args: feature_set (typing.Union[str, feast.feature_set.FeatureSet]): Feature set object or the string name of the feature set (without a version). source (typing.Union[pd.DataFrame, str]): Either a file path or Pandas Dataframe to ingest into Feast Files that are currently supported: * parquet * csv * json chunk_size (int): Amount of rows to load and ingest at a time. version (int): Feature set version. force_update (bool): Automatically update feature set based on source data prior to ingesting. This will also register changes to Feast. max_workers (int): Number of worker processes to use to encode values. disable_progress_bar (bool): Disable printing of progress statistics. timeout (int): Timeout in seconds to wait for completion. 
Returns: None: None """ if isinstance(feature_set, FeatureSet): name = feature_set.name if version is None: version = feature_set.version elif isinstance(feature_set, str): name = feature_set else: raise Exception(f"Feature set name must be provided") # Read table and get row count dir_path, dest_path = _read_table_from_source(source, chunk_size, max_workers) pq_file = pq.ParquetFile(dest_path) row_count = pq_file.metadata.num_rows # Update the feature set based on PyArrow table of first row group if force_update: feature_set.infer_fields_from_pa( table=pq_file.read_row_group(0), discard_unused_fields=True, replace_existing_features=True, ) self.apply(feature_set) current_time = time.time() print("Waiting for feature set to be ready for ingestion...") while True: if timeout is not None and time.time() - current_time >= timeout: raise TimeoutError( "Timed out waiting for feature set to be ready") feature_set = self.get_feature_set(name, version) if (feature_set is not None and feature_set.status == FeatureSetStatus.STATUS_READY): break time.sleep(3) if timeout is not None: timeout = timeout - int(time.time() - current_time) try: # Kafka configs brokers = feature_set.get_kafka_source_brokers() topic = feature_set.get_kafka_source_topic() producer = get_producer(brokers, row_count, disable_progress_bar) # Loop optimization declarations produce = producer.produce flush = producer.flush # Transform and push data to Kafka if feature_set.source.source_type == "Kafka": for chunk in get_feature_row_chunks( file=dest_path, row_groups=list(range(pq_file.num_row_groups)), fs=feature_set, max_workers=max_workers, ): # Push FeatureRow one chunk at a time to kafka for serialized_row in chunk: produce(topic=topic, value=serialized_row) # Force a flush after each chunk flush(timeout=timeout) # Remove chunk from memory del chunk else: raise Exception( f"Could not determine source type for feature set " f'"{feature_set.name}" with source type ' f'"{feature_set.source.source_type}"') 
# Print ingestion statistics producer.print_results() finally: # Remove parquet file(s) that were created earlier print("Removing temporary file(s)...") shutil.rmtree(dir_path) return None
class Client: """ Feast Client: Used for creating, managing, and retrieving features. """ def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs): """ The Feast Client should be initialized with at least one service url Please see constants.py for configuration options. Commonly used options or arguments include: core_url: Feast Core URL. Used to manage features serving_url: Feast Serving URL. Used to retrieve features project: Sets the active project. This field is optional. core_secure: Use client-side SSL/TLS for Core gRPC API serving_secure: Use client-side SSL/TLS for Serving gRPC API enable_auth: Enable authentication and authorization auth_provider: Authentication provider – "google" or "oauth" if auth_provider is "oauth", the following fields are mandatory – oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url Args: options: Configuration options to initialize client with **kwargs: Additional keyword arguments that will be used as configuration options along with "options" """ if options is None: options = dict() self._config = Config(options={**options, **kwargs}) self._core_service_stub: Optional[CoreServiceStub] = None self._serving_service_stub: Optional[ServingServiceStub] = None self._job_service_stub: Optional[JobServiceStub] = None self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None # Configure Auth Metadata Plugin if auth is enabled if self._config.getboolean(opt.ENABLE_AUTH): self._auth_metadata = feast_auth.get_auth_metadata_plugin( self._config) @property def _core_service(self): """ Creates or returns the gRPC Feast Core Service Stub Returns: CoreServiceStub """ if not self._core_service_stub: channel = create_grpc_channel( url=self._config.get(opt.CORE_URL), enable_ssl=self._config.getboolean(opt.CORE_ENABLE_SSL), enable_auth=self._config.getboolean(opt.ENABLE_AUTH), ssl_server_cert_path=self._config.get( opt.CORE_SERVER_SSL_CERT), auth_metadata_plugin=self._auth_metadata, 
timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), ) self._core_service_stub = CoreServiceStub(channel) return self._core_service_stub @property def _serving_service(self): """ Creates or returns the gRPC Feast Serving Service Stub. If both `opentracing` and `grpcio-opentracing` are installed, an opentracing interceptor will be instantiated based on the global tracer. Returns: ServingServiceStub """ if not self._serving_service_stub: channel = create_grpc_channel( url=self._config.get(opt.SERVING_URL), enable_ssl=self._config.getboolean(opt.SERVING_ENABLE_SSL), enable_auth=self._config.getboolean(opt.ENABLE_AUTH), ssl_server_cert_path=self._config.get( opt.SERVING_SERVER_SSL_CERT), auth_metadata_plugin=self._auth_metadata, timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), ) try: import opentracing from grpc_opentracing import open_tracing_client_interceptor from grpc_opentracing.grpcext import intercept_channel interceptor = open_tracing_client_interceptor( opentracing.global_tracer()) channel = intercept_channel(channel, interceptor) except ImportError: pass self._serving_service_stub = ServingServiceStub(channel) return self._serving_service_stub @property def _use_job_service(self) -> bool: return self._config.exists(opt.JOB_SERVICE_URL) @property def _job_service(self): """ Creates or returns the gRPC Feast Job Service Stub Returns: JobServiceStub """ # Don't try to initialize job service stub if the job service is disabled if not self._use_job_service: return None if not self._job_service_stub: channel = create_grpc_channel( url=self._config.get(opt.JOB_SERVICE_URL), enable_ssl=self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL), enable_auth=self._config.getboolean(opt.ENABLE_AUTH), ssl_server_cert_path=self._config.get( opt.JOB_SERVICE_SERVER_SSL_CERT), auth_metadata_plugin=self._auth_metadata, timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), ) self._job_service_service_stub = JobServiceStub(channel) return 
self._job_service_service_stub def _extra_grpc_params(self) -> Dict[str, Any]: return dict( timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) @property def core_url(self) -> str: """ Retrieve Feast Core URL Returns: Feast Core URL string """ return self._config.get(opt.CORE_URL) @core_url.setter def core_url(self, value: str): """ Set the Feast Core URL Args: value: Feast Core URL """ self._config.set(opt.CORE_URL, value) @property def serving_url(self) -> str: """ Retrieve Feast Serving URL Returns: Feast Serving URL string """ return self._config.get(opt.SERVING_URL) @serving_url.setter def serving_url(self, value: str): """ Set the Feast Serving URL Args: value: Feast Serving URL """ self._config.set(opt.SERVING_URL, value) @property def job_service_url(self) -> str: """ Retrieve Feast Job Service URL Returns: Feast Job Service URL string """ return self._config.get(opt.JOB_SERVICE_URL) @job_service_url.setter def job_service_url(self, value: str): """ Set the Feast Job Service URL Args: value: Feast Job Service URL """ self._config.set(opt.JOB_SERVICE_URL, value) @property def core_secure(self) -> bool: """ Retrieve Feast Core client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(opt.CORE_ENABLE_SSL) @core_secure.setter def core_secure(self, value: bool): """ Set the Feast Core client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(opt.CORE_ENABLE_SSL, value) @property def serving_secure(self) -> bool: """ Retrieve Feast Serving client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(opt.SERVING_ENABLE_SSL) @serving_secure.setter def serving_secure(self, value: bool): """ Set the Feast Serving client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(opt.SERVING_ENABLE_SSL, value) @property def job_service_secure(self) -> 
bool: """ Retrieve Feast Job Service client-side SSL/TLS setting Returns: Whether client-side SSL/TLS is enabled """ return self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL) @job_service_secure.setter def job_service_secure(self, value: bool): """ Set the Feast Job Service client-side SSL/TLS setting Args: value: True to enable client-side SSL/TLS """ self._config.set(opt.JOB_SERVICE_ENABLE_SSL, value) def version(self): """ Returns version information from Feast Core and Feast Serving """ import pkg_resources result = { "sdk": { "version": pkg_resources.get_distribution("feast").version }, "serving": "not configured", "core": "not configured", } if self.serving_url: serving_version = self._serving_service.GetFeastServingInfo( GetFeastServingInfoRequest(), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ).version result["serving"] = { "url": self.serving_url, "version": serving_version } if self.core_url: core_version = self._core_service.GetFeastCoreVersion( GetFeastCoreVersionRequest(), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ).version result["core"] = {"url": self.core_url, "version": core_version} return result @property def project(self) -> str: """ Retrieve currently active project Returns: Project name """ if not self._config.get(opt.PROJECT): raise ValueError("No project has been configured.") return self._config.get(opt.PROJECT) def set_project(self, project: Optional[str] = None): """ Set currently active Feast project Args: project: Project to set as active. If unset, will reset to the default project. 
""" if project is None: project = opt().PROJECT self._config.set(opt.PROJECT, project) def list_projects(self) -> List[str]: """ List all active Feast projects Returns: List of project names """ response = self._core_service.ListProjects( ListProjectsRequest(), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ListProjectsResponse return list(response.projects) def create_project(self, project: str): """ Creates a Feast project Args: project: Name of project """ self._core_service.CreateProject( CreateProjectRequest(name=project), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: CreateProjectResponse def archive_project(self, project): """ Archives a project. Project will still continue to function for ingestion and retrieval, but will be in a read-only state. It will also not be visible from the Core API for management purposes. Args: project: Name of project to archive """ try: self._core_service_stub.ArchiveProject( ArchiveProjectRequest(name=project), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ArchiveProjectResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # revert to the default project if self._project == project: self._project = opt().PROJECT def apply( self, objects: Union[List[Union[Entity, FeatureTable]], Entity, FeatureTable], project: str = None, ): """ Idempotently registers entities and feature tables with Feast Core. Either a single entity or feature table or a list can be provided. 
Args: objects: List of entities and/or feature tables that will be registered Examples: >>> from feast import Client >>> from feast.entity import Entity >>> from feast.value_type import ValueType >>> >>> feast_client = Client(core_url="localhost:6565") >>> entity = Entity( >>> name="driver_entity", >>> description="Driver entity for car rides", >>> value_type=ValueType.STRING, >>> labels={ >>> "key": "val" >>> } >>> ) >>> feast_client.apply(entity) """ if project is None: project = self.project if not isinstance(objects, list): objects = [objects] for obj in objects: if isinstance(obj, Entity): self._apply_entity(project, obj) # type: ignore elif isinstance(obj, FeatureTable): self._apply_feature_table(project, obj) # type: ignore else: raise ValueError( f"Could not determine object type to apply {obj} with type {type(obj)}. Type must be Entity or FeatureTable." ) def apply_entity(self, entities: Union[List[Entity], Entity], project: str = None): """ Deprecated. Please see apply(). """ warnings.warn( "The method apply_entity() is being deprecated. Please use apply() instead. 
Feast 0.10 and onwards will not support apply_entity().", DeprecationWarning, ) if project is None: project = self.project if not isinstance(entities, list): entities = [entities] for entity in entities: if isinstance(entity, Entity): self._apply_entity(project, entity) # type: ignore continue raise ValueError( f"Could not determine entity type to apply {entity}") def _apply_entity(self, project: str, entity: Entity): """ Registers a single entity with Feast Args: entity: Entity that will be registered """ entity.is_valid() entity_proto = entity.to_spec_proto() # Convert the entity to a request and send to Feast Core try: apply_entity_response = self._core_service.ApplyEntity( ApplyEntityRequest(project=project, spec=entity_proto), # type: ignore timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ApplyEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned entity applied_entity = Entity.from_proto(apply_entity_response.entity) # Deep copy from the returned entity to the local entity entity._update_from_entity(applied_entity) def list_entities(self, project: str = None, labels: Dict[str, str] = dict()) -> List[Entity]: """ Retrieve a list of entities from Feast Core Args: project: Filter entities based on project name labels: User-defined labels that these entities are associated with Returns: List of entities """ if project is None: project = self.project filter = ListEntitiesRequest.Filter(project=project, labels=labels) # Get latest entities from Feast Core entity_protos = self._core_service.ListEntities( ListEntitiesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListEntitiesResponse # Extract entities and return entities = [] for entity_proto in entity_protos.entities: entity = Entity.from_proto(entity_proto) entity._client = self entities.append(entity) return entities def get_entity(self, name: str, project: str = None) -> Entity: """ Retrieves 
an entity. Args: project: Feast project that this entity belongs to name: Name of entity Returns: Returns either the specified entity, or raises an exception if none is found """ if project is None: project = self.project try: get_entity_response = self._core_service.GetEntity( GetEntityRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) # type: GetEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) entity = Entity.from_proto(get_entity_response.entity) return entity def apply_feature_table( self, feature_tables: Union[List[FeatureTable], FeatureTable], project: str = None, ): """ Deprecated. Please see apply(). """ warnings.warn( "The method apply_feature_table() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_feature_table().", DeprecationWarning, ) if project is None: project = self.project if not isinstance(feature_tables, list): feature_tables = [feature_tables] for feature_table in feature_tables: if isinstance(feature_table, FeatureTable): self._apply_feature_table(project, feature_table) # type: ignore continue raise ValueError( f"Could not determine feature table type to apply {feature_table}" ) def _apply_feature_table(self, project: str, feature_table: FeatureTable): """ Registers a single feature table with Feast Args: feature_table: Feature table that will be registered """ feature_table.is_valid() feature_table_proto = feature_table.to_spec_proto() # Convert the feature table to a request and send to Feast Core try: apply_feature_table_response = self._core_service.ApplyFeatureTable( ApplyFeatureTableRequest( project=project, table_spec=feature_table_proto), # type: ignore timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ApplyFeatureTableResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned feature table applied_feature_table = FeatureTable.from_proto( 
apply_feature_table_response.table) # Deep copy from the returned feature table to the local entity feature_table._update_from_feature_table(applied_feature_table) def list_feature_tables( self, project: str = None, labels: Dict[str, str] = dict() ) -> List[FeatureTable]: """ Retrieve a list of feature tables from Feast Core Args: project: Filter feature tables based on project name Returns: List of feature tables """ if project is None: project = self.project filter = ListFeatureTablesRequest.Filter(project=project, labels=labels) # Get latest feature tables from Feast Core feature_table_protos = self._core_service.ListFeatureTables( ListFeatureTablesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListFeatureTablesResponse # Extract feature tables and return feature_tables = [] for feature_table_proto in feature_table_protos.tables: feature_table = FeatureTable.from_proto(feature_table_proto) feature_table._client = self feature_tables.append(feature_table) return feature_tables def get_feature_table(self, name: str, project: str = None) -> FeatureTable: """ Retrieves a feature table. Args: project: Feast project that this feature table belongs to name: Name of feature table Returns: Returns either the specified feature table, or raises an exception if none is found """ if project is None: project = self.project try: get_feature_table_response = self._core_service.GetFeatureTable( GetFeatureTableRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) # type: GetFeatureTableResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) return FeatureTable.from_proto(get_feature_table_response.table) def delete_feature_table(self, name: str, project: str = None) -> None: """ Deletes a feature table. 
Args: project: Feast project that this feature table belongs to name: Name of feature table """ if project is None: project = self.project try: self._core_service.DeleteFeatureTable( DeleteFeatureTableRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) def list_features_by_ref( self, project: str = None, entities: List[str] = list(), labels: Dict[str, str] = dict(), ) -> Dict[FeatureRef, Feature]: """ Retrieve a dictionary of feature reference to feature from Feast Core based on filters provided. Args: project: Feast project that these features belongs to entities: Feast entity that these features are associated with labels: Feast labels that these features are associated with Returns: Dictionary of <feature references: features> Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565") >>> features = feast_client.list_features(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"}) >>> print(features) """ if project is None: project = self.project filter = ListFeaturesRequest.Filter(project=project, entities=entities, labels=labels) feature_protos = self._core_service.ListFeatures( ListFeaturesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListFeaturesResponse # Extract features and return features_dict = {} for ref_str, feature_proto in feature_protos.features.items(): feature_ref = FeatureRef.from_str(ref_str) feature = Feature.from_proto(feature_proto) features_dict[feature_ref] = feature return features_dict def ingest( self, feature_table: Union[str, FeatureTable], source: Union[pd.DataFrame, str], project: str = None, chunk_size: int = 10000, max_workers: int = max(CPU_COUNT - 1, 1), timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT), ) -> None: """ Batch load feature data into a FeatureTable. 
Args: feature_table (typing.Union[str, feast.feature_table.FeatureTable]): FeatureTable object or the string name of the feature table source (typing.Union[pd.DataFrame, str]): Either a file path or Pandas Dataframe to ingest into Feast Files that are currently supported: * parquet * csv * json project: Feast project to locate FeatureTable chunk_size (int): Amount of rows to load and ingest at a time. max_workers (int): Number of worker processes to use to encode values. timeout (int): Timeout in seconds to wait for completion. Examples: >>> from feast import Client >>> >>> client = Client(core_url="localhost:6565") >>> ft_df = pd.DataFrame( >>> { >>> "datetime": [pd.datetime.now()], >>> "driver": [1001], >>> "rating": [4.3], >>> } >>> ) >>> client.set_project("project1") >>> >>> driver_ft = client.get_feature_table("driver") >>> client.ingest(driver_ft, ft_df) """ if project is None: project = self.project if isinstance(feature_table, str): name = feature_table if isinstance(feature_table, FeatureTable): name = feature_table.name fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( name, project) if fetched_feature_table is not None: feature_table = fetched_feature_table else: raise Exception(f"FeatureTable, {name} cannot be found.") # Check 1) Only parquet file format for FeatureTable batch source is supported if (feature_table.batch_source and issubclass(type(feature_table.batch_source), FileSource) and isinstance( type(feature_table.batch_source.file_options.file_format), ParquetFormat)): raise Exception( f"No suitable batch source found for FeatureTable, {name}." f"Only BATCH_FILE source with parquet format is supported for batch ingestion." 
) pyarrow_table, column_names = _read_table_from_source(source) # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table _check_field_mappings( column_names, name, feature_table.batch_source.event_timestamp_column, feature_table.batch_source.field_mapping, ) dir_path = None with_partitions = False if (issubclass(type(feature_table.batch_source), FileSource) and feature_table.batch_source.date_partition_column): with_partitions = True dest_path = _write_partitioned_table_from_source( column_names, pyarrow_table, feature_table.batch_source.date_partition_column, feature_table.batch_source.event_timestamp_column, ) else: dir_path, dest_path = _write_non_partitioned_table_from_source( column_names, pyarrow_table, chunk_size, max_workers, ) try: if issubclass(type(feature_table.batch_source), FileSource): file_url = feature_table.batch_source.file_options.file_url.rstrip( "*") _upload_to_file_source(file_url, with_partitions, dest_path, self._config) if issubclass(type(feature_table.batch_source), BigQuerySource): bq_table_ref = feature_table.batch_source.bigquery_options.table_ref feature_table_timestamp_column = ( feature_table.batch_source.event_timestamp_column) _upload_to_bq_source(bq_table_ref, feature_table_timestamp_column, dest_path) finally: # Remove parquet file(s) that were created earlier print("Removing temporary file(s)...") if dir_path: shutil.rmtree(dir_path) print( "Data has been successfully ingested into FeatureTable batch source." ) def _get_grpc_metadata(self): """ Returns a metadata tuple to attach to gRPC requests. This is primarily used when authentication is enabled but SSL/TLS is disabled. 
Returns: Tuple of metadata to attach to each gRPC call """ if self._config.getboolean(opt.ENABLE_AUTH) and self._auth_metadata: return self._auth_metadata.get_signed_meta() return () def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], project: Optional[str] = None, ) -> OnlineResponse: """ Retrieves the latest online feature data from Feast Serving. Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. project: Optionally specify the the project override. If specified, uses given project for retrieval. Overrides the projects specified in Feature References if also are specified. Returns: GetOnlineFeaturesResponse containing the feature data in records. Each EntityRow provided will yield one record, which contains data fields with data value and field status metadata (if included). 
Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = feast_client.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ try: response = self._serving_service.GetOnlineFeaturesV2( GetOnlineFeaturesRequestV2( features=_build_feature_references( feature_ref_strs=feature_refs), entity_rows=_infer_online_entity_rows(entity_rows), project=project if project is not None else self.project, ), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) response = OnlineResponse(response) return response def get_historical_features( self, feature_refs: List[str], entity_source: Union[pd.DataFrame, FileSource, BigQuerySource], output_location: Optional[str] = None, ) -> RetrievalJob: """ Launch a historical feature retrieval job. Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. entity_source (Union[pd.DataFrame, FileSource, BigQuerySource]): Source for the entity rows. If entity_source is a Panda DataFrame, the dataframe will be staged to become accessible by spark workers. If one of feature tables' source is in BigQuery - entities will be upload to BQ. Otherwise to remote file storage (derived from configured staging location). It is also assumed that the column event_timestamp is present in the dataframe, and is of type datetime without timezone information. 
        The user needs to make sure that the source (or staging location, if
        entity_source is a Panda DataFrame) is accessible from the Spark
        cluster that will be used for the retrieval job.
            destination_path: Specifies the path in a bucket to write the exported feature data files

        Returns:
            Returns a retrieval job object that can be used to monitor retrieval
            progress asynchronously, and can be used to materialize the results.

        Examples:
            >>> from feast import Client
            >>> from feast.data_format import ParquetFormat
            >>> from datetime import datetime
            >>> feast_client = Client(core_url="localhost:6565")
            >>> feature_refs = ["bookings:bookings_7d", "bookings:booking_14d"]
            >>> entity_source = FileSource("event_timestamp", ParquetFormat(), "gs://some-bucket/customer")
            >>> feature_retrieval_job = feast_client.get_historical_features(
            >>>     feature_refs, entity_source)
            >>> output_file_uri = feature_retrieval_job.get_output_file_uri()
                "gs://some-bucket/output/
        """
        # Resolve the referenced FeatureTables (features filtered down to the
        # requested refs).
        feature_tables = self._get_feature_tables_from_feature_refs(
            feature_refs, self.project)
        # Point-in-time joins rely on the creation timestamp to disambiguate
        # rows sharing the same event timestamp.
        assert all(
            ft.batch_source.created_timestamp_column for ft in feature_tables), (
            "All BatchSources attached to retrieved FeatureTables "
            "must have specified `created_timestamp_column` to be used in "
            "historical dataset generation.")

        # Default output location: a fresh UUID-named folder under the
        # configured historical-feature output location.
        if output_location is None:
            output_location = os.path.join(
                self._config.get(opt.HISTORICAL_FEATURE_OUTPUT_LOCATION),
                str(uuid.uuid4()),
            )
        output_format = self._config.get(opt.HISTORICAL_FEATURE_OUTPUT_FORMAT)
        feature_sources = [
            feature_table.batch_source for feature_table in feature_tables
        ]

        # A Pandas DataFrame entity source must first be staged somewhere the
        # Spark cluster can read it: BigQuery when any feature source is a
        # BigQuerySource (same project/dataset as the first such source),
        # otherwise the configured staging file system.
        if isinstance(entity_source, pd.DataFrame):
            if any(
                isinstance(source, BigQuerySource)
                for source in feature_sources):
                first_bq_source = [
                    source for source in feature_sources
                    if isinstance(source, BigQuerySource)
                ][0]
                source_ref = table_reference_from_string(
                    first_bq_source.bigquery_options.table_ref)
                entity_source = stage_entities_to_bq(entity_source,
                                                     source_ref.project,
                                                     source_ref.dataset_id)
            else:
                entity_source = stage_entities_to_fs(
                    entity_source,
                    staging_location=self._config.get(
                        opt.SPARK_STAGING_LOCATION),
                    config=self._config,
                )

        if self._use_job_service:
            # Delegate retrieval to the remote job service; wrap the handle in
            # a remote proxy object.
            response = self._job_service.GetHistoricalFeatures(
                GetHistoricalFeaturesRequest(
                    feature_refs=feature_refs,
                    entity_source=entity_source.to_proto(),
                    project=self.project,
                    output_format=output_format,
                    output_location=output_location,
                ),
                **self._extra_grpc_params(),
            )
            return RemoteRetrievalJob(
                self._job_service,
                self._extra_grpc_params,
                response.id,
                output_file_uri=response.output_file_uri,
            )
        else:
            # Launch the Spark retrieval job directly from this client.
            return start_historical_feature_retrieval_job(
                client=self,
                project=self.project,
                entity_source=entity_source,
                feature_tables=feature_tables,
                output_format=output_format,
                output_path=output_location,
            )

    def get_historical_features_df(
        self,
        feature_refs: List[str],
        entity_source: Union[FileSource, BigQuerySource],
    ):
        """
        Launch a historical feature retrieval job.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
            entity_source (Union[FileSource, BigQuerySource]): Source for the entity rows.
                The user needs to make sure that the source is accessible from
                the Spark cluster that will be used for the retrieval job.

        Returns:
            Returns the historical feature retrieval result in the form of Spark dataframe.

        Examples:
            >>> from feast import Client
            >>> from feast.data_format import ParquetFormat
            >>> from datetime import datetime
            >>> from pyspark.sql import SparkSession
            >>> spark = SparkSession.builder.getOrCreate()
            >>> feast_client = Client(core_url="localhost:6565")
            >>> feature_refs = ["bookings:bookings_7d", "bookings:booking_14d"]
            >>> entity_source = FileSource("event_timestamp", ParquetFormat, "gs://some-bucket/customer")
            >>> df = feast_client.get_historical_features(
            >>>     feature_refs, entity_source)
        """
        feature_tables = self._get_feature_tables_from_feature_refs(
            feature_refs, self.project)
        # Unlike get_historical_features(), this runs retrieval in a Spark
        # session and returns the resulting dataframe directly.
        return start_historical_feature_retrieval_spark_session(
            client=self,
            project=self.project,
            entity_source=entity_source,
            feature_tables=feature_tables,
        )

    def _get_feature_tables_from_feature_refs(self, feature_refs: List[str],
                                              project: Optional[str]):
        # Fetch each referenced FeatureTable and narrow its feature list to
        # only the requested features.
        # NOTE(review): itertools.groupby groups *consecutive* items only, so
        # refs for the same table that are not adjacent in feature_refs will
        # produce duplicate table fetches — confirm callers pass refs grouped
        # by table.
        feature_refs_grouped_by_table = [
            (feature_table_name, list(grouped_feature_refs))
            for feature_table_name, grouped_feature_refs in groupby(
                feature_refs, lambda x: x.split(":")[0])
        ]

        feature_tables = []
        for feature_table_name, grouped_feature_refs in feature_refs_grouped_by_table:
            feature_table = self.get_feature_table(feature_table_name, project)
            feature_names = [f.split(":")[-1] for f in grouped_feature_refs]
            feature_table.features = [
                f for f in feature_table.features if f.name in feature_names
            ]
            feature_tables.append(feature_table)
        return feature_tables

    def start_offline_to_online_ingestion(
        self,
        feature_table: FeatureTable,
        start: datetime,
        end: datetime,
    ) -> SparkJob:
        """

        Launch Ingestion Job from Batch Source to Online Store for given featureTable

        :param feature_table: FeatureTable which will be ingested
        :param start: lower datetime boundary
        :param end: upper datetime boundary
        :return: Spark Job Proxy object
        """
        if not self._use_job_service:
            # Launch the ingestion job directly from this client.
            return start_offline_to_online_ingestion(
                client=self,
                project=self.project,
                feature_table=feature_table,
                start=start,
                end=end,
            )
        else:
            # Delegate to the remote job service.
            request = StartOfflineToOnlineIngestionJobRequest(
                project=self.project,
                table_name=feature_table.name,
            )
            request.start_date.FromDatetime(start)
            request.end_date.FromDatetime(end)
            response = self._job_service.StartOfflineToOnlineIngestionJob(
                request)
            return RemoteBatchIngestionJob(
                self._job_service,
                self._extra_grpc_params,
                response.id,
            )

    def start_stream_to_online_ingestion(
        self,
        feature_table: FeatureTable,
        extra_jars: Optional[List[str]] = None,
        project: str = None,
    ) -> SparkJob:
        # Launch a streaming ingestion job for the given feature table, either
        # locally or via the remote job service.
        if not self._use_job_service:
            return start_stream_to_online_ingestion(
                client=self,
                project=project or self.project,
                feature_table=feature_table,
                extra_jars=extra_jars or [],
            )
        else:
            # NOTE(review): the local branch honours the `project` argument,
            # but this branch always uses self.project — confirm intended.
            request = StartStreamToOnlineIngestionJobRequest(
                project=self.project,
                table_name=feature_table.name,
            )
            response = self._job_service.StartStreamToOnlineIngestionJob(
                request)
            return RemoteStreamIngestionJob(self._job_service,
                                            self._extra_grpc_params,
                                            response.id)

    def list_jobs(self, include_terminated: bool) -> List[SparkJob]:
        # List Spark jobs known to this client (or the job service),
        # optionally including terminated jobs.
        if not self._use_job_service:
            return list_jobs(include_terminated, self)
        else:
            request = ListJobsRequest(include_terminated=include_terminated)
            response = self._job_service.ListJobs(request)
            return [
                get_remote_job_from_proto(self._job_service,
                                          self._extra_grpc_params, job)
                for job in response.jobs
            ]

    def get_job_by_id(self, job_id: str) -> SparkJob:
        # Look up a single Spark job by its id.
        if not self._use_job_service:
            return get_job_by_id(job_id, self)
        else:
            request = GetJobRequest(job_id=job_id)
            response = self._job_service.GetJob(request)
            return get_remote_job_from_proto(self._job_service,
                                             self._extra_grpc_params,
                                             response.job)

    def stage_dataframe(
        self,
        df: pd.DataFrame,
        event_timestamp_column: str,
    ) -> FileSource:
        # Upload a dataframe to the configured staging location and return a
        # FileSource pointing at the staged data.
        return stage_dataframe(df, event_timestamp_column, self._config)
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """

    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        The Feast Client should be initialized with at least one service url
        Please see constants.py for configuration options. Commonly used options
        or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """
        if options is None:
            options = dict()
        # kwargs take precedence over the "options" dict on key collisions.
        self._config = Config(options={**options, **kwargs})

        # gRPC stubs and the registry are created lazily by the corresponding
        # properties below.
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None
        self._registry_impl: Optional[Registry] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(opt.ENABLE_AUTH):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

        self._configure_telemetry()

    @property
    def config(self) -> Config:
        # Expose the underlying configuration object.
        return self._config

    @property
    def _core_service(self):
        """
        Creates or returns the gRPC Feast Core Service Stub

        Returns: CoreServiceStub
        """
        if not self._core_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(opt.CORE_URL),
                enable_ssl=self._config.getboolean(opt.CORE_ENABLE_SSL),
                enable_auth=self._config.getboolean(opt.ENABLE_AUTH),
                ssl_server_cert_path=self._config.get(
                    opt.CORE_SERVER_SSL_CERT),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            )
            self._core_service_stub = CoreServiceStub(channel)
        return self._core_service_stub

    @property
    def _use_object_store_registry(self) -> bool:
        # The object-store registry is used whenever a registry path has been
        # configured; otherwise Feast Core is the source of truth.
        return self._config.exists(opt.REGISTRY_PATH)

    @property
    def _registry(self):
        # Lazily instantiate the object-store registry.
        if self._registry_impl is None:
            self._registry_impl = Registry(self._config.get(opt.REGISTRY_PATH))
        return self._registry_impl

    @property
    def _serving_service(self):
        """
        Creates or returns the gRPC Feast Serving Service Stub. If both
        `opentracing` and `grpcio-opentracing` are installed, an opentracing
        interceptor will be instantiated based on the global tracer.

        Returns: ServingServiceStub
        """
        if not self._serving_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(opt.SERVING_URL),
                enable_ssl=self._config.getboolean(opt.SERVING_ENABLE_SSL),
                enable_auth=self._config.getboolean(opt.ENABLE_AUTH),
                ssl_server_cert_path=self._config.get(
                    opt.SERVING_SERVER_SSL_CERT),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            )
            try:
                # Optional tracing support; silently skipped when the
                # opentracing packages are not installed.
                import opentracing
                from grpc_opentracing import open_tracing_client_interceptor
                from grpc_opentracing.grpcext import intercept_channel

                interceptor = open_tracing_client_interceptor(
                    opentracing.global_tracer())
                channel = intercept_channel(channel, interceptor)
            except ImportError:
                pass
            self._serving_service_stub = ServingServiceStub(channel)
        return self._serving_service_stub

    def _extra_grpc_params(self) -> Dict[str, Any]:
        # Common kwargs (timeout + auth metadata) attached to gRPC calls.
        return dict(
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )

    @property
    def core_url(self) -> str:
        """
        Retrieve Feast Core URL

        Returns: Feast Core URL string
        """
        return self._config.get(opt.CORE_URL)

    @core_url.setter
    def core_url(self, value: str):
        """
        Set the Feast Core URL

        Args:
            value: Feast Core URL
        """
        self._config.set(opt.CORE_URL, value)
    @property
    def serving_url(self) -> str:
        """
        Retrieve Feast Serving URL

        Returns: Feast Serving URL string
        """
        return self._config.get(opt.SERVING_URL)

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Set the Feast Serving URL

        Args:
            value: Feast Serving URL
        """
        self._config.set(opt.SERVING_URL, value)

    @property
    def job_service_url(self) -> str:
        """
        Retrieve Feast Job Service URL

        Returns: Feast Job Service URL string
        """
        return self._config.get(opt.JOB_SERVICE_URL)

    @job_service_url.setter
    def job_service_url(self, value: str):
        """
        Set the Feast Job Service URL

        Args:
            value: Feast Job Service URL
        """
        self._config.set(opt.JOB_SERVICE_URL, value)

    @property
    def core_secure(self) -> bool:
        """
        Retrieve Feast Core client-side SSL/TLS setting

        Returns: Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(opt.CORE_ENABLE_SSL)

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Set the Feast Core client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(opt.CORE_ENABLE_SSL, value)

    @property
    def serving_secure(self) -> bool:
        """
        Retrieve Feast Serving client-side SSL/TLS setting

        Returns: Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(opt.SERVING_ENABLE_SSL)

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Set the Feast Serving client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(opt.SERVING_ENABLE_SSL, value)

    @property
    def job_service_secure(self) -> bool:
        """
        Retrieve Feast Job Service client-side SSL/TLS setting

        Returns: Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL)

    @job_service_secure.setter
    def job_service_secure(self, value: bool):
        """
        Set the Feast Job Service client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(opt.JOB_SERVICE_ENABLE_SSL, value)

    def version(self, sdk_only=False):
        """
        Returns version information from Feast Core and Feast Serving
        """
        import pkg_resources

        try:
            sdk_version = pkg_resources.get_distribution("feast").version
        except pkg_resources.DistributionNotFound:
            # Running from a source checkout rather than an installed package.
            sdk_version = "local build"
        if sdk_only:
            return sdk_version

        result = {
            "sdk": {
                "version": sdk_version
            },
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_version = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            ).version
            result["serving"] = {
                "url": self.serving_url,
                "version": serving_version
            }

        # Core version is only reachable when Feast Core is in use (not the
        # object store registry).
        if not self._use_object_store_registry and self.core_url:
            core_version = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            ).version
            result["core"] = {"url": self.core_url, "version": core_version}

        return result

    def _configure_telemetry(self):
        # Anonymous usage telemetry: an id is persisted under ~/.feast and
        # removed again when telemetry is disabled.
        telemetry_filepath = join(expanduser("~"), ".feast", "telemetry")
        self._telemetry_enabled = (
            self._config.get(opt.TELEMETRY, "True") == "True"
        )  # written this way to turn the env var string into a boolean
        if self._telemetry_enabled:
            self._telemetry_counter = {"get_online_features": 0}
            if os.path.exists(telemetry_filepath):
                # Reuse the previously generated anonymous id.
                with open(telemetry_filepath, "r") as f:
                    self._telemetry_id = f.read()
            else:
                # First run: generate and persist a new anonymous id, and tell
                # the user how to opt out.
                self._telemetry_id = str(uuid.uuid4())
                print(
                    "Feast is an open source project that collects anonymized usage statistics. To opt out or learn more see https://docs.feast.dev/v/master/advanced/telemetry"
                )
                with open(telemetry_filepath, "w") as f:
                    f.write(self._telemetry_id)
        else:
            # Telemetry disabled: remove any previously stored id.
            if os.path.exists(telemetry_filepath):
                os.remove(telemetry_filepath)

    @property
    def project(self) -> str:
        """
        Retrieve currently active project

        Returns: Project name

        Raises:
            ValueError: If no project has been configured.
        """
        if not self._config.get(opt.PROJECT):
            raise ValueError("No project has been configured.")
        return self._config.get(opt.PROJECT)

    def set_project(self, project: Optional[str] = None):
        """
        Set currently active Feast project

        Args:
            project: Project to set as active. If unset, will reset to the default project.
        """
        if project is None:
            project = opt().PROJECT
        self._config.set(opt.PROJECT, project)

    def list_projects(self) -> List[str]:
        """
        List all active Feast projects

        Returns: List of project names
        """
        if self._use_object_store_registry:
            raise NotImplementedError(
                "Projects are not implemented for object store registry.")
        else:
            response = self._core_service.ListProjects(
                ListProjectsRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ListProjectsResponse
            return list(response.projects)

    def create_project(self, project: str):
        """
        Creates a Feast project

        Args:
            project: Name of project
        """
        if self._use_object_store_registry:
            raise NotImplementedError(
                "Projects are not implemented for object store registry.")
        else:
            self._core_service.CreateProject(
                CreateProjectRequest(name=project),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.
Args: project: Name of project to archive """ if self._use_object_store_registry: raise NotImplementedError( "Projects are not implemented for object store registry.") else: try: self._core_service.ArchiveProject( ArchiveProjectRequest(name=project), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ArchiveProjectResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # revert to the default project if self._project == project: self._project = opt().PROJECT def apply( self, objects: Union[List[Union[Entity, FeatureTable]], Entity, FeatureTable], project: str = None, ): """ Idempotently registers entities and feature tables with Feast Core. Either a single entity or feature table or a list can be provided. Args: objects: List of entities and/or feature tables that will be registered Examples: >>> from feast import Client >>> from feast.entity import Entity >>> from feast.value_type import ValueType >>> >>> feast_client = Client(core_url="localhost:6565") >>> entity = Entity( >>> name="driver_entity", >>> description="Driver entity for car rides", >>> value_type=ValueType.STRING, >>> labels={ >>> "key": "val" >>> } >>> ) >>> feast_client.apply(entity) """ if self._telemetry_enabled: log_usage( "apply", self._telemetry_id, datetime.utcnow(), self.version(sdk_only=True), ) if project is None: project = self.project if not isinstance(objects, list): objects = [objects] for obj in objects: if isinstance(obj, Entity): self._apply_entity(project, obj) # type: ignore elif isinstance(obj, FeatureTable): self._apply_feature_table(project, obj) # type: ignore else: raise ValueError( f"Could not determine object type to apply {obj} with type {type(obj)}. Type must be Entity or FeatureTable." ) def apply_entity(self, entities: Union[List[Entity], Entity], project: str = None): """ Deprecated. Please see apply(). """ warnings.warn( "The method apply_entity() is being deprecated. Please use apply() instead. 
Feast 0.10 and onwards will not support apply_entity().", DeprecationWarning, ) if project is None: project = self.project if not isinstance(entities, list): entities = [entities] for entity in entities: if isinstance(entity, Entity): self._apply_entity(project, entity) # type: ignore continue raise ValueError( f"Could not determine entity type to apply {entity}") def _apply_entity(self, project: str, entity: Entity): """ Registers a single entity with Feast Args: entity: Entity that will be registered """ if self._use_object_store_registry: return self._registry.apply_entity(entity, project) else: entity.is_valid() entity_proto = entity.to_spec_proto() # Convert the entity to a request and send to Feast Core try: apply_entity_response = self._core_service.ApplyEntity( ApplyEntityRequest(project=project, spec=entity_proto), # type: ignore timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ApplyEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned entity applied_entity = Entity.from_proto(apply_entity_response.entity) # Deep copy from the returned entity to the local entity entity._update_from_entity(applied_entity) def list_entities(self, project: str = None, labels: Dict[str, str] = dict()) -> List[Entity]: """ Retrieve a list of entities from Feast Core Args: project: Filter entities based on project name labels: User-defined labels that these entities are associated with Returns: List of entities """ if project is None: project = self.project if self._use_object_store_registry: return self._registry.list_entities(project) else: filter = ListEntitiesRequest.Filter(project=project, labels=labels) # Get latest entities from Feast Core entity_protos = self._core_service.ListEntities( ListEntitiesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListEntitiesResponse # Extract entities and return entities = [] for entity_proto in entity_protos.entities: 
entity = Entity.from_proto(entity_proto) entity._client = self entities.append(entity) return entities def get_entity(self, name: str, project: str = None) -> Entity: """ Retrieves an entity. Args: project: Feast project that this entity belongs to name: Name of entity Returns: Returns either the specified entity, or raises an exception if none is found """ if self._telemetry_enabled: log_usage( "get_entity", self._telemetry_id, datetime.utcnow(), self.version(sdk_only=True), ) if project is None: project = self.project if self._use_object_store_registry: return self._registry.get_entity(name, project) else: try: get_entity_response = self._core_service.GetEntity( GetEntityRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) # type: GetEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) entity = Entity.from_proto(get_entity_response.entity) return entity def apply_feature_table( self, feature_tables: Union[List[FeatureTable], FeatureTable], project: str = None, ): """ Deprecated. Please see apply(). """ warnings.warn( "The method apply_feature_table() is being deprecated. Please use apply() instead. 
Feast 0.10 and onwards will not support apply_feature_table().", DeprecationWarning, ) if project is None: project = self.project if not isinstance(feature_tables, list): feature_tables = [feature_tables] for feature_table in feature_tables: if isinstance(feature_table, FeatureTable): self._apply_feature_table(project, feature_table) # type: ignore continue raise ValueError( f"Could not determine feature table type to apply {feature_table}" ) def _apply_feature_table(self, project: str, feature_table: FeatureTable): """ Registers a single feature table with Feast Args: feature_table: Feature table that will be registered """ if self._use_object_store_registry: return self._registry.apply_feature_table(feature_table, project) else: feature_table.is_valid() feature_table_proto = feature_table.to_spec_proto() # Convert the feature table to a request and send to Feast Core try: apply_feature_table_response = self._core_service.ApplyFeatureTable( ApplyFeatureTableRequest( project=project, table_spec=feature_table_proto), # type: ignore timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) # type: ApplyFeatureTableResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) # Extract the returned feature table applied_feature_table = FeatureTable.from_proto( apply_feature_table_response.table) # Deep copy from the returned feature table to the local entity feature_table._update_from_feature_table(applied_feature_table) def list_feature_tables( self, project: str = None, labels: Dict[str, str] = dict() ) -> List[FeatureTable]: """ Retrieve a list of feature tables from Feast Core Args: project: Filter feature tables based on project name Returns: List of feature tables """ if project is None: project = self.project if self._use_object_store_registry: return self._registry.list_feature_tables(project) else: filter = ListFeatureTablesRequest.Filter(project=project, labels=labels) # Get latest feature tables from Feast Core 
feature_table_protos = self._core_service.ListFeatureTables( ListFeatureTablesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListFeatureTablesResponse # Extract feature tables and return feature_tables = [] for feature_table_proto in feature_table_protos.tables: feature_table = FeatureTable.from_proto(feature_table_proto) feature_table._client = self feature_tables.append(feature_table) return feature_tables def get_feature_table(self, name: str, project: str = None) -> FeatureTable: """ Retrieves a feature table. Args: project: Feast project that this feature table belongs to name: Name of feature table Returns: Returns either the specified feature table, or raises an exception if none is found """ if self._telemetry_enabled: log_usage( "get_feature_table", self._telemetry_id, datetime.utcnow(), self.version(sdk_only=True), ) if project is None: project = self.project if self._use_object_store_registry: return self._registry.get_feature_table(name, project) else: try: get_feature_table_response = self._core_service.GetFeatureTable( GetFeatureTableRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) # type: GetFeatureTableResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) return FeatureTable.from_proto(get_feature_table_response.table) def delete_feature_table(self, name: str, project: str = None) -> None: """ Deletes a feature table. 
Args: project: Feast project that this feature table belongs to name: Name of feature table """ if project is None: project = self.project if self._use_object_store_registry: return self._registry.delete_feature_table(name, project) else: try: self._core_service.DeleteFeatureTable( DeleteFeatureTableRequest(project=project, name=name.strip()), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) def list_features_by_ref( self, project: str = None, entities: List[str] = list(), labels: Dict[str, str] = dict(), ) -> Dict[FeatureRef, Feature]: """ Retrieve a dictionary of feature reference to feature from Feast Core based on filters provided. Args: project: Feast project that these features belongs to entities: Feast entity that these features are associated with labels: Feast labels that these features are associated with Returns: Dictionary of <feature references: features> Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565") >>> features = feast_client.list_features(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"}) >>> print(features) """ if self._use_object_store_registry: raise NotImplementedError( "This function is not implemented for object store registry.") else: if project is None: project = self.project filter = ListFeaturesRequest.Filter(project=project, entities=entities, labels=labels) feature_protos = self._core_service.ListFeatures( ListFeaturesRequest(filter=filter), metadata=self._get_grpc_metadata(), ) # type: ListFeaturesResponse # Extract features and return features_dict = {} for ref_str, feature_proto in feature_protos.features.items(): feature_ref = FeatureRef.from_str(ref_str) feature = Feature.from_proto(feature_proto) features_dict[feature_ref] = feature return features_dict def ingest( self, feature_table: Union[str, FeatureTable], source: Union[pd.DataFrame, str], project: str = None, chunk_size: int = 10000, 
max_workers: int = max(CPU_COUNT - 1, 1), timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT), ) -> None: """ Batch load feature data into a FeatureTable. Args: feature_table (typing.Union[str, feast.feature_table.FeatureTable]): FeatureTable object or the string name of the feature table source (typing.Union[pd.DataFrame, str]): Either a file path or Pandas Dataframe to ingest into Feast Files that are currently supported: * parquet * csv * json project: Feast project to locate FeatureTable chunk_size (int): Amount of rows to load and ingest at a time. max_workers (int): Number of worker processes to use to encode values. timeout (int): Timeout in seconds to wait for completion. Examples: >>> from feast import Client >>> >>> client = Client(core_url="localhost:6565") >>> ft_df = pd.DataFrame( >>> { >>> "datetime": [pd.datetime.now()], >>> "driver": [1001], >>> "rating": [4.3], >>> } >>> ) >>> client.set_project("project1") >>> >>> driver_ft = client.get_feature_table("driver") >>> client.ingest(driver_ft, ft_df) """ if self._telemetry_enabled: log_usage( "ingest", self._telemetry_id, datetime.utcnow(), self.version(sdk_only=True), ) if project is None: project = self.project if isinstance(feature_table, str): name = feature_table if isinstance(feature_table, FeatureTable): name = feature_table.name fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( name, project) if fetched_feature_table is not None: feature_table = fetched_feature_table else: raise Exception(f"FeatureTable, {name} cannot be found.") # Check 1) Only parquet file format for FeatureTable batch source is supported if (feature_table.batch_source and issubclass(type(feature_table.batch_source), FileSource) and isinstance( type(feature_table.batch_source.file_options.file_format), ParquetFormat)): raise Exception( f"No suitable batch source found for FeatureTable, {name}." f"Only BATCH_FILE source with parquet format is supported for batch ingestion." 
) pyarrow_table, column_names = _read_table_from_source(source) # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table _check_field_mappings( column_names, name, feature_table.batch_source.event_timestamp_column, feature_table.batch_source.field_mapping, ) dir_path = None with_partitions = False if (issubclass(type(feature_table.batch_source), FileSource) and feature_table.batch_source.date_partition_column): with_partitions = True dest_path = _write_partitioned_table_from_source( column_names, pyarrow_table, feature_table.batch_source.date_partition_column, feature_table.batch_source.event_timestamp_column, ) else: dir_path, dest_path = _write_non_partitioned_table_from_source( column_names, pyarrow_table, chunk_size, max_workers, ) try: if issubclass(type(feature_table.batch_source), FileSource): file_url = feature_table.batch_source.file_options.file_url.rstrip( "*") _upload_to_file_source(file_url, with_partitions, dest_path, self._config) if issubclass(type(feature_table.batch_source), BigQuerySource): bq_table_ref = feature_table.batch_source.bigquery_options.table_ref feature_table_timestamp_column = ( feature_table.batch_source.event_timestamp_column) _upload_to_bq_source(bq_table_ref, feature_table_timestamp_column, dest_path) finally: # Remove parquet file(s) that were created earlier print("Removing temporary file(s)...") if dir_path: shutil.rmtree(dir_path) print( "Data has been successfully ingested into FeatureTable batch source." ) def _get_grpc_metadata(self): """ Returns a metadata tuple to attach to gRPC requests. This is primarily used when authentication is enabled but SSL/TLS is disabled. 
Returns: Tuple of metadata to attach to each gRPC call """ if self._config.getboolean(opt.ENABLE_AUTH) and self._auth_metadata: return self._auth_metadata.get_signed_meta() return () def get_online_features( self, feature_refs: List[str], entity_rows: List[Dict[str, Any]], project: Optional[str] = None, ) -> OnlineResponse: """ Retrieves the latest online feature data from Feast Serving. Args: feature_refs: List of feature references that will be returned for each entity. Each feature reference should have the following format: "feature_table:feature" where "feature_table" & "feature" refer to the feature and feature table names respectively. Only the feature name is required. entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. project: Optionally specify the the project override. If specified, uses given project for retrieval. Overrides the projects specified in Feature References if also are specified. Returns: GetOnlineFeaturesResponse containing the feature data in records. Each EntityRow provided will yield one record, which contains data fields with data value and field status metadata (if included). 
Examples: >>> from feast import Client >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") >>> feature_refs = ["sales:daily_transactions"] >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] >>> >>> online_response = feast_client.get_online_features( >>> feature_refs, entity_rows, project="my_project") >>> online_response_dict = online_response.to_dict() >>> print(online_response_dict) {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]} """ if self._telemetry_enabled: if self._telemetry_counter["get_online_features"] % 1000 == 0: log_usage( "get_online_features", self._telemetry_id, datetime.utcnow(), self.version(sdk_only=True), ) self._telemetry_counter["get_online_features"] += 1 try: response = self._serving_service.GetOnlineFeaturesV2( GetOnlineFeaturesRequestV2( features=_build_feature_references( feature_ref_strs=feature_refs), entity_rows=_infer_online_entity_rows(entity_rows), project=project if project is not None else self.project, ), timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT), metadata=self._get_grpc_metadata(), ) except grpc.RpcError as e: raise grpc.RpcError(e.details()) response = OnlineResponse(response) return response
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """

    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        The Feast Client should be initialized with at least one service url.

        Please see constants.py for configuration options. Commonly used
        options or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider - "google" or "oauth"
            if auth_provider is "oauth", the following fields are
            mandatory - oauth_grant_type, oauth_client_id,
            oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """
        if options is None:
            options = dict()
        # kwargs take precedence over "options" on key collisions.
        self._config = Config(options={**options, **kwargs})

        # gRPC stubs are created lazily by the _core_service /
        # _serving_service properties below.
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _core_service(self):
        """
        Creates or returns the gRPC Feast Core Service Stub.

        Returns: CoreServiceStub
        """
        if not self._core_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(CONFIG_CORE_URL_KEY),
                enable_ssl=self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
                enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY),
                ssl_server_cert_path=self._config.get(
                    CONFIG_CORE_SERVER_SSL_CERT_KEY),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            self._core_service_stub = CoreServiceStub(channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Creates or returns the gRPC Feast Serving Service Stub.

        Returns: ServingServiceStub
        """
        if not self._serving_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(CONFIG_SERVING_URL_KEY),
                enable_ssl=self._config.getboolean(
                    CONFIG_SERVING_ENABLE_SSL_KEY),
                enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY),
                ssl_server_cert_path=self._config.get(
                    CONFIG_SERVING_SERVER_SSL_CERT_KEY),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            self._serving_service_stub = ServingServiceStub(channel)
        return self._serving_service_stub

    @property
    def core_url(self) -> str:
        """
        Retrieve Feast Core URL.

        Returns: Feast Core URL string
        """
        return self._config.get(CONFIG_CORE_URL_KEY)

    @core_url.setter
    def core_url(self, value: str):
        """
        Set the Feast Core URL.

        Args:
            value: Feast Core URL
        """
        self._config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Retrieve Feast Serving URL.

        Returns: Feast Serving URL string
        """
        return self._config.get(CONFIG_SERVING_URL_KEY)

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Set the Feast Serving URL.

        Args:
            value: Feast Serving URL
        """
        self._config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Retrieve Feast Core client-side SSL/TLS setting.

        Returns: Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY)

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Set the Feast Core client-side SSL/TLS setting.

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(CONFIG_CORE_ENABLE_SSL_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Retrieve Feast Serving client-side SSL/TLS setting.

        Returns: Whether client-side SSL/TLS is enabled
        """
        return self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY)

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Set the Feast Serving client-side SSL/TLS setting.

        Args:
            value: True to enable client-side SSL/TLS
        """
        self._config.set(CONFIG_SERVING_ENABLE_SSL_KEY, value)

    def version(self):
        """
        Returns version information from Feast Core and Feast Serving.
        """
        # Local import keeps pkg_resources off the module import path.
        import pkg_resources

        result = {
            "sdk": {
                "version": pkg_resources.get_distribution("feast").version
            },
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_version = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            ).version
            result["serving"] = {
                "url": self.serving_url,
                "version": serving_version
            }

        if self.core_url:
            core_version = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            ).version
            result["core"] = {"url": self.core_url, "version": core_version}

        return result

    @property
    def project(self) -> Union[str, None]:
        """
        Retrieve currently active project.

        Returns: Project name
        """
        return self._config.get(CONFIG_PROJECT_KEY)

    def set_project(self, project: Optional[str] = None):
        """
        Set currently active Feast project.

        Args:
            project: Project to set as active. If unset, will reset to the
                default project.
        """
        if project is None:
            project = FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
        self._config.set(CONFIG_PROJECT_KEY, project)

    def list_projects(self) -> List[str]:
        """
        List all active Feast projects.

        Returns: List of project names
        """
        response = self._core_service.ListProjects(
            ListProjectsRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        return list(response.projects)

    def create_project(self, project: str):
        """
        Creates a Feast project.

        Args:
            project: Name of project
        """
        self._core_service.CreateProject(
            CreateProjectRequest(name=project),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        Args:
            project: Name of project to archive
        """
        try:
            # BUG FIX: go through the _core_service property, which lazily
            # creates the stub. The raw _core_service_stub attribute is None
            # until that property has been accessed at least once, so the
            # original call could raise AttributeError on a fresh client.
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Revert to the default project if the archived one was active.
        # BUG FIX: this client stores the active project in its Config
        # (see the "project" property / set_project), not in a "_project"
        # attribute, which does not exist on this class.
        if self.project == project:
            self._config.set(CONFIG_PROJECT_KEY,
                             FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY])

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Idempotently registers feature set(s) with Feast Core. Either a
        single feature set or a list can be provided.

        Args:
            feature_sets: List of feature sets that will be registered
        """
        if not isinstance(feature_sets, list):
            feature_sets = [feature_sets]
        for feature_set in feature_sets:
            if isinstance(feature_set, FeatureSet):
                self._apply_feature_set(feature_set)
                continue
            raise ValueError(
                f"Could not determine feature set type to apply {feature_set}")

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Registers a single feature set with Feast.

        Args:
            feature_set: Feature set that will be registered
        """
        feature_set.is_valid()
        feature_set_proto = feature_set.to_proto()
        # Fill in the active project if the feature set didn't specify one.
        if len(feature_set_proto.spec.project) == 0:
            if self.project is not None:
                feature_set_proto.spec.project = self.project

        # Convert the feature set to a request and send to Feast Core
        try:
            apply_fs_response = self._core_service.ApplyFeatureSet(
                ApplyFeatureSetRequest(feature_set=feature_set_proto),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureSetResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Extract the returned feature set
        applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set)

        # If the feature set has changed, update the local copy
        if apply_fs_response.status == ApplyFeatureSetResponse.Status.CREATED:
            print(f'Feature set created: "{applied_fs.name}"')

        if apply_fs_response.status == ApplyFeatureSetResponse.Status.UPDATED:
            print(f'Feature set updated: "{applied_fs.name}"')

        # If no change has been applied, do nothing
        if apply_fs_response.status == ApplyFeatureSetResponse.Status.NO_CHANGE:
            print(f"No change detected or applied: {feature_set.name}")

        # Deep copy from the returned feature set to the local feature set
        feature_set._update_from_feature_set(applied_fs)

    def list_feature_sets(
            self,
            project: Optional[str] = None,
            name: Optional[str] = None,
            labels: Dict[str, str] = dict()) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core.

        Args:
            project: Filter feature sets based on project name
            name: Filter feature sets based on feature set name
            labels: Filter feature sets by labels

        Returns:
            List of feature sets
        """
        # Default to the active project; "*" matches all projects/names.
        if project is None:
            if self.project is not None:
                project = self.project
            else:
                project = "*"

        if name is None:
            name = "*"

        # Renamed from "filter": avoid shadowing the builtin, and match the
        # naming used in list_ingest_jobs.
        list_filter = ListFeatureSetsRequest.Filter(project=project,
                                                    feature_set_name=name,
                                                    labels=labels)

        # Get latest feature sets from Feast Core
        feature_set_protos = self._core_service.ListFeatureSets(
            ListFeatureSetsRequest(filter=list_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureSetsResponse

        # Extract feature sets and return
        feature_sets = []
        for feature_set_proto in feature_set_protos.feature_sets:
            feature_set = FeatureSet.from_proto(feature_set_proto)
            feature_set._client = self
            feature_sets.append(feature_set)
        return feature_sets

    def get_feature_set(self,
                        name: str,
                        project: Optional[str] = None
                        ) -> Union[FeatureSet, None]:
        """
        Retrieves a feature set.

        Args:
            project: Feast project that this feature set belongs to
            name: Name of feature set

        Returns:
            Returns either the specified feature set, or raises an exception
            if none is found
        """
        if project is None:
            if self.project is not None:
                project = self.project
            else:
                raise ValueError("No project has been configured.")

        try:
            get_feature_set_response = self._core_service.GetFeatureSet(
                GetFeatureSetRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureSetResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        return FeatureSet.from_proto(get_feature_set_response.feature_set)

    def list_features_by_ref(
        self,
        project: Optional[str] = None,
        entities: List[str] = list(),
        labels: Dict[str, str] = dict(),
    ) -> Dict[FeatureRef, Feature]:
        """
        Returns a list of features based on filters provided.

        Args:
            project: Feast project that these features belongs to
            entities: Feast entity that these features are associated with
            labels: Feast labels that these features are associated with

        Returns:
            Dictionary of <feature references: features>

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """
        if project is None:
            if self.project is not None:
                project = self.project
            else:
                project = "default"

        # Renamed from "filter" to avoid shadowing the builtin.
        list_filter = ListFeaturesRequest.Filter(project=project,
                                                 entities=entities,
                                                 labels=labels)

        feature_protos = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=list_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Refs returned by Core include the project; strip it so keys are
        # plain "feature_set:feature" references.
        features_dict = {}
        for ref_str, feature_proto in feature_protos.features.items():
            feature_ref = FeatureRef.from_str(ref_str, ignore_project=True)
            feature = Feature.from_proto(feature_proto)
            features_dict[feature_ref] = feature

        return features_dict

    def list_entities(self) -> Dict[str, Entity]:
        """
        Returns a dictionary of entities across all feature sets.

        Returns:
            Dictionary of entities, indexed by name
        """
        # Later feature sets overwrite earlier entries with the same
        # entity name.
        entities_dict = OrderedDict()
        for fs in self.list_feature_sets():
            for entity in fs.entities:
                entities_dict[entity.name] = entity
        return entities_dict

    def get_batch_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        compute_statistics: bool = False,
        project: Optional[str] = None,
    ) -> RetrievalJob:
        """
        Deprecated. Please see get_historical_features.
        """
        warnings.warn(
            "The method get_batch_features() is being deprecated. Please use the identical get_historical_features(). "
            "Feast 0.7 and onwards will not support get_batch_features().",
            DeprecationWarning,
        )
        return self.get_historical_features(feature_refs, entity_rows,
                                            compute_statistics, project)

    def get_historical_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        compute_statistics: bool = False,
        project: Optional[str] = None,
    ) -> RetrievalJob:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_refs: List of feature references that will be returned
                for each entity. Each feature reference should have the
                following format: "feature_set:feature" where "feature_set"
                & "feature" refer to the feature and feature set names
                respectively. Only the feature name is required.
            entity_rows (Union[pd.DataFrame, str]):
                Pandas dataframe containing entities and a 'datetime' column.
                Each entity in a feature set must be present as a column in
                this dataframe. The datetime column must contain timestamps
                in datetime64 format.
            compute_statistics (bool):
                Indicates whether Feast should compute statistics over the
                retrieved dataset.
            project: Specifies the project which contain the FeatureSets
                which the requested features belong to.

        Returns:
            feast.job.RetrievalJob:
                Returns a retrieval job object that can be used to monitor
                retrieval progress asynchronously, and can be used to
                materialize the results.

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["my_project/bookings_7d", "booking_14d"]
            >>> entity_rows = pd.DataFrame(
            >>>     {
            >>>         "datetime": [pd.datetime.now() for _ in range(3)],
            >>>         "customer": [1001, 1002, 1003],
            >>>     }
            >>> )
            >>> feature_retrieval_job = feast_client.get_historical_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """
        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            raise Exception(
                f'You are connected to a store "{self.serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame detected
            # Remove timezone from datetime column
            if isinstance(entity_rows["datetime"].dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                entity_rows["datetime"] = pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None)
        elif isinstance(entity_rows, str):
            # String based source
            if not entity_rows.endswith((".avro", "*")):
                raise Exception(
                    "Only .avro and wildcard paths are accepted as entity_rows"
                )
        else:
            raise Exception(f"Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload entity row DataFrame to staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]

        request = GetBatchFeaturesRequest(
            features=_build_feature_references(
                feature_ref_strs=feature_refs,
                project=project if project is not None else self.project,
            ),
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=staged_files,
                data_format=DataFormat.DATA_FORMAT_AVRO)),
            compute_statistics=compute_statistics,
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        try:
            response = self._serving_service.GetBatchFeatures(
                request, metadata=self._get_grpc_metadata())
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        return RetrievalJob(
            response.job,
            self._serving_service,
            auth_metadata_plugin=self._auth_metadata,
        )

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow,
                                Dict[str, Any]]],
        project: Optional[str] = None,
        omit_entities: bool = False,
    ) -> OnlineResponse:
        """
        Retrieves the latest online feature data from Feast Serving.

        Args:
            feature_refs: List of feature references that will be returned
                for each entity. Each feature reference should have the
                following format: "feature_set:feature" where "feature_set" &
                "feature" refer to the feature and feature set names
                respectively. Only the feature name is required.
            entity_rows: A list of dictionaries where each key is an entity
                and each value is feast.types.Value or Python native form.
            project: Optionally specify the project override. If specified,
                uses given project for retrieval. Overrides the projects
                specified in Feature References if also are specified.
            omit_entities: If true will omit entity values in the returned
                feature data.

        Returns:
            GetOnlineFeaturesResponse containing the feature data in records.
            Each EntityRow provided will yield one record, which contains
            data fields with data value and field status metadata (if
            included).

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'daily_transactions': [1.1,1.2], 'customer_id': [0,1]}
        """
        try:
            response = self._serving_service.GetOnlineFeatures(
                GetOnlineFeaturesRequest(
                    omit_entities_in_response=omit_entities,
                    features=_build_feature_references(
                        feature_ref_strs=feature_refs),
                    entity_rows=_infer_online_entity_rows(entity_rows),
                    project=project if project is not None else self.project,
                ),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        response = OnlineResponse(response)
        return response

    def list_ingest_jobs(
        self,
        job_id: Optional[str] = None,
        feature_set_ref: Optional[FeatureSetRef] = None,
        store_name: Optional[str] = None,
    ):
        """
        List the ingestion jobs currently registered in Feast, with optional
        filters. Provides detailed metadata about each ingestion job.

        Args:
            job_id: Select specific ingestion job with the given job_id
            feature_set_ref: Filter ingestion jobs by target feature set
                (via reference)
            store_name: Filter ingestion jobs by target feast store's name

        Returns:
            List of IngestJobs matching the given filters
        """
        # construct list request
        feature_set_ref_proto = None
        if feature_set_ref:
            feature_set_ref_proto = feature_set_ref.to_proto()
        list_filter = ListIngestionJobsRequest.Filter(
            id=job_id,
            feature_set_reference=feature_set_ref_proto,
            store_name=store_name,
        )
        request = ListIngestionJobsRequest(filter=list_filter)
        # make list request & unpack response
        response = self._core_service.ListIngestionJobs(
            request,
            metadata=self._get_grpc_metadata(),
        )  # type: ignore
        ingest_jobs = [
            IngestJob(proto,
                      self._core_service,
                      auth_metadata_plugin=self._auth_metadata)
            for proto in response.jobs  # type: ignore
        ]

        return ingest_jobs

    def restart_ingest_job(self, job: IngestJob):
        """
        Restart ingestion job currently registered in Feast.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support stopping a job in a transitional (ie pending,
        suspending, aborting), terminal state (ie suspended or aborted) or
        unknown status.

        Args:
            job: IngestJob to restart
        """
        request = RestartIngestionJobRequest(id=job.id)
        try:
            self._core_service.RestartIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def stop_ingest_job(self, job: IngestJob):
        """
        Stop ingestion job currently registered in Feast.
        Does nothing if the target job if already in a terminal state
        (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending,
        suspending, aborting) or in a unknown status.

        Args:
            job: IngestJob to stop
        """
        request = StopIngestionJobRequest(id=job.id)
        try:
            self._core_service.StopIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        source: Union[pd.DataFrame, str],
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        disable_progress_bar: bool = False,
        timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT,
    ) -> str:
        """
        Loads feature data into Feast for a specific feature set.

        Args:
            feature_set (typing.Union[str, feast.feature_set.FeatureSet]):
                Feature set object or the string name of the feature set
            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json
            chunk_size (int):
                Amount of rows to load and ingest at a time.
            max_workers (int):
                Number of worker processes to use to encode values.
            disable_progress_bar (bool):
                Disable printing of progress statistics.
            timeout (int):
                Timeout in seconds to wait for completion.

        Returns:
            str:
                ingestion id for this dataset

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> fs_df = pd.DataFrame(
            >>>     {
            >>>         "datetime": [pd.datetime.now()],
            >>>         "driver": [1001],
            >>>         "rating": [4.3],
            >>>     }
            >>> )
            >>> client.set_project("project1")
            >>> client.ingest("driver", fs_df)
            >>>
            >>> driver_fs = client.get_feature_set(name="driver", project="project1")
            >>> client.ingest(driver_fs, fs_df)
        """
        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            project = feature_set.project
        elif isinstance(feature_set, str):
            if self.project is not None:
                project = self.project
            else:
                project = "default"
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        # Read table and get row count
        dir_path, dest_path = _read_table_from_source(source, chunk_size,
                                                      max_workers)

        pq_file = pq.ParquetFile(dest_path)

        row_count = pq_file.metadata.num_rows

        current_time = time.time()

        print("Waiting for feature set to be ready for ingestion...")
        while True:
            if timeout is not None and time.time() - current_time >= timeout:
                raise TimeoutError(
                    "Timed out waiting for feature set to be ready")
            fetched_feature_set: Optional[FeatureSet] = self.get_feature_set(
                name, project)
            if (fetched_feature_set is not None and fetched_feature_set.status
                    == FeatureSetStatus.STATUS_READY):
                feature_set = fetched_feature_set
                break
            time.sleep(3)

        # Budget the remaining timeout for the Kafka flush below.
        if timeout is not None:
            timeout = timeout - int(time.time() - current_time)

        try:
            # Kafka configs
            brokers = feature_set.get_kafka_source_brokers()
            topic = feature_set.get_kafka_source_topic()
            producer = get_producer(brokers, row_count, disable_progress_bar)

            # Loop optimization declarations
            produce = producer.produce
            flush = producer.flush
            ingestion_id = _generate_ingestion_id(feature_set)

            # Transform and push data to Kafka
            if feature_set.source.source_type == "Kafka":
                for chunk in get_feature_row_chunks(
                        file=dest_path,
                        row_groups=list(range(pq_file.num_row_groups)),
                        fs=feature_set,
                        ingestion_id=ingestion_id,
                        max_workers=max_workers,
                ):

                    # Push FeatureRow one chunk at a time to kafka
                    for serialized_row in chunk:
                        produce(topic=topic, value=serialized_row)

                    # Force a flush after each chunk
                    flush(timeout=timeout)

                    # Remove chunk from memory
                    del chunk

            else:
                raise Exception(
                    f"Could not determine source type for feature set "
                    f'"{feature_set.name}" with source type '
                    f'"{feature_set.source.source_type}"')

            # Print ingestion statistics
            producer.print_results()
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            shutil.rmtree(dir_path)

        return ingestion_id

    def get_statistics(
        self,
        feature_set_id: str,
        store: str,
        features: List[str] = [],
        ingestion_ids: Optional[List[str]] = None,
        start_date: Optional[datetime.datetime] = None,
        end_date: Optional[datetime.datetime] = None,
        force_refresh: bool = False,
        project: Optional[str] = None,
    ) -> statistics_pb2.DatasetFeatureStatisticsList:
        """
        Retrieves the feature featureStatistics computed over the data in
        the batch stores.

        Args:
            feature_set_id: Feature set id to retrieve batch
                featureStatistics for. If project is not provided, the
                default ("default") will be used.
            store: Name of the store to retrieve feature featureStatistics
                over. This store must be a historical store.
            features: Optional list of feature names to filter from the
                results.
            ingestion_ids: Optional list of dataset Ids by which to filter
                data before retrieving featureStatistics. Cannot be used with
                start_date and end_date. If multiple dataset ids are
                provided, unaggregatable featureStatistics will be dropped.
            start_date: Optional start date over which to filter statistical
                data. Data from this date will be included. Cannot be used
                with dataset_ids. If the provided period spans multiple days,
                unaggregatable featureStatistics will be dropped.
            end_date: Optional end date over which to filter statistical
                data. Data from this data will not be included. Cannot be
                used with dataset_ids. If the provided period spans multiple
                days, unaggregatable featureStatistics will be dropped.
            force_refresh: Setting this flag to true will force a
                recalculation of featureStatistics and overwrite results
                currently in the cache, if any.
            project: Manual override for default project.

        Returns:
            Returns a tensorflow DatasetFeatureStatisticsList containing
            TFDV featureStatistics.
        """
        if ingestion_ids is not None and (start_date is not None
                                          or end_date is not None):
            raise ValueError(
                "Only one of dataset_id or [start_date, end_date] can be provided."
            )

        # BUG FIX: "project" defaults to None, so the old check
        # ``project != ""`` was True for None and produced the bogus
        # feature set id "None/<feature_set_id>". Only prefix when a real
        # project string was supplied.
        if project is not None and project != "" and "/" not in feature_set_id:
            feature_set_id = f"{project}/{feature_set_id}"

        request = GetFeatureStatisticsRequest(
            feature_set_id=feature_set_id,
            features=features,
            store=store,
            force_refresh=force_refresh,
        )
        if ingestion_ids is not None:
            request.ingestion_ids.extend(ingestion_ids)
        else:
            if start_date is not None:
                request.start_date.CopyFrom(
                    Timestamp(seconds=int(start_date.timestamp())))
            if end_date is not None:
                request.end_date.CopyFrom(
                    Timestamp(seconds=int(end_date.timestamp())))

        return self._core_service.GetFeatureStatistics(
            request).dataset_feature_statistics_list

    def _get_grpc_metadata(self):
        """
        Returns a metadata tuple to attach to gRPC requests. This is
        primarily used when authentication is enabled but SSL/TLS is
        disabled.

        Returns: Tuple of metadata to attach to each gRPC call
        """
        if self._config.getboolean(
                CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata:
            return self._auth_metadata.get_signed_meta()
        return ()
class Client:
    """
    JobController Client: used internally to manage Ingestion Jobs
    """

    def __init__(self, options=None, **kwargs):
        """
        JobControllerClient should be initialized with
        jobcontroller_url: Feast JobController address

        :param options: Configuration options to initialize client with
        :param kwargs: options in kwargs style
        """
        if options is None:
            options = dict()
        # kwargs take precedence over the "options" dict on key collisions.
        self._config = Config(options={**options, **kwargs})

        # The gRPC stub is created lazily by _jobcontroller_service below.
        self._jobcontroller_service_stub: Optional[
            JobControllerServiceStub] = None

        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _jobcontroller_service(self):
        # Creates or returns the gRPC Feast JobController Service stub.
        # NOTE(review): SSL settings are read from the *Core* config keys
        # (CONFIG_CORE_ENABLE_SSL_KEY / CONFIG_CORE_SERVER_SSL_CERT_KEY) —
        # presumably the JobController shares Core's TLS setup; confirm.
        if not self._jobcontroller_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(CONFIG_JOB_CONTROLLER_SERVER_KEY),
                enable_ssl=self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
                enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY),
                ssl_server_cert_path=self._config.get(
                    CONFIG_CORE_SERVER_SSL_CERT_KEY),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            self._jobcontroller_service_stub = JobControllerServiceStub(
                channel)
        return self._jobcontroller_service_stub

    def list_ingest_jobs(
        self,
        job_id: str = None,
        feature_set_ref: FeatureSetRef = None,
        store_name: str = None,
    ):
        """
        List the ingestion jobs currently registered in Feast, with optional
        filters. Provides detailed metadata about each ingestion job.

        Args:
            job_id: Select specific ingestion job with the given job_id
            feature_set_ref: Filter ingestion jobs by target feature set
                (via reference)
            store_name: Filter ingestion jobs by target feast store's name

        Returns:
            List of IngestJobs matching the given filters
        """
        # construct list request
        feature_set_ref_proto = None
        if feature_set_ref:
            feature_set_ref_proto = feature_set_ref.to_proto()
        list_filter = ListIngestionJobsRequest.Filter(
            id=job_id,
            feature_set_reference=feature_set_ref_proto,
            store_name=store_name,
        )
        request = ListIngestionJobsRequest(filter=list_filter)
        # make list request & unpack response
        response = self._jobcontroller_service.ListIngestionJobs(
            request,
            metadata=self._get_grpc_metadata(),
        )  # type: ignore
        # Wrap each job proto so callers can manage the job's life cycle.
        ingest_jobs = [
            IngestJob(proto,
                      self._jobcontroller_service,
                      auth_metadata_plugin=self._auth_metadata)
            for proto in response.jobs  # type: ignore
        ]

        return ingest_jobs

    def restart_ingest_job(self, job: IngestJob):
        """
        Restart ingestion job currently registered in Feast.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support stopping a job in a transitional (ie pending,
        suspending, aborting), terminal state (ie suspended or aborted)
        or unknown status

        Args:
            job: IngestJob to restart
        """
        request = RestartIngestionJobRequest(id=job.id)
        try:
            self._jobcontroller_service.RestartIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            # Re-raise with only the server-provided details string.
            raise grpc.RpcError(e.details())

    def stop_ingest_job(self, job: IngestJob):
        """
        Stop ingestion job currently registered in Feast.
        Does nothing if the target job if already in a terminal state
        (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending,
        suspending, aborting) or in a unknown status

        Args:
            job: IngestJob to stop
        """
        request = StopIngestionJobRequest(id=job.id)
        try:
            self._jobcontroller_service.StopIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def _get_grpc_metadata(self):
        """
        Returns a metadata tuple to attach to gRPC requests. This is
        primarily used when authentication is enabled but SSL/TLS is
        disabled.

        Returns: Tuple of metadata to attach to each gRPC call
        """
        if self._config.getboolean(
                CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata:
            return self._auth_metadata.get_signed_meta()
        return ()