def version(self):
    """
    Returns version information from Feast Core and Feast Serving

    The SDK version is read from the installed ``feast`` distribution. If no
    such distribution is registered (for example when running from a source
    checkout), ``"local build"`` is reported instead of raising, so that the
    server versions can still be retrieved.

    Feast Serving and Feast Core are only queried when ``self.serving_url`` /
    ``self.core_url`` are set; otherwise their entry stays
    ``"not configured"``. Errors raised by the remote calls are not caught
    here.

    Returns:
        dict: A dictionary with the keys ``"sdk"``, ``"serving"`` and
        ``"core"``. ``"sdk"`` maps to ``{"version": ...}``; ``"serving"``
        and ``"core"`` map either to the string ``"not configured"`` or to a
        dictionary with ``"url"`` and ``"version"`` keys.
    """
    import pkg_resources

    try:
        sdk_version = pkg_resources.get_distribution("feast").version
    except pkg_resources.DistributionNotFound:
        # A missing distribution must not prevent reporting server versions.
        sdk_version = "local build"

    result = {
        "sdk": {"version": sdk_version},
        "serving": "not configured",
        "core": "not configured",
    }

    if self.serving_url:
        serving_version = self._serving_service.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        ).version
        result["serving"] = {"url": self.serving_url, "version": serving_version}

    if self.core_url:
        core_version = self._core_service.GetFeastCoreVersion(
            GetFeastCoreVersionRequest(),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        ).version
        result["core"] = {"url": self.core_url, "version": core_version}

    return result
def version(self):
    """
    Returns version information from Feast Core and Feast Serving

    A service is only contacted when its URL (``self.serving_url`` /
    ``self.core_url``) is set. For each such service the matching
    ``_connect_*`` method is called first, then the version is requested
    with the configured gRPC timeout.

    Returns:
        dict: May contain the keys ``"serving"`` and ``"core"``, each
        mapping to ``{"url": ..., "version": ...}``. The dictionary is
        empty when neither URL is set.
    """
    versions = {}

    if self.serving_url:
        self._connect_serving()
        # Resolve stub method, request and timeout in the same order as a
        # single chained call expression would.
        get_serving_info = self._serving_service_stub.GetFeastServingInfo
        serving_request = GetFeastServingInfoRequest()
        serving_timeout = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY
        )
        serving_reply = get_serving_info(serving_request, timeout=serving_timeout)
        reported_serving_version = serving_reply.version
        versions["serving"] = {
            "url": self.serving_url,
            "version": reported_serving_version,
        }

    if self.core_url:
        self._connect_core()
        get_core_version = self._core_service_stub.GetFeastCoreVersion
        core_request = GetFeastCoreVersionRequest()
        core_timeout = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY
        )
        core_reply = get_core_version(core_request, timeout=core_timeout)
        reported_core_version = core_reply.version
        versions["core"] = {
            "url": self.core_url,
            "version": reported_core_version,
        }

    return versions
def version(self):
    """
    Returns version information from Feast Core and Feast Serving

    Both ``_connect_core`` and ``_connect_serving`` are called up front.
    Each service is then asked for its version; a ``grpc.RpcError`` from
    either request is printed (formatted by ``format_grpc_exception``) and
    leaves that service reported as ``"not connected"`` with an empty
    version string.

    :return: Dictionary containing Core and Serving versions and status
    """
    self._connect_core()
    self._connect_serving()

    # Pessimistic defaults; only overwritten once a request succeeds.
    reported_core, core_state = "", "not connected"
    reported_serving, serving_state = "", "not connected"

    try:
        reported_core = self._core_service_stub.GetFeastCoreVersion(
            GetFeastCoreVersionRequest(),
            timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT,
        ).version
    except grpc.RpcError as rpc_error:
        print(
            format_grpc_exception(
                "GetFeastCoreVersion", rpc_error.code(), rpc_error.details()
            )
        )
    else:
        core_state = "connected"

    try:
        reported_serving = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT,
        ).version
    except grpc.RpcError as rpc_error:
        print(
            format_grpc_exception(
                "GetFeastServingInfo", rpc_error.code(), rpc_error.details()
            )
        )
    else:
        serving_state = "connected"

    core_report = {
        "url": self.core_url,
        "version": reported_core,
        "status": core_state,
    }
    serving_report = {
        "url": self.serving_url,
        "version": reported_serving,
        "status": serving_state,
    }
    return {"core": core_report, "serving": serving_report}
def version(self, sdk_only=False):
    """
    Returns version information from Feast Core and Feast Serving

    Args:
        sdk_only: When truthy, only the SDK version string is returned and
            no remote service is contacted.

    Returns:
        Either the SDK version string (``sdk_only`` truthy) or a dictionary
        with the keys ``"sdk"``, ``"serving"`` and ``"core"``. ``"sdk"``
        maps to ``{"version": ...}``; the other two map to
        ``"not configured"`` when the respective URL is unset, otherwise to
        ``{"url": ..., "version": ...}``. The SDK version is
        ``"local build"`` when no ``feast`` distribution is found.
    """
    from pkg_resources import DistributionNotFound, get_distribution

    try:
        installed_version = get_distribution("feast").version
    except DistributionNotFound:
        installed_version = "local build"

    if sdk_only:
        return installed_version

    serving_section = "not configured"
    core_section = "not configured"

    if self.serving_url:
        # Resolve stub method, request, timeout and metadata in the same
        # order as a single chained call expression would.
        fetch_serving_info = self._serving_service.GetFeastServingInfo
        serving_request = GetFeastServingInfoRequest()
        serving_timeout = self._config.getint(opt.GRPC_CONNECTION_TIMEOUT)
        serving_metadata = self._get_grpc_metadata()
        reported_serving = fetch_serving_info(
            serving_request,
            timeout=serving_timeout,
            metadata=serving_metadata,
        ).version
        serving_section = {"url": self.serving_url, "version": reported_serving}

    if self.core_url:
        fetch_core_version = self._core_service.GetFeastCoreVersion
        core_request = GetFeastCoreVersionRequest()
        core_timeout = self._config.getint(opt.GRPC_CONNECTION_TIMEOUT)
        core_metadata = self._get_grpc_metadata()
        reported_core = fetch_core_version(
            core_request,
            timeout=core_timeout,
            metadata=core_metadata,
        ).version
        core_section = {"url": self.core_url, "version": reported_core}

    return {
        "sdk": {"version": installed_version},
        "serving": serving_section,
        "core": core_section,
    }
def get_batch_features(
    self,
    feature_refs: List[str],
    entity_rows: Union[pd.DataFrame, str],
    default_project: Union[str, None] = None,
) -> RetrievalJob:
    """
    Retrieves historical features from a Feast Serving deployment.

    Args:
        feature_refs (List[str]):
            List of feature references that will be returned for each
            entity. Each feature reference should have the following
            format "project/feature:version".

        entity_rows (Union[pd.DataFrame, str]):
            Pandas dataframe containing entities and a 'datetime' column.
            Each entity in a feature set must be present as a column in
            this dataframe. The datetime column must contain timestamps in
            datetime64 format. A timezone-aware 'datetime' column is
            converted to timezone-naive on an internal copy; the dataframe
            passed in is left unmodified.
            Alternatively, a string path ending in ".avro" or "*".

        default_project: Default project where feature values will be found.

    Returns:
        feast.job.RetrievalJob:
            Returns a retrieval job object that can be used to monitor
            retrieval progress asynchronously, and can be used to
            materialize the results.

    Raises:
        Exception: If the serving deployment does not report the batch
            serving type, if a string ``entity_rows`` does not end in
            ".avro" or "*", or if ``entity_rows`` is neither a
            pandas.DataFrame nor a str.

    Examples:
        >>> import pandas as pd
        >>> from feast import Client
        >>> from datetime import datetime
        >>>
        >>> feast_client = Client(
        ...     core_url="localhost:6565", serving_url="localhost:6566")
        >>> feature_refs = ["my_project/bookings_7d:1", "booking_14d"]
        >>> entity_rows = pd.DataFrame(
        ...     {
        ...         "datetime": [datetime.now() for _ in range(3)],
        ...         "customer": [1001, 1002, 1003],
        ...     }
        ... )
        >>> feature_retrieval_job = feast_client.get_batch_features(
        ...     feature_refs, entity_rows, default_project="my_project")
        >>> df = feature_retrieval_job.to_dataframe()
        >>> print(df)
    """

    self._connect_serving()

    feature_references = _build_feature_references(
        feature_refs=feature_refs, default_project=default_project
    )

    # Retrieve serving information to determine store type and
    # staging location
    serving_info = self._serving_service_stub.GetFeastServingInfo(
        GetFeastServingInfoRequest(),
        timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
    )  # type: GetFeastServingInfoResponse

    if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
        raise Exception(
            f'You are connected to a store "{self._serving_url}" which '
            f"does not support batch retrieval "
        )

    if isinstance(entity_rows, pd.DataFrame):
        # Pandas DataFrame detected

        # Remove timezone from datetime column
        if isinstance(
            entity_rows["datetime"].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype
        ):
            # Copy first: the caller's frame must keep its tz-aware column.
            entity_rows = entity_rows.copy()
            entity_rows["datetime"] = pd.DatetimeIndex(
                entity_rows["datetime"]
            ).tz_localize(None)
    elif isinstance(entity_rows, str):
        # String based source
        if not entity_rows.endswith((".avro", "*")):
            raise Exception(
                "Only .avro and wildcard paths are accepted as entity_rows"
            )
    else:
        raise Exception(
            f"Only pandas.DataFrame and str types are allowed"
            f" as entity_rows, but got {type(entity_rows)}."
        )

    # Export and upload entity row DataFrame to staging location
    # provided by Feast
    staged_files = export_source_to_staging_location(
        entity_rows, serving_info.job_staging_location
    )  # type: List[str]

    request = GetBatchFeaturesRequest(
        features=feature_references,
        dataset_source=DatasetSource(
            file_source=DatasetSource.FileSource(
                file_uris=staged_files, data_format=DataFormat.DATA_FORMAT_AVRO
            )
        ),
    )

    # Retrieve Feast Job object to manage life cycle of retrieval
    response = self._serving_service_stub.GetBatchFeatures(request)
    return RetrievalJob(response.job, self._serving_service_stub)
def get_batch_features(self, feature_ids: List[str], entity_rows: pd.DataFrame) -> Job:
    """
    Retrieves historical features from a Feast Serving deployment.

    Args:
        feature_ids: List of feature ids that will be returned for each
            entity. Each feature id should have the following format
            "feature_set_name:version:feature_name".

        entity_rows: Pandas dataframe containing entities and a 'datetime'
            column. Each entity in a feature set must be present as a column
            in this dataframe. The datetime column must contain timestamps
            in datetime64 format. The dataframe passed in is left
            unmodified: the column rename and timezone removal are applied
            to an internal copy.

    Returns:
        Feast batch retrieval job: feast.job.Job. If a ``grpc.RpcError`` is
        raised along the way, the formatted error is printed and ``None``
        is returned instead.

    Raises:
        Exception: If the serving deployment does not report the batch
            serving type.

    Example usage:
    ============================================================
    >>> import pandas as pd
    >>> from feast import Client
    >>> from datetime import datetime
    >>>
    >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
    >>> feature_ids = ["customer:1:bookings_7d"]
    >>> entity_rows = pd.DataFrame(
    ...     {
    ...         "datetime": [datetime.now() for _ in range(3)],
    ...         "customer": [1001, 1002, 1003],
    ...     }
    ... )
    >>> feature_retrieval_job = feast_client.get_batch_features(feature_ids, entity_rows)
    >>> df = feature_retrieval_job.to_dataframe()
    >>> print(df)
    """

    self._connect_serving()

    try:
        fs_request = _build_feature_set_request(feature_ids)

        # Validate entity rows based on entities in Feast Core
        self._validate_entity_rows_for_batch_retrieval(entity_rows, fs_request)

        # Everything below rewrites the frame, so detach from the caller's
        # object first. Otherwise the caller's 'datetime' column would be
        # renamed in place and a second call with the same frame would no
        # longer find it.
        entity_rows = entity_rows.copy()

        # We want the timestamp column naming to be consistent with the
        # rest of Feast
        entity_rows.columns = [
            "event_timestamp" if col == "datetime" else col
            for col in entity_rows.columns
        ]

        # Remove timezone from datetime column
        if isinstance(
            entity_rows["event_timestamp"].dtype,
            pd.core.dtypes.dtypes.DatetimeTZDtype,
        ):
            entity_rows["event_timestamp"] = pd.DatetimeIndex(
                entity_rows["event_timestamp"]
            ).tz_localize(None)

        # Retrieve serving information to determine store type and staging location
        serving_info = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(), timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            raise Exception(
                f'You are connected to a store "{self._serving_url}" which does not support batch retrieval'
            )

        # Export and upload entity row dataframe to staging location provided by Feast
        staged_file = export_dataframe_to_staging_location(
            entity_rows, serving_info.job_staging_location
        )  # type: str

        request = GetBatchFeaturesRequest(
            feature_sets=fs_request,
            dataset_source=DatasetSource(
                file_source=DatasetSource.FileSource(
                    file_uris=[staged_file],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        response = self._serving_service_stub.GetBatchFeatures(request)
        return Job(response.job, self._serving_service_stub)

    except grpc.RpcError as e:
        # Best-effort reporting: the error is printed rather than re-raised,
        # so the method falls through and returns None.
        print(format_grpc_exception("GetBatchFeatures", e.code(), e.details()))