Beispiel #1
0
    def _connect_core(self, skip_if_connected: bool = True):
        """
        Open the gRPC channel to Feast Core and create the Core service stub.

        The channel is created only when none is stored yet. A secure channel
        is used when ``core_secure`` is truthy or the URL ends with ":443";
        otherwise an insecure channel is used.

        Args:
            skip_if_connected: Return immediately when a Core service stub
                already exists

        Raises:
            ValueError: If no Feast Core URL has been set
            ConnectionError: If the channel is not ready within the configured
                gRPC connection timeout
        """
        if skip_if_connected:
            if self._core_service_stub:
                return

        url = self.core_url
        if not url:
            raise ValueError("Please set Feast Core URL.")

        if self.__core_channel is None:
            if self.core_secure or url.endswith(":443"):
                credentials = grpc.ssl_channel_credentials()
                self.__core_channel = grpc.secure_channel(url, credentials)
            else:
                self.__core_channel = grpc.insecure_channel(url)

        # Block until the channel is usable, or give up after the timeout
        ready_future = grpc.channel_ready_future(self.__core_channel)
        try:
            ready_future.result(timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY))
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {url} ")

        self._core_service_stub = CoreServiceStub(self.__core_channel)
Beispiel #2
0
    def _connect_core(self, skip_if_connected: bool = True):
        """
        Open an insecure gRPC channel to Feast Core and create the Core stub.

        The channel is created only when none is stored yet.

        NOTE(review): on a connection timeout this prints a message and
        terminates the process with exit status 1 instead of raising.

        Args:
            skip_if_connected: Return immediately when a Core service stub
                already exists

        Raises:
            ValueError: If no Feast Core URL has been set
        """
        if skip_if_connected:
            if self._core_service_stub:
                return

        url = self.core_url
        if not url:
            raise ValueError("Please set Feast Core URL.")

        if self.__core_channel is None:
            self.__core_channel = grpc.insecure_channel(url)

        # Block until the channel is usable, or give up after the timeout
        ready_future = grpc.channel_ready_future(self.__core_channel)
        try:
            ready_future.result(timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError:
            print(
                f"Connection timed out while attempting to connect to Feast Core gRPC server {url}"
            )
            sys.exit(1)

        self._core_service_stub = CoreServiceStub(self.__core_channel)
Beispiel #3
0
 def _connect_core(self):
     """
     Create the Core channel and its service stubs on first use.

     Does nothing when a Core channel is already stored. Otherwise opens an
     insecure gRPC channel to ``core_url`` and builds the Core, Job and
     Dataset service stubs on top of it.
     """
     if self.__core_channel is not None:
         return

     self.__core_channel = grpc.insecure_channel(self.core_url)
     channel = self.__core_channel
     self._core_service_stub = CoreServiceStub(channel)
     self._job_service_stub = JobServiceStub(channel)
     self._dataset_service_stub = DatasetServiceStub(channel)
Beispiel #4
0
    def _core_service(self):
        """
        Return the cached Feast Core gRPC stub, creating it on first use.

        When no stub is stored yet, a channel is built from the client
        configuration (URL, SSL, auth, server certificate path, timeout) and
        wrapped in a CoreServiceStub, which is then cached.

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        config = self._config
        channel = create_grpc_channel(
            url=config.get(CONFIG_CORE_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._core_service_stub = CoreServiceStub(channel)
        return self._core_service_stub
    def __connect_core(self, core_url: str):
        """
        Connect to Feast Core over an insecure gRPC channel.

        The channel is created only when none is stored yet; the method then
        waits up to 5 seconds for it to become ready before creating the Core
        service stub.

        Args:
            core_url: Feast Core URL

        Raises:
            ValueError: If ``core_url`` is empty
            ConnectionError: If the channel is not ready within 5 seconds
        """
        if not core_url:
            raise ValueError("Please set Feast Core URL.")

        if self.__core_channel is None:
            self.__core_channel = grpc.insecure_channel(core_url)

        ready_future = grpc.channel_ready_future(self.__core_channel)
        try:
            ready_future.result(timeout=5)
        except grpc.FutureTimeoutError:
            message = (
                "connection timed out while attempting to connect to Feast Core gRPC server "
                + core_url)
            raise ConnectionError(message)

        self._core_service_stub = CoreServiceStub(self.__core_channel)
Beispiel #6
0
    def core_service_stub(self, core_url):
        """
        Build a Feast Core service stub for the given URL.

        A secure channel is used when the URL ends with ":443", otherwise an
        insecure one. A new channel is created on every call.

        Args:
            core_url: Feast Core URL

        Returns:
            CoreServiceStub bound to the new channel

        Raises:
            ConnectionError: If the channel is not ready within
                ``self.GRPC_CONNECTION_TIMEOUT``
        """
        if not core_url.endswith(":443"):
            channel = grpc.insecure_channel(core_url)
        else:
            channel = grpc.secure_channel(core_url,
                                          grpc.ssl_channel_credentials())

        ready_future = grpc.channel_ready_future(channel)
        try:
            ready_future.result(timeout=self.GRPC_CONNECTION_TIMEOUT)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {core_url} ")

        return CoreServiceStub(channel)
Beispiel #7
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """

    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build the client configuration and prepare lazily created gRPC stubs.

        The client should be given at least one service URL. See constants.py
        for the full list of configuration options. Commonly used ones:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider, "google" or "oauth".
                With "oauth" the following fields are mandatory:
                oauth_grant_type, oauth_client_id, oauth_client_secret,
                oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"; on a key clash
                the keyword argument wins
        """

        base_options = dict() if options is None else options
        self._config = Config(options={**base_options, **kwargs})

        # Stubs start unset
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None

        # The auth metadata plugin is only configured when auth is enabled
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(self._config)

    @property
    def _core_service(self):
        """
        Return the cached Feast Core gRPC stub, creating it on first access.

        When no stub is stored yet, a channel is built from the client
        configuration (URL, SSL, auth, server certificate path, timeout) and
        wrapped in a CoreServiceStub, which is then cached.

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        config = self._config
        channel = create_grpc_channel(
            url=config.get(CONFIG_CORE_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._core_service_stub = CoreServiceStub(channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Return the cached Feast Serving gRPC stub, creating it on first access.

        When no stub is stored yet, a channel is built from the client
        configuration (URL, SSL, auth, server certificate path, timeout) and
        wrapped in a ServingServiceStub, which is then cached.

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        config = self._config
        channel = create_grpc_channel(
            url=config.get(CONFIG_SERVING_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_SERVING_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._serving_service_stub = ServingServiceStub(channel)
        return self._serving_service_stub

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as held in the client configuration.

        Returns:
            Feast Core URL string
        """
        url = self._config.get(CONFIG_CORE_URL_KEY)
        return url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration.

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as held in the client configuration.

        Returns:
            Feast Serving URL string
        """
        url = self._config.get(CONFIG_SERVING_URL_KEY)
        return url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration.

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Core, read from the configuration.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        enabled = self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY)
        return enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Core.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_CORE_ENABLE_SSL_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Serving, read from the
        configuration.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        enabled = self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY)
        return enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Serving.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_SERVING_ENABLE_SSL_KEY, value)

    def version(self):
        """
        Collect version information for the SDK, Feast Serving and Feast Core.

        Serving and Core are only queried when their URL is configured;
        otherwise their entry stays "not configured".

        Returns:
            Dict with the keys "sdk", "serving" and "core"
        """
        import pkg_resources

        sdk_version = pkg_resources.get_distribution("feast").version
        result = {
            "sdk": {"version": sdk_version},
            "serving": "not configured",
            "core": "not configured",
        }

        serving_url = self.serving_url
        if serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            result["serving"] = {"url": serving_url, "version": serving_info.version}

        core_url = self.core_url
        if core_url:
            core_info = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            result["core"] = {"url": core_url, "version": core_info.version}

        return result

    @property
    def project(self) -> Union[str, None]:
        """
        Currently active project, read from the client configuration.

        Returns:
            Project name

        Raises:
            ValueError: If the configured project value is empty or missing
        """
        active_project = self._config.get(CONFIG_PROJECT_KEY)
        if active_project:
            return active_project
        raise ValueError("No project has been configured.")

    def set_project(self, project: Optional[str] = None):
        """
        Set the currently active Feast project in the client configuration.

        Args:
            project: Project to set as active. When None, the default project
                from FEAST_DEFAULT_OPTIONS is used instead.
        """
        target = FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY] if project is None else project
        self._config.set(CONFIG_PROJECT_KEY, target)

    def list_projects(self) -> List[str]:
        """
        Ask Feast Core for its projects.

        Returns:
            List of project names
        """

        core = self._core_service
        response = core.ListProjects(
            ListProjectsRequest(),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        return [*response.projects]

    def create_project(self, project: str):
        """
        Send a CreateProject request to Feast Core.

        Args:
            project: Name of project
        """

        core = self._core_service
        core.CreateProject(
            CreateProjectRequest(name=project),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the currently configured project, the
        client falls back to the default project.

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: If the ArchiveProject call fails; carries the
                details of the original error
        """

        try:
            # Go through the lazy property so the stub exists on first use
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details()) from e

        # Revert to the default project. The active project is stored in the
        # configuration; this class has no `_project` attribute.
        if self._config.get(CONFIG_PROJECT_KEY) == project:
            self._config.set(
                CONFIG_PROJECT_KEY, FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
            )

    def apply_entity(self, entities: Union[List[Entity], Entity], project: str = None):
        """
        Idempotently registers entities with Feast Core. Either a single
        entity or a list can be provided.

        Args:
            entities: Entity or list of entities that will be registered
            project: Project to register the entities in. Defaults to the
                currently active project.

        Raises:
            ValueError: If an item is not an Entity

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            ...     name="driver_entity",
            ...     description="Driver entity for car rides",
            ...     value_type=ValueType.STRING,
            ...     labels={
            ...         "key": "val"
            ...     }
            ... )
            >>> feast_client.apply_entity(entity)
        """

        if project is None:
            project = self.project

        to_apply = entities if isinstance(entities, list) else [entities]
        for candidate in to_apply:
            if not isinstance(candidate, Entity):
                raise ValueError(f"Could not determine entity type to apply {candidate}")
            self._apply_entity(project, candidate)  # type: ignore

    def _apply_entity(self, project: str, entity: Entity):
        """
        Registers a single entity with Feast and syncs the local object with
        the entity returned by Feast Core.

        Args:
            project: Project to register the entity in
            entity: Entity that will be registered

        Raises:
            grpc.RpcError: If the ApplyEntity call fails; carries the details
                of the original error
        """

        entity.is_valid()
        spec = entity.to_spec_proto()

        # Send the spec to Feast Core
        try:
            core = self._core_service
            response = core.ApplyEntity(
                ApplyEntityRequest(project=project, spec=spec),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Update the local entity from the one returned by Feast Core
        entity._update_from_entity(Entity.from_proto(response.entity))

    def list_entities(
        self, project: str = None, labels: Optional[Dict[str, str]] = None
    ) -> List[Entity]:
        """
        Retrieve a list of entities from Feast Core

        Args:
            project: Filter entities based on project name. Defaults to the
                currently active project.
            labels: User-defined labels that these entities are associated
                with. Defaults to no label filter.

        Returns:
            List of entities
        """

        if project is None:
            project = self.project

        # A fresh dict per call avoids sharing one mutable default object
        if labels is None:
            labels = {}

        entity_filter = ListEntitiesRequest.Filter(project=project, labels=labels)

        # Get latest entities from Feast Core
        entity_protos = self._core_service.ListEntities(
            ListEntitiesRequest(filter=entity_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        # Extract entities and attach this client to each of them
        entities = []
        for entity_proto in entity_protos.entities:
            entity = Entity.from_proto(entity_proto)
            entity._client = self
            entities.append(entity)
        return entities

    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Retrieves an entity.

        Args:
            name: Name of entity; surrounding whitespace is stripped
            project: Feast project that this entity belongs to. Defaults to
                the currently active project.

        Returns:
            The entity built from the Feast Core response

        Raises:
            grpc.RpcError: If the GetEntity call fails; carries the details
                of the original error
        """

        if project is None:
            project = self.project

        try:
            core = self._core_service
            response = core.GetEntity(
                GetEntityRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return Entity.from_proto(response.entity)

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: str = None,
    ):
        """
        Idempotently registers feature tables with Feast Core. Either a single
        feature table or a list can be provided.

        Args:
            feature_tables: Feature table or list of feature tables that will
                be registered
            project: Project to register the feature tables in. Defaults to
                the currently active project.

        Raises:
            ValueError: If an item is not a FeatureTable
        """

        if project is None:
            project = self.project

        to_apply = feature_tables if isinstance(feature_tables, list) else [feature_tables]
        for candidate in to_apply:
            if not isinstance(candidate, FeatureTable):
                raise ValueError(
                    f"Could not determine feature table type to apply {candidate}"
                )
            self._apply_feature_table(project, candidate)  # type: ignore

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Registers a single feature table with Feast and syncs the local object
        with the feature table returned by Feast Core.

        Args:
            project: Project to register the feature table in
            feature_table: Feature table that will be registered

        Raises:
            grpc.RpcError: If the ApplyFeatureTable call fails; carries the
                details of the original error
        """

        feature_table.is_valid()
        table_spec = feature_table.to_spec_proto()

        # Send the spec to Feast Core
        try:
            core = self._core_service
            response = core.ApplyFeatureTable(
                ApplyFeatureTableRequest(project=project, table_spec=table_spec),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Update the local feature table from the one returned by Feast Core
        applied = FeatureTable.from_proto(response.table)
        feature_table._update_from_feature_table(applied)

    def list_feature_tables(
        self, project: str = None, labels: Optional[Dict[str, str]] = None
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from Feast Core

        Args:
            project: Filter feature tables based on project name. Defaults to
                the currently active project.
            labels: User-defined labels that these feature tables are
                associated with. Defaults to no label filter.

        Returns:
            List of feature tables
        """

        if project is None:
            project = self.project

        # A fresh dict per call avoids sharing one mutable default object
        if labels is None:
            labels = {}

        table_filter = ListFeatureTablesRequest.Filter(project=project, labels=labels)

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=table_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables and attach this client to each of them
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables

    def get_feature_table(self, name: str, project: str = None) -> FeatureTable:
        """
        Retrieves a feature table.

        Args:
            name: Name of feature table; surrounding whitespace is stripped
            project: Feast project that this feature table belongs to.
                Defaults to the currently active project.

        Returns:
            The feature table built from the Feast Core response

        Raises:
            grpc.RpcError: If the GetFeatureTable call fails; carries the
                details of the original error
        """

        if project is None:
            project = self.project

        try:
            core = self._core_service
            response = core.GetFeatureTable(
                GetFeatureTableRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return FeatureTable.from_proto(response.table)

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: str = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = BATCH_INGESTION_PRODUCTION_TIMEOUT,
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The feature table is re-fetched from Feast Core by name, the source is
        read and written to temporary parquet file(s), and these are uploaded
        to the feature table's batch source (file or BigQuery).

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable. Defaults to the
                currently active project.

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            timeout (int):
                Timeout in seconds to wait for completion.
                NOTE(review): not referenced anywhere in this method body.

        Raises:
            Exception: If the feature table cannot be found, or its file batch
                source does not use the parquet format.

        Examples:
            >>> from datetime import datetime
            >>> import pandas as pd
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            ...         {
            ...            "datetime": [datetime.now()],
            ...            "driver": [1001],
            ...            "rating": [4.3],
            ...         }
            ...     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if project is None:
            project = self.project

        # Resolve the name for both accepted input kinds. Previously `name`
        # was only bound for FeatureTable objects, so passing a string name
        # failed with UnboundLocalError.
        if isinstance(feature_table, FeatureTable):
            name = feature_table.name
        else:
            name = feature_table

        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project
        )
        if fetched_feature_table is None:
            raise Exception(f"FeatureTable, {name} cannot be found.")
        feature_table = fetched_feature_table

        batch_source = feature_table.batch_source
        is_file_source = isinstance(batch_source, FileSource)

        # Check 1) Only parquet file format for FeatureTable batch source is supported
        if (
            batch_source
            and is_file_source
            and "".join(batch_source.file_options.file_format.split()).lower()
            != "parquet"
        ):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}. "
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            batch_source.timestamp_column,
            batch_source.field_mapping,
        )

        dir_path = None
        with_partitions = False
        if is_file_source and batch_source.date_partition_column:
            with_partitions = True
            # NOTE(review): only `dest_path` is returned here, so `dir_path`
            # stays None and the cleanup below removes nothing for this
            # branch. Confirm whether the partitioned output needs cleanup.
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                batch_source.date_partition_column,
                batch_source.timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names, pyarrow_table, chunk_size, max_workers,
            )

        try:
            if is_file_source:
                # Drops the last character of the configured URL
                # (presumably a trailing wildcard; verify against the
                # FileSource definition).
                file_url = batch_source.file_options.file_url[:-1]
                _upload_to_file_source(file_url, with_partitions, dest_path)
            if isinstance(batch_source, BigQuerySource):
                bq_table_ref = batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = batch_source.timestamp_column

                _upload_to_bq_source(
                    bq_table_ref, feature_table_timestamp_column, dest_path
                )
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print("Data has been successfully ingested into FeatureTable batch source.")

    def _get_grpc_metadata(self):
        """
        Build the metadata to attach to gRPC requests.

        Signed metadata is returned only when auth is enabled in the
        configuration and an auth metadata plugin is present; otherwise the
        metadata is empty. This is primarily used when authentication is
        enabled but SSL/TLS is disabled.

        Returns: Tuple of metadata to attach to each gRPC call
        """
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if not (auth_enabled and self._auth_metadata):
            return ()
        return self._auth_metadata.get_signed_meta()
Beispiel #8
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Create a Feast client from configuration options.

        The client should be given at least one service url. No connection is
        opened here: channels and service stubs start out unset and are
        populated by _connect_core / _connect_serving.

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options". A keyword
                argument overrides an identically named entry in "options".
                Commonly used keys:
                    core_url: Feast Core URL. Used to manage features
                    serving_url: Feast Serving URL. Used to retrieve features
                    project: Sets the active project. This field is optional.
                    core_secure: Use client-side SSL/TLS for Core gRPC API
                    serving_secure: Use client-side SSL/TLS for Serving gRPC API
        """
        base_options = dict() if options is None else options
        # Later entries win, so keyword arguments take precedence
        self._config = Config(options={**base_options, **kwargs})

        # Service stubs and their underlying channels are created on demand
        self._core_service_stub: CoreServiceStub = None
        self._serving_service_stub: ServingServiceStub = None
        self.__core_channel: grpc.Channel = None
        self.__serving_channel: grpc.Channel = None

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as currently stored in the client configuration

        Returns:
            Feast Core URL string
        """
        configured_url = self._config.get(CONFIG_CORE_URL_KEY)
        return configured_url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as currently stored in the client configuration

        Returns:
            Feast Serving URL string
        """
        configured_url = self._config.get(CONFIG_SERVING_URL_KEY)
        return configured_url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Feast Core client-side SSL/TLS setting, read as a boolean from the
        client configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        tls_enabled = self._config.getboolean(CONFIG_CORE_SECURE_KEY)
        return tls_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the Feast Core client-side SSL/TLS setting in the client
        configuration

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_CORE_SECURE_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Feast Serving client-side SSL/TLS setting, read as a boolean from the
        client configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        tls_enabled = self._config.getboolean(CONFIG_SERVING_SECURE_KEY)
        return tls_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the Feast Serving client-side SSL/TLS setting in the client
        configuration

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_SERVING_SECURE_KEY, value)

    def version(self):
        """
        Collect version information from Feast Serving and Feast Core.

        A service is only contacted when its URL is configured; Serving is
        queried before Core.

        Returns:
            Dictionary with an optional "serving" and an optional "core"
            entry, each holding the service "url" and its reported "version"
        """
        versions = {}

        if self.serving_url:
            self._connect_serving()
            serving_info = self._serving_service_stub.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            versions["serving"] = dict(url=self.serving_url,
                                       version=serving_info.version)

        if self.core_url:
            self._connect_core()
            core_info = self._core_service_stub.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            versions["core"] = dict(url=self.core_url,
                                    version=core_info.version)

        return versions

    def _connect_core(self, skip_if_connected: bool = True):
        """
        Ensure a Core service stub backed by a ready gRPC channel exists.

        A secure channel is used when core_secure is set or the URL ends in
        ":443"; otherwise an insecure channel is used. An already created
        channel is reused.

        Args:
            skip_if_connected: Do not attempt to connect if already connected

        Raises:
            ValueError: If no Feast Core URL is configured
            ConnectionError: If the channel is not ready within the
                configured connection timeout
        """
        if skip_if_connected and self._core_service_stub:
            return

        target = self.core_url
        if not target:
            raise ValueError("Please set Feast Core URL.")

        if self.__core_channel is None:
            use_tls = self.core_secure or target.endswith(":443")
            self.__core_channel = (
                grpc.secure_channel(target, grpc.ssl_channel_credentials())
                if use_tls else grpc.insecure_channel(target))

        ready_future = grpc.channel_ready_future(self.__core_channel)
        wait_seconds = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
        try:
            ready_future.result(timeout=wait_seconds)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {self.core_url} ")

        # Only reached when the channel became ready in time
        self._core_service_stub = CoreServiceStub(self.__core_channel)

    def _connect_serving(self, skip_if_connected=True):
        """
        Ensure a Serving service stub backed by a ready gRPC channel exists.

        A secure channel is used when serving_secure is set or the URL ends
        in ":443"; otherwise an insecure channel is used. An already created
        channel is reused.

        Args:
            skip_if_connected: Do not attempt to connect if already connected

        Raises:
            ValueError: If no Feast Serving URL is configured
            ConnectionError: If the channel is not ready within the
                configured connection timeout
        """
        if skip_if_connected and self._serving_service_stub:
            return

        target = self.serving_url
        if not target:
            raise ValueError("Please set Feast Serving URL.")

        if self.__serving_channel is None:
            use_tls = self.serving_secure or target.endswith(":443")
            self.__serving_channel = (
                grpc.secure_channel(target, grpc.ssl_channel_credentials())
                if use_tls else grpc.insecure_channel(target))

        ready_future = grpc.channel_ready_future(self.__serving_channel)
        wait_seconds = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
        try:
            ready_future.result(timeout=wait_seconds)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Serving gRPC server {self.serving_url} ")

        # Only reached when the channel became ready in time
        self._serving_service_stub = ServingServiceStub(
            self.__serving_channel)

    @property
    def project(self) -> Union[str, None]:
        """
        Currently active project as stored in the client configuration

        Returns:
            Project name
        """
        active_project = self._config.get(CONFIG_PROJECT_KEY)
        return active_project

    def set_project(self, project: str):
        """
        Make the given Feast project the active one by storing it in the
        client configuration

        Args:
            project: Project to set as active
        """
        config = self._config
        config.set(CONFIG_PROJECT_KEY, project)

    def list_projects(self) -> List[str]:
        """
        Ask Feast Core for its projects

        Returns:
            List of project names

        """
        self._connect_core()
        projects_response = self._core_service_stub.ListProjects(
            ListProjectsRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: ListProjectsResponse
        # Copy the repeated field into a plain Python list
        return [*projects_response.projects]

    def create_project(self, project: str):
        """
        Send a project creation request to Feast Core

        Args:
            project: Name of project
        """
        self._connect_core()

        creation_request = CreateProjectRequest(name=project)
        self._core_service_stub.CreateProject(
            creation_request,
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the client's currently active project, the
        active project is reset to an empty string.

        Args:
            project: Name of project to archive
        """

        self._connect_core()
        self._core_service_stub.ArchiveProject(
            ArchiveProjectRequest(name=project),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: ArchiveProjectResponse

        # The active project lives in the client configuration (see the
        # "project" property and set_project); this class never defines a
        # "_project" attribute, so go through the public accessors.
        if self.project == project:
            self.set_project("")

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Idempotently registers feature set(s) with Feast Core. Accepts either
        one feature set or a list of them; items are registered in order.

        Args:
            feature_sets: List of feature sets that will be registered

        Raises:
            ValueError: If an item is not a FeatureSet. Items preceding it
                have already been registered at that point.
        """
        pending = (feature_sets
                   if isinstance(feature_sets, list) else [feature_sets])
        for candidate in pending:
            if not isinstance(candidate, FeatureSet):
                raise ValueError(
                    f"Could not determine feature set type to apply {candidate}")
            self._apply_feature_set(candidate)

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Registers a single feature set with Feast and syncs the local object
        with the version returned by Feast Core.

        When the feature set carries no project, the client's active project
        is used instead.

        Args:
            feature_set: Feature set that will be registered

        Raises:
            ValueError: If neither the feature set nor the client has a
                project
            grpc.RpcError: If the request to Feast Core fails; the error
                carries the details of the original failure
        """
        self._connect_core()

        feature_set.is_valid()
        fs_proto = feature_set.to_proto()
        if not fs_proto.spec.project:
            # Fall back to the project configured on the client
            client_project = self.project
            if client_project is None:
                raise ValueError(
                    f"No project found in feature set {feature_set.name}. "
                    f"Please set the project within the feature set or within "
                    f"your Feast Client.")
            fs_proto.spec.project = client_project

        # Convert the feature set to a request and send to Feast Core
        apply_request = ApplyFeatureSetRequest(feature_set=fs_proto)
        try:
            response = self._core_service_stub.ApplyFeatureSet(
                apply_request,
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )  # type: ApplyFeatureSetResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        # Extract the returned feature set
        applied_fs = FeatureSet.from_proto(response.feature_set)

        # Report what Feast Core did with the request
        outcome = response.status
        if outcome == ApplyFeatureSetResponse.Status.CREATED:
            print(
                f'Feature set updated/created: "{applied_fs.name}:{applied_fs.version}"'
            )
        if outcome == ApplyFeatureSetResponse.Status.NO_CHANGE:
            print(f"No change detected or applied: {feature_set.name}")

        # Deep copy from the returned feature set to the local feature set
        feature_set._update_from_feature_set(applied_fs)

    def list_feature_sets(self,
                          project: str = None,
                          name: str = None,
                          version: str = None) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core

        Filters that are not supplied default to the wildcard "*", except
        that a missing project falls back to the client's active project
        when one is set.

        Args:
            project: Filter feature sets based on project name
            name: Filter feature sets based on feature set name
            version: Filter feature sets based on version number

        Returns:
            List of feature sets, each with its client set to this client
        """
        self._connect_core()

        if project is None:
            project = "*" if self.project is None else self.project
        name = "*" if name is None else name
        version = "*" if version is None else version

        fs_filter = ListFeatureSetsRequest.Filter(project=project,
                                                  feature_set_name=name,
                                                  feature_set_version=version)

        # Get latest feature sets from Feast Core
        response = self._core_service_stub.ListFeatureSets(
            ListFeatureSetsRequest(
                filter=fs_filter))  # type: ListFeatureSetsResponse

        def _to_feature_set(proto):
            # Build the SDK object and attach this client to it
            converted = FeatureSet.from_proto(proto)
            converted._client = self
            return converted

        return [_to_feature_set(proto) for proto in response.feature_sets]

    def get_feature_set(self,
                        name: str,
                        version: int = None,
                        project: str = None) -> Union[FeatureSet, None]:
        """
        Retrieves a feature set. When no version is given, version 0 is sent
        in the request.

        Args:
            project: Feast project that this feature set belongs to. Defaults
                to the client's active project
            name: Name of feature set; surrounding whitespace is stripped
            version: Version of feature set

        Returns:
            The feature set returned by Feast Core

        Raises:
            ValueError: If no project is given and none is configured
            grpc.RpcError: If the request to Feast Core fails
        """
        self._connect_core()

        if project is None:
            project = self.project
            if project is None:
                raise ValueError("No project has been configured.")

        requested_version = 0 if version is None else version
        lookup_request = GetFeatureSetRequest(project=project,
                                              name=name.strip(),
                                              version=int(requested_version))

        try:
            response = self._core_service_stub.GetFeatureSet(
                lookup_request)  # type: GetFeatureSetResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())
        return FeatureSet.from_proto(response.feature_set)

    def list_entities(self) -> Dict[str, Entity]:
        """
        Gather the entities of every feature set returned by
        list_feature_sets(). When several feature sets share an entity name,
        the entity seen last is kept.

        Returns:
            Dictionary of entities, indexed by name
        """
        return OrderedDict(
            (entity.name, entity)
            for feature_set in self.list_feature_sets()
            for entity in feature_set.entities)

    def get_batch_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        default_project: str = None,
    ) -> RetrievalJob:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_refs (List[str]):
                List of feature references that will be returned for each entity.
                Each feature reference should have the following format
                "project/feature:version".

            entity_rows (Union[pd.DataFrame, str]):
                Either a Pandas dataframe containing entities and a 'datetime'
                column, or a string path ending in ".avro" or "*".
                Each entity in a feature set must be present as a column in
                the dataframe. The datetime column must contain timestamps in
                datetime64 format. Note that a timezone-aware 'datetime'
                column is replaced, on the dataframe that was passed in, by
                its timezone-naive equivalent.
            default_project: Default project where feature values will be found.

        Returns:
            feast.job.RetrievalJob:
                Returns a retrieval job object that can be used to monitor
                retrieval progress asynchronously, and can be used to
                materialize the results.

        Raises:
            Exception: If the connected serving deployment does not support
                batch retrieval, if a string entity_rows is neither an
                ".avro" nor a wildcard path, or if entity_rows is of an
                unsupported type.

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["my_project/bookings_7d:1", "booking_14d"]
            >>> entity_rows = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.now() for _ in range(3)],
            >>>            "customer": [1001, 1002, 1003],
            >>>         }
            >>>     )
            >>> feature_retrieval_job = feast_client.get_batch_features(
            >>>     feature_refs, entity_rows, default_project="my_project")
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        self._connect_serving()

        feature_references = _build_feature_references(
            feature_refs=feature_refs, default_project=default_project)

        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            # The serving URL is exposed through the "serving_url" property;
            # the client has no "_serving_url" attribute.
            raise Exception(
                f'You are connected to a store "{self.serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame detected

            # Remove timezone from datetime column
            if isinstance(entity_rows["datetime"].dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                entity_rows["datetime"] = pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None)
        elif isinstance(entity_rows, str):
            # String based source
            if not entity_rows.endswith((".avro", "*")):
                raise Exception(
                    "Only .avro and wildcard paths are accepted as entity_rows"
                )
        else:
            raise Exception("Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload entity row DataFrame to staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]

        request = GetBatchFeaturesRequest(
            features=feature_references,
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=staged_files,
                data_format=DataFormat.DATA_FORMAT_AVRO)),
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        response = self._serving_service_stub.GetBatchFeatures(request)
        return RetrievalJob(response.job, self._serving_service_stub)

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
        default_project: Optional[str] = None,
    ) -> GetOnlineFeaturesResponse:
        """
        Retrieves the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references in the following format
                [project]/[feature_name]:[version]. Only the feature name
                is a required component in the reference.
                example:
                    ["my_project/my_feature_1:3",
                    "my_project3/my_feature_4:1",]
            entity_rows: List of GetFeaturesRequest.EntityRow where each row
                contains entities. Timestamp should not be set for online
                retrieval.
            default_project: Project used for feature references that do not
                name one. It only takes effect when the client has no active
                project; otherwise the client's project is used.

        Returns:
            The GetOnlineFeaturesResponse returned by Feast Serving for the
            provided entities
        """
        self._connect_serving()

        # The client's own project, when set, wins over the argument
        fallback_project = self.project if self.project else default_project
        references = _build_feature_references(
            feature_refs=feature_refs,
            default_project=fallback_project,
        )
        online_request = GetOnlineFeaturesRequest(
            features=references,
            entity_rows=entity_rows,
        )
        return self._serving_service_stub.GetOnlineFeatures(online_request)

    def list_ingest_jobs(
        self,
        job_id: str = None,
        feature_set_ref: FeatureSetRef = None,
        store_name: str = None,
    ):
        """
        List the ingestion jobs currently registered in Feast, with optional filters.
        Provides detailed metadata about each ingestion job.

        Args:
            job_id: Select specific ingestion job with the given job_id
            feature_set_ref: Filter ingestion jobs by target feature set (via reference)
            store_name: Filter ingestion jobs by target feast store's name

        Returns:
            List of IngestJobs matching the given filters
        """
        self._connect_core()

        # The caller's reference must reach the request filter rather than
        # being discarded. The filter takes the protobuf form of the
        # reference (assumes FeatureSetRef exposes to_proto() - confirm
        # against the FeatureSetRef definition).
        feature_set_ref_proto = None
        if feature_set_ref is not None:
            feature_set_ref_proto = feature_set_ref.to_proto()

        # construct list request
        list_filter = ListIngestionJobsRequest.Filter(
            id=job_id,
            feature_set_reference=feature_set_ref_proto,
            store_name=store_name,
        )
        request = ListIngestionJobsRequest(filter=list_filter)

        # make list request & unpack response
        response = self._core_service_stub.ListIngestionJobs(request)
        return [
            IngestJob(proto, self._core_service_stub)
            for proto in response.jobs
        ]

    def restart_ingest_job(self, job: IngestJob):
        """
        Restart an ingestion job currently registered in Feast.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support restarting a job in a transitional (ie pending, suspending, aborting),
        terminal state (ie suspended or aborted) or unknown status

        Args:
            job: IngestJob to restart

        Raises:
            grpc.RpcError: If the request to Feast Core fails; the error
                carries the details of the original failure
        """
        self._connect_core()
        try:
            self._core_service_stub.RestartIngestionJob(
                RestartIngestionJobRequest(id=job.id))
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

    def stop_ingest_job(self, job: IngestJob):
        """
        Stop an ingestion job currently registered in Feast.
        Does nothing if the target job is already in a terminal state (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending, suspending, aborting)
        or in an unknown status

        Args:
            job: IngestJob to stop

        Raises:
            grpc.RpcError: If the request to Feast Core fails; the error
                carries the details of the original failure
        """
        self._connect_core()
        try:
            self._core_service_stub.StopIngestionJob(
                StopIngestionJobRequest(id=job.id))
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        source: Union[pd.DataFrame, str],
        chunk_size: int = 10000,
        version: int = None,
        force_update: bool = False,
        max_workers: int = max(CPU_COUNT - 1, 1),
        disable_progress_bar: bool = False,
        timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT,
    ) -> None:
        """
        Loads feature data into Feast for a specific feature set.

        The source is first written to temporary parquet file(s); these are
        removed again whether or not ingestion succeeds.

        Args:
            feature_set (typing.Union[str, feast.feature_set.FeatureSet]):
                Feature set object or the string name of the feature set
                (without a version).

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            version (int):
                Feature set version.

            force_update (bool):
                Automatically update feature set based on source data prior to
                ingesting. This will also register changes to Feast. Requires
                feature_set to be a FeatureSet object.

            max_workers (int):
                Number of worker processes to use to encode values.

            disable_progress_bar (bool):
                Disable printing of progress statistics.

            timeout (int):
                Timeout in seconds to wait for completion. The time spent
                waiting for the feature set to become ready is deducted from
                the timeout used when flushing to Kafka.

        Returns:
            None:
                None

        Raises:
            ValueError: If force_update is requested with a feature set name
                instead of a FeatureSet object.
            TimeoutError: If the feature set is not ready within the timeout.
            Exception: If feature_set is neither a FeatureSet nor a string,
                or the feature set's source type is not "Kafka".
        """

        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            if version is None:
                version = feature_set.version
        elif isinstance(feature_set, str):
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        # Schema inference mutates and re-applies the feature set object, so
        # a bare name cannot be used; fail before any temp files are written.
        if force_update and not isinstance(feature_set, FeatureSet):
            raise ValueError(
                "force_update requires a FeatureSet object, but a feature "
                "set name was provided")

        # Read table into temporary parquet file(s)
        dir_path, dest_path = _read_table_from_source(source, chunk_size,
                                                      max_workers)

        # Everything after the temp files exist runs under try/finally so the
        # files are cleaned up on apply failures and readiness timeouts too.
        try:
            pq_file = pq.ParquetFile(dest_path)

            row_count = pq_file.metadata.num_rows

            # Update the feature set based on PyArrow table of first row group
            if force_update:
                feature_set.infer_fields_from_pa(
                    table=pq_file.read_row_group(0),
                    discard_unused_fields=True,
                    replace_existing_features=True,
                )
                self.apply(feature_set)
            wait_started = time.time()

            print("Waiting for feature set to be ready for ingestion...")
            while True:
                if (timeout is not None
                        and time.time() - wait_started >= timeout):
                    raise TimeoutError(
                        "Timed out waiting for feature set to be ready")
                feature_set = self.get_feature_set(name, version)
                if (feature_set is not None and
                        feature_set.status == FeatureSetStatus.STATUS_READY):
                    break
                time.sleep(3)

            # Spend only the remaining time budget on Kafka flushes
            if timeout is not None:
                timeout = timeout - int(time.time() - wait_started)

            # Kafka configs
            brokers = feature_set.get_kafka_source_brokers()
            topic = feature_set.get_kafka_source_topic()
            producer = get_producer(brokers, row_count, disable_progress_bar)

            # Loop optimization declarations
            produce = producer.produce
            flush = producer.flush

            # Transform and push data to Kafka
            if feature_set.source.source_type == "Kafka":
                for chunk in get_feature_row_chunks(
                        file=dest_path,
                        row_groups=list(range(pq_file.num_row_groups)),
                        fs=feature_set,
                        max_workers=max_workers,
                ):

                    # Push FeatureRow one chunk at a time to kafka
                    for serialized_row in chunk:
                        produce(topic=topic, value=serialized_row)

                    # Force a flush after each chunk
                    flush(timeout=timeout)

                    # Remove chunk from memory
                    del chunk

            else:
                raise Exception(
                    f"Could not determine source type for feature set "
                    f'"{feature_set.name}" with source type '
                    f'"{feature_set.source.source_type}"')

            # Print ingestion statistics
            producer.print_results()
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            shutil.rmtree(dir_path)

        return None
Beispiel #9
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a Feast Client from configuration options.

        The Feast Client should be initialized with at least one service url.
        Please see constants.py for configuration options. Commonly used options
        or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments used as configuration
                options; they are merged over "options", so a keyword wins
                when both define the same key
        """

        base_options = dict() if options is None else options
        self._config = Config(options={**base_options, **kwargs})

        # Stub slots start out empty
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._job_service_stub: Optional[JobServiceStub] = None

        # The auth metadata plugin is only built when auth is enabled
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _core_service(self):
        """
        Return the gRPC Feast Core Service Stub, building and caching it
        on first access

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        config = self._config
        core_channel = create_grpc_channel(
            url=config.get(opt.CORE_URL),
            enable_ssl=config.getboolean(opt.CORE_ENABLE_SSL),
            enable_auth=config.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=config.get(opt.CORE_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Return the gRPC Feast Serving Service Stub, building and caching it
        on first access. If both `opentracing` and `grpcio-opentracing` can be
        imported, the channel is wrapped with an opentracing interceptor that
        uses the global tracer.

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        config = self._config
        serving_channel = create_grpc_channel(
            url=config.get(opt.SERVING_URL),
            enable_ssl=config.getboolean(opt.SERVING_ENABLE_SSL),
            enable_auth=config.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=config.get(opt.SERVING_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )

        try:
            import opentracing
            from grpc_opentracing import open_tracing_client_interceptor
            from grpc_opentracing.grpcext import intercept_channel

            tracer = opentracing.global_tracer()
            tracing_interceptor = open_tracing_client_interceptor(tracer)
            serving_channel = intercept_channel(serving_channel,
                                                tracing_interceptor)
        except ImportError:
            # Tracing packages are optional; fall back to the plain channel
            pass

        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    @property
    def _use_job_service(self) -> bool:
        """
        Whether a job service URL exists in the client configuration

        Returns:
            Result of the configuration's exists() check for the job service URL
        """
        job_service_configured = self._config.exists(opt.JOB_SERVICE_URL)
        return job_service_configured

    @property
    def _job_service(self):
        """
        Creates or returns the gRPC Feast Job Service Stub

        The stub is created on first access and cached on
        ``self._job_service_stub`` so that later accesses reuse the same
        channel instead of opening a new one.

        Returns: JobServiceStub, or None when no job service URL is configured
        """
        # Don't try to initialize job service stub if the job service is disabled
        if not self._use_job_service:
            return None

        if not self._job_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(opt.JOB_SERVICE_URL),
                enable_ssl=self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL),
                enable_auth=self._config.getboolean(opt.ENABLE_AUTH),
                ssl_server_cert_path=self._config.get(
                    opt.JOB_SERVICE_SERVER_SSL_CERT),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            )
            # Store on the same attribute the guard above checks; caching
            # under a different name would rebuild the channel on every access.
            self._job_service_stub = JobServiceStub(channel)
        return self._job_service_stub

    def _extra_grpc_params(self) -> Dict[str, Any]:
        """
        Collect the keyword arguments shared by gRPC calls

        Returns:
            Dict with "timeout" (configured gRPC connection timeout) and
            "metadata" (result of _get_grpc_metadata())
        """
        return {
            "timeout": self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            "metadata": self._get_grpc_metadata(),
        }

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as held in the client configuration

        Returns:
            Feast Core URL string
        """
        configured_url = self._config.get(opt.CORE_URL)
        return configured_url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(opt.CORE_URL, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as held in the client configuration

        Returns:
            Feast Serving URL string
        """
        configured_url = self._config.get(opt.SERVING_URL)
        return configured_url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(opt.SERVING_URL, value)

    @property
    def job_service_url(self) -> str:
        """
        Feast Job Service URL as held in the client configuration

        Returns:
            Feast Job Service URL string
        """
        configured_url = self._config.get(opt.JOB_SERVICE_URL)
        return configured_url

    @job_service_url.setter
    def job_service_url(self, value: str):
        """
        Store a new Feast Job Service URL in the client configuration

        Args:
            value: Feast Job Service URL
        """
        config = self._config
        config.set(opt.JOB_SERVICE_URL, value)

    @property
    def core_secure(self) -> bool:
        """
        Feast Core client-side SSL/TLS setting from the client configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.CORE_ENABLE_SSL)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the Feast Core client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.CORE_ENABLE_SSL, value)

    @property
    def serving_secure(self) -> bool:
        """
        Feast Serving client-side SSL/TLS setting from the client configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.SERVING_ENABLE_SSL)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the Feast Serving client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.SERVING_ENABLE_SSL, value)

    @property
    def job_service_secure(self) -> bool:
        """
        Feast Job Service client-side SSL/TLS setting from the client
        configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL)
        return ssl_enabled

    @job_service_secure.setter
    def job_service_secure(self, value: bool):
        """
        Store the Feast Job Service client-side SSL/TLS setting

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.JOB_SERVICE_ENABLE_SSL, value)

    def version(self):
        """
        Report the SDK version together with the versions of Feast Serving
        and Feast Core, for whichever of those has a URL configured

        Returns:
            Dict with "sdk", "serving" and "core" keys. "serving" and "core"
            hold the string "not configured" unless the matching URL is set,
            in which case they hold a dict with "url" and "version".
        """
        import pkg_resources

        sdk_version = pkg_resources.get_distribution("feast").version
        result = {
            "sdk": {"version": sdk_version},
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
            serving_version = serving_info.version
            result["serving"] = dict(url=self.serving_url,
                                     version=serving_version)

        if self.core_url:
            core_info = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
            core_version = core_info.version
            result["core"] = dict(url=self.core_url, version=core_version)

        return result

    @property
    def project(self) -> str:
        """
        Currently active project

        Returns:
            Project name

        Raises:
            ValueError: If the configured project value is empty or unset
        """
        if self._config.get(opt.PROJECT):
            return self._config.get(opt.PROJECT)
        raise ValueError("No project has been configured.")

    def set_project(self, project: Optional[str] = None):
        """
        Switch the active Feast project

        Args:
            project: Project to make active. When omitted, the default
                project is restored.
        """
        new_project = opt().PROJECT if project is None else project
        self._config.set(opt.PROJECT, new_project)

    def list_projects(self) -> List[str]:
        """
        Ask Feast Core for its projects

        Returns:
            List of project names

        """

        list_projects_rpc = self._core_service.ListProjects
        projects_response = list_projects_rpc(
            ListProjectsRequest(),
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        project_names = list(projects_response.projects)
        return project_names

    def create_project(self, project: str):
        """
        Send a project creation request to Feast Core

        Args:
            project: Name of project
        """

        create_project_rpc = self._core_service.CreateProject
        create_project_rpc(
            CreateProjectRequest(name=project),
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the client's currently configured project,
        the client falls back to the default project afterwards.

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: If Feast Core rejects the request; the message is
                the details string of the original error, which is kept as
                the exception's cause.
        """

        try:
            # Go through the lazy property: the raw stub attribute is None
            # until the first Core call has been made on this client.
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details()) from e

        # Revert to the default project. The active project lives in the
        # config; read it directly because the `project` property raises
        # when no project is configured.
        if self._config.get(opt.PROJECT) == project:
            self.set_project()

    def apply(
        self,
        objects: Union[List[Union[Entity, FeatureTable]], Entity,
                       FeatureTable],
        project: str = None,
    ):
        """
        Register entities and feature tables with Feast Core. Accepts either
        a single entity or feature table, or a list mixing both; items are
        applied one by one in the order given.

        Args:
            objects: List of entities and/or feature tables that will be registered
            project: Project to register into; the active project is used
                when omitted

        Raises:
            ValueError: If an item is neither an Entity nor a FeatureTable

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            >>>     name="driver_entity",
            >>>     description="Driver entity for car rides",
            >>>     value_type=ValueType.STRING,
            >>>     labels={
            >>>         "key": "val"
            >>>     }
            >>> )
            >>> feast_client.apply(entity)
        """

        target_project = self.project if project is None else project
        items = objects if isinstance(objects, list) else [objects]

        for item in items:
            if isinstance(item, Entity):
                self._apply_entity(target_project, item)  # type: ignore
                continue
            if isinstance(item, FeatureTable):
                self._apply_feature_table(target_project,
                                          item)  # type: ignore
                continue
            raise ValueError(
                f"Could not determine object type to apply {item} with type {type(item)}. Type must be Entity or FeatureTable."
            )

    def apply_entity(self,
                     entities: Union[List[Entity], Entity],
                     project: str = None):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given entity.

        Raises:
            ValueError: If an item is not an Entity
        """
        warnings.warn(
            "The method apply_entity() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_entity().",
            DeprecationWarning,
        )

        target_project = self.project if project is None else project
        entity_list = entities if isinstance(entities, list) else [entities]

        for candidate in entity_list:
            if not isinstance(candidate, Entity):
                raise ValueError(
                    f"Could not determine entity type to apply {candidate}")
            self._apply_entity(target_project, candidate)  # type: ignore

    def _apply_entity(self, project: str, entity: Entity):
        """
        Register one entity with Feast Core and refresh the local object
        from the entity that Core returns

        Args:
            project: Project to register the entity in
            entity: Entity that will be registered

        Raises:
            grpc.RpcError: If the Core call fails; carries the details string
                of the original error
        """

        entity.is_valid()
        spec_proto = entity.to_spec_proto()

        # Send the spec to Feast Core
        try:
            response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project,
                                   spec=spec_proto),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())
        else:
            # Copy the state of the returned entity onto the caller's entity
            entity._update_from_entity(Entity.from_proto(response.entity))

    def list_entities(self,
                      project: str = None,
                      labels: Dict[str, str] = dict()) -> List[Entity]:
        """
        Fetch the entities registered in Feast Core

        Args:
            project: Filter entities based on project name; the active
                project is used when omitted
            labels: User-defined labels that these entities are associated with

        Returns:
            List of entities, each with its _client attribute set to this client
        """

        target_project = self.project if project is None else project
        entity_filter = ListEntitiesRequest.Filter(project=target_project,
                                                   labels=labels)

        # Ask Feast Core for the matching entities
        response = self._core_service.ListEntities(
            ListEntitiesRequest(filter=entity_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        def _to_entity(proto):
            # Convert one proto and link the result back to this client
            converted = Entity.from_proto(proto)
            converted._client = self
            return converted

        return [_to_entity(proto) for proto in response.entities]

    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Look up a single entity by name.

        Args:
            name: Name of entity; surrounding whitespace is stripped
            project: Feast project that this entity belongs to; the active
                project is used when omitted

        Returns:
            The entity built from the Core response

        Raises:
            grpc.RpcError: If the Core call fails; carries the details string
                of the original error
        """

        target_project = self.project if project is None else project

        try:
            response = self._core_service.GetEntity(
                GetEntityRequest(project=target_project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        return Entity.from_proto(response.entity)

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: str = None,
    ):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given feature table.

        Raises:
            ValueError: If an item is not a FeatureTable
        """
        warnings.warn(
            "The method apply_feature_table() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_feature_table().",
            DeprecationWarning,
        )

        target_project = self.project if project is None else project
        table_list = (feature_tables
                      if isinstance(feature_tables, list) else [feature_tables])

        for candidate in table_list:
            if not isinstance(candidate, FeatureTable):
                raise ValueError(
                    f"Could not determine feature table type to apply {candidate}"
                )
            self._apply_feature_table(target_project,
                                      candidate)  # type: ignore

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Register one feature table with Feast Core and refresh the local
        object from the table that Core returns

        Args:
            project: Project to register the feature table in
            feature_table: Feature table that will be registered

        Raises:
            grpc.RpcError: If the Core call fails; carries the details string
                of the original error
        """

        feature_table.is_valid()
        table_spec_proto = feature_table.to_spec_proto()

        # Send the spec to Feast Core
        try:
            response = self._core_service.ApplyFeatureTable(
                ApplyFeatureTableRequest(
                    project=project,
                    table_spec=table_spec_proto),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())
        else:
            # Copy the state of the returned table onto the caller's table
            returned_table = FeatureTable.from_proto(response.table)
            feature_table._update_from_feature_table(returned_table)

    def list_feature_tables(
        self,
        project: str = None,
        labels: Dict[str, str] = dict()
    ) -> List[FeatureTable]:
        """
        Fetch the feature tables registered in Feast Core

        Args:
            project: Filter feature tables based on project name; the active
                project is used when omitted
            labels: Labels passed to the request filter

        Returns:
            List of feature tables, each with its _client attribute set to
            this client
        """

        target_project = self.project if project is None else project
        table_filter = ListFeatureTablesRequest.Filter(project=target_project,
                                                       labels=labels)

        # Ask Feast Core for the matching feature tables
        response = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=table_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        def _to_feature_table(proto):
            # Convert one proto and link the result back to this client
            converted = FeatureTable.from_proto(proto)
            converted._client = self
            return converted

        return [_to_feature_table(proto) for proto in response.tables]

    def get_feature_table(self,
                          name: str,
                          project: str = None) -> FeatureTable:
        """
        Look up a single feature table by name.

        Args:
            name: Name of feature table; surrounding whitespace is stripped
            project: Feast project that this feature table belongs to; the
                active project is used when omitted

        Returns:
            The feature table built from the Core response

        Raises:
            grpc.RpcError: If the Core call fails; carries the details string
                of the original error
        """

        target_project = self.project if project is None else project

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=target_project,
                                       name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        return FeatureTable.from_proto(response.table)

    def delete_feature_table(self, name: str, project: str = None) -> None:
        """
        Ask Feast Core to delete a feature table.

        Args:
            name: Name of feature table; surrounding whitespace is stripped
            project: Feast project that this feature table belongs to; the
                active project is used when omitted

        Raises:
            grpc.RpcError: If the Core call fails; carries the details string
                of the original error
        """

        target_project = self.project if project is None else project

        try:
            delete_rpc = self._core_service.DeleteFeatureTable
            delete_rpc(
                DeleteFeatureTableRequest(project=target_project,
                                          name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

    def list_features_by_ref(
            self,
            project: str = None,
            entities: List[str] = list(),
            labels: Dict[str, str] = dict(),
    ) -> Dict[FeatureRef, Feature]:
        """
        Fetch features from Feast Core that match the given filters, keyed by
        their feature reference.

        Args:
            project: Feast project that these features belongs to; the active
                project is used when omitted
            entities: Feast entity that these features are associated with
            labels: Feast labels that these features are associated with

        Returns:
            Dictionary of <feature references: features>

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = feast_client.list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """

        target_project = self.project if project is None else project
        feature_filter = ListFeaturesRequest.Filter(project=target_project,
                                                    entities=entities,
                                                    labels=labels)

        response = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=feature_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Parse each reference string, then convert its feature proto
        return dict(
            (FeatureRef.from_str(ref_str), Feature.from_proto(proto))
            for ref_str, proto in response.features.items())

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: str = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT),
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The source data is written to temporary parquet file(s) and then
        uploaded to the FeatureTable's batch source (file or BigQuery).

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            timeout (int):
                Timeout in seconds to wait for completion. Not referenced
                by this method's body at present.

        Raises:
            TypeError: If feature_table is neither a string nor a FeatureTable.
            Exception: If the FeatureTable cannot be found, or if its batch
                source is a FileSource whose file format is not parquet.

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [pd.Timestamp.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if project is None:
            project = self.project

        # Resolve the table name up front so an unsupported argument type
        # fails with a clear message instead of an unbound local variable.
        if isinstance(feature_table, FeatureTable):
            name = feature_table.name
        elif isinstance(feature_table, str):
            name = feature_table
        else:
            raise TypeError(
                f"feature_table must be a str or FeatureTable, got {type(feature_table)}"
            )

        # Always work with the definition registered in Feast Core
        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project)
        if fetched_feature_table is not None:
            feature_table = fetched_feature_table
        else:
            raise Exception(f"FeatureTable, {name} cannot be found.")

        # Check 1) Only parquet file format for FeatureTable batch source is supported
        # The format object itself is tested (not its type), and the check
        # rejects anything that is NOT parquet.
        batch_source = feature_table.batch_source
        if (batch_source and isinstance(batch_source, FileSource)
                and not isinstance(batch_source.file_options.file_format,
                                   ParquetFormat)):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}. "
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            feature_table.batch_source.event_timestamp_column,
            feature_table.batch_source.field_mapping,
        )

        # dir_path is only set by the non-partitioned writer; it marks the
        # temporary directory that must be removed once the upload is done.
        dir_path = None
        with_partitions = False
        if (isinstance(feature_table.batch_source, FileSource)
                and feature_table.batch_source.date_partition_column):
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                feature_table.batch_source.date_partition_column,
                feature_table.batch_source.event_timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                chunk_size,
                max_workers,
            )

        try:
            if isinstance(feature_table.batch_source, FileSource):
                # Drop any trailing "*" from the configured URL
                file_url = feature_table.batch_source.file_options.file_url.rstrip(
                    "*")
                _upload_to_file_source(file_url, with_partitions, dest_path,
                                       self._config)
            if isinstance(feature_table.batch_source, BigQuerySource):
                bq_table_ref = feature_table.batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    feature_table.batch_source.event_timestamp_column)

                _upload_to_bq_source(bq_table_ref,
                                     feature_table_timestamp_column, dest_path)
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print(
            "Data has been successfully ingested into FeatureTable batch source."
        )

    def _get_grpc_metadata(self):
        """
        Build the metadata tuple to attach to gRPC requests. This is primarily
        used when authentication is enabled but SSL/TLS is disabled.

        Returns: Signed metadata from the auth plugin when auth is enabled and
            a plugin is present, otherwise an empty tuple
        """
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if not (auth_enabled and self._auth_metadata):
            return ()
        return self._auth_metadata.get_signed_meta()

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Dict[str, Any]],
        project: Optional[str] = None,
    ) -> OnlineResponse:
        """
        Fetch the latest online feature data from Feast Serving.
        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair.
            project: Optional project override. When given, it is used for retrieval
                instead of the client's active project.
        Returns:
            OnlineResponse wrapping the Serving response.
            Each EntityRow provided will yield one record, which contains
            data fields with data value and field status metadata (if included).
        Raises:
            grpc.RpcError: If the Serving call fails; carries the details string
                of the original error
        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["sales:daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]}
        """

        try:
            online_features_rpc = self._serving_service.GetOnlineFeaturesV2
            request = GetOnlineFeaturesRequestV2(
                features=_build_feature_references(
                    feature_ref_strs=feature_refs),
                entity_rows=_infer_online_entity_rows(entity_rows),
                project=self.project if project is None else project,
            )
            raw_response = online_features_rpc(
                request,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        return OnlineResponse(raw_response)

    def get_historical_features(
        self,
        feature_refs: List[str],
        entity_source: Union[pd.DataFrame, FileSource, BigQuerySource],
        output_location: Optional[str] = None,
    ) -> RetrievalJob:
        """
        Launch a historical feature retrieval job.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature table and feature names respectively.
            entity_source (Union[pd.DataFrame, FileSource, BigQuerySource]): Source for the entity rows.
                If entity_source is a Pandas DataFrame, the dataframe will be staged
                to become accessible by spark workers.
                If one of the feature tables' sources is in BigQuery, the entities are
                uploaded to BigQuery (project and dataset of the first BigQuery source).
                Otherwise they are uploaded to remote file storage (derived from the
                configured staging location).
                It is also assumed that the column event_timestamp is present
                in the dataframe, and is of type datetime without timezone information.

                The user needs to make sure that the source (or staging location, if entity_source is
                a Pandas DataFrame) is accessible from the Spark cluster that will be used for the
                retrieval job.
            output_location: Location to write the exported feature data to. When omitted,
                a new UUID-named path under the configured historical feature output
                location is used.

        Returns:
                Returns a retrieval job object that can be used to monitor retrieval
                progress asynchronously, and can be used to materialize the
                results.

        Raises:
            AssertionError: If any retrieved feature table has a batch source
                without `created_timestamp_column`.

        Examples:
            >>> from feast import Client
            >>> from feast.data_format import ParquetFormat
            >>> from datetime import datetime
            >>> feast_client = Client(core_url="localhost:6565")
            >>> feature_refs = ["bookings:bookings_7d", "bookings:booking_14d"]
            >>> entity_source = FileSource("event_timestamp", ParquetFormat(), "gs://some-bucket/customer")
            >>> feature_retrieval_job = feast_client.get_historical_features(
            >>>     feature_refs, entity_source)
            >>> output_file_uri = feature_retrieval_job.get_output_file_uri()
                "gs://some-bucket/output/"
        """
        feature_tables = self._get_feature_tables_from_feature_refs(
            feature_refs, self.project)

        # Raised explicitly rather than via `assert` so the check is not
        # stripped under `python -O`; AssertionError is kept so callers see the
        # same exception type as before.
        if not all(ft.batch_source.created_timestamp_column
                   for ft in feature_tables):
            raise AssertionError(
                "All BatchSources attached to retrieved FeatureTables "
                "must have specified `created_timestamp_column` to be used in "
                "historical dataset generation.")

        if output_location is None:
            output_location = os.path.join(
                self._config.get(opt.HISTORICAL_FEATURE_OUTPUT_LOCATION),
                str(uuid.uuid4()),
            )
        output_format = self._config.get(opt.HISTORICAL_FEATURE_OUTPUT_FORMAT)

        if isinstance(entity_source, pd.DataFrame):
            # An in-memory dataframe must be staged somewhere the job can read.
            # Prefer BigQuery when any requested table is sourced from it.
            first_bq_source = next(
                (ft.batch_source for ft in feature_tables
                 if isinstance(ft.batch_source, BigQuerySource)),
                None,
            )
            if first_bq_source is not None:
                source_ref = table_reference_from_string(
                    first_bq_source.bigquery_options.table_ref)
                entity_source = stage_entities_to_bq(entity_source,
                                                     source_ref.project,
                                                     source_ref.dataset_id)
            else:
                entity_source = stage_entities_to_fs(
                    entity_source,
                    staging_location=self._config.get(
                        opt.SPARK_STAGING_LOCATION),
                    config=self._config,
                )

        if self._use_job_service:
            response = self._job_service.GetHistoricalFeatures(
                GetHistoricalFeaturesRequest(
                    feature_refs=feature_refs,
                    entity_source=entity_source.to_proto(),
                    project=self.project,
                    output_format=output_format,
                    output_location=output_location,
                ),
                **self._extra_grpc_params(),
            )
            return RemoteRetrievalJob(
                self._job_service,
                self._extra_grpc_params,
                response.id,
                output_file_uri=response.output_file_uri,
            )

        return start_historical_feature_retrieval_job(
            client=self,
            project=self.project,
            entity_source=entity_source,
            feature_tables=feature_tables,
            output_format=output_format,
            output_path=output_location,
        )

    def get_historical_features_df(
        self,
        feature_refs: List[str],
        entity_source: Union[FileSource, BigQuerySource],
    ):
        """
        Run a historical feature retrieval inside a Spark session.

        Args:
            feature_refs: Feature references to retrieve for each entity, each
                written as "feature_table:feature".
            entity_source (Union[FileSource, BigQuerySource]): Source for the entity rows.
                The caller must make sure it is reachable from the Spark
                cluster that performs the retrieval.

        Returns:
                The historical feature retrieval result in the form of a Spark dataframe.

        Examples:
            >>> from feast import Client
            >>> from feast.data_format import ParquetFormat
            >>> from pyspark.sql import SparkSession
            >>> spark = SparkSession.builder.getOrCreate()
            >>> feast_client = Client(core_url="localhost:6565")
            >>> feature_refs = ["bookings:bookings_7d", "bookings:booking_14d"]
            >>> entity_source = FileSource("event_timestamp", ParquetFormat(), "gs://some-bucket/customer")
            >>> df = feast_client.get_historical_features_df(
            >>>     feature_refs, entity_source)
        """
        resolved_tables = self._get_feature_tables_from_feature_refs(
            feature_refs, self.project)
        retrieval_kwargs = dict(
            client=self,
            project=self.project,
            entity_source=entity_source,
            feature_tables=resolved_tables,
        )
        return start_historical_feature_retrieval_spark_session(
            **retrieval_kwargs)

    def _get_feature_tables_from_feature_refs(self, feature_refs: List[str],
                                              project: Optional[str]):
        """
        Resolve feature references into the feature tables that own them.

        Each referenced table is fetched once via `get_feature_table`, and its
        `features` attribute is narrowed (in place on the fetched object) to
        the features that were actually requested.

        Args:
            feature_refs: References of the form "feature_table:feature".
            project: Project passed through to `get_feature_table`.

        Returns:
            List of feature tables, ordered by first appearance in feature_refs.
        """
        # Collect requested feature names per table in first-seen order.
        # itertools.groupby only merges *adjacent* items, so grouping unsorted
        # references such as ["a:x", "b:y", "a:z"] with it returned table "a"
        # twice, each copy holding only part of the requested features.
        feature_names_by_table = {}
        for feature_ref in feature_refs:
            parts = feature_ref.split(":")
            feature_names_by_table.setdefault(parts[0], []).append(parts[-1])

        feature_tables = []
        for feature_table_name, feature_names in feature_names_by_table.items():
            feature_table = self.get_feature_table(feature_table_name, project)
            feature_table.features = [
                f for f in feature_table.features if f.name in feature_names
            ]
            feature_tables.append(feature_table)
        return feature_tables

    def start_offline_to_online_ingestion(
        self,
        feature_table: FeatureTable,
        start: datetime,
        end: datetime,
    ) -> SparkJob:
        """
        Launch an ingestion job from the batch source to the online store
        for the given feature table.

        When the client uses the job service, the job is requested through
        it; otherwise the module-level `start_offline_to_online_ingestion`
        launcher is called with this client.

        Args:
            feature_table: FeatureTable which will be ingested
            start: lower datetime boundary
            end: upper datetime boundary

        Returns:
            Spark Job Proxy object
        """
        if self._use_job_service:
            ingestion_request = StartOfflineToOnlineIngestionJobRequest(
                project=self.project,
                table_name=feature_table.name,
            )
            # The boundaries are protobuf timestamps, filled in from the
            # datetimes after the request has been constructed.
            ingestion_request.start_date.FromDatetime(start)
            ingestion_request.end_date.FromDatetime(end)
            started = self._job_service.StartOfflineToOnlineIngestionJob(
                ingestion_request)
            return RemoteBatchIngestionJob(
                self._job_service,
                self._extra_grpc_params,
                started.id,
            )

        return start_offline_to_online_ingestion(
            client=self,
            project=self.project,
            feature_table=feature_table,
            start=start,
            end=end,
        )

    def start_stream_to_online_ingestion(
        self,
        feature_table: FeatureTable,
        extra_jars: Optional[List[str]] = None,
        project: Optional[str] = None,
    ) -> SparkJob:
        """
        Launch a streaming ingestion job into the online store for the given
        feature table.

        Args:
            feature_table: FeatureTable which will be ingested
            extra_jars: Extra jars handed to the local launcher. NOTE: they are
                not part of the request sent when the job service is used.
            project: Project to run the job for. Falls back to the client's
                active project when omitted or empty.

        Returns:
            Spark Job Proxy object
        """
        # Resolve the override once so both execution paths agree; the job
        # service path previously always used self.project and silently
        # ignored the `project` argument.
        effective_project = project or self.project

        if not self._use_job_service:
            return start_stream_to_online_ingestion(
                client=self,
                project=effective_project,
                feature_table=feature_table,
                extra_jars=extra_jars or [],
            )

        request = StartStreamToOnlineIngestionJobRequest(
            project=effective_project,
            table_name=feature_table.name,
        )
        response = self._job_service.StartStreamToOnlineIngestionJob(request)
        return RemoteStreamIngestionJob(self._job_service,
                                        self._extra_grpc_params,
                                        response.id)

    def list_jobs(self, include_terminated: bool) -> List[SparkJob]:
        """
        List jobs, either through the job service or via the module-level
        `list_jobs` helper.

        Args:
            include_terminated: Forwarded as-is to the job listing call.

        Returns:
            List of job objects.
        """
        if self._use_job_service:
            listing = self._job_service.ListJobs(
                ListJobsRequest(include_terminated=include_terminated))
            remote_jobs = []
            for job_proto in listing.jobs:
                remote_jobs.append(
                    get_remote_job_from_proto(self._job_service,
                                              self._extra_grpc_params,
                                              job_proto))
            return remote_jobs

        return list_jobs(include_terminated, self)

    def get_job_by_id(self, job_id: str) -> SparkJob:
        """
        Look up a single job by its id, either through the job service or
        via the module-level `get_job_by_id` helper.

        Args:
            job_id: Identifier of the job to fetch.

        Returns:
            The matching job object.
        """
        if self._use_job_service:
            lookup = GetJobRequest(job_id=job_id)
            found = self._job_service.GetJob(lookup)
            return get_remote_job_from_proto(self._job_service,
                                             self._extra_grpc_params,
                                             found.job)

        return get_job_by_id(job_id, self)

    def stage_dataframe(
        self,
        df: pd.DataFrame,
        event_timestamp_column: str,
    ) -> FileSource:
        """
        Hand a dataframe to the module-level `stage_dataframe` helper,
        together with this client's configuration.

        Args:
            df: Dataframe to stage.
            event_timestamp_column: Name of the event timestamp column,
                forwarded unchanged to the helper.

        Returns:
            The FileSource produced by the helper.
        """
        staged_source = stage_dataframe(df, event_timestamp_column,
                                        self._config)
        return staged_source
Beispiel #10
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a client from configuration options.

        At least one service url should be supplied. See constants.py for the
        full list of configuration options. Commonly used ones:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"; on a key clash
                the keyword argument wins.
        """
        base_options = dict() if options is None else options
        self._config = Config(options={**base_options, **kwargs})

        # Service stubs are created on first use by the `_core_service` and
        # `_serving_service` properties.
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # The auth metadata plugin is only set up when auth is enabled
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _core_service(self):
        """
        Lazily build and cache the gRPC Feast Core Service Stub.

        The first access opens a channel from the configured Core URL, SSL,
        auth and timeout settings; later accesses reuse the cached stub.

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        config = self._config
        core_channel = create_grpc_channel(
            url=config.get(CONFIG_CORE_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Lazily build and cache the gRPC Feast Serving Service Stub.

        The first access opens a channel from the configured Serving URL,
        SSL, auth and timeout settings; later accesses reuse the cached stub.

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        config = self._config
        serving_channel = create_grpc_channel(
            url=config.get(CONFIG_SERVING_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(
                CONFIG_SERVING_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as currently stored in the client configuration.

        Returns:
            Feast Core URL string
        """
        configured_url = self._config.get(CONFIG_CORE_URL_KEY)
        return configured_url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration.

        NOTE: only the configuration is updated; a stub already cached by
        `_core_service` is not rebuilt.

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as currently stored in the client configuration.

        Returns:
            Feast Serving URL string
        """
        configured_url = self._config.get(CONFIG_SERVING_URL_KEY)
        return configured_url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration.

        NOTE: only the configuration is updated; a stub already cached by
        `_serving_service` is not rebuilt.

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Feast Core client-side SSL/TLS setting from the client configuration.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the Feast Core client-side SSL/TLS setting in the configuration.

        NOTE: only the configuration is updated; a stub already cached by
        `_core_service` is not rebuilt.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_CORE_ENABLE_SSL_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Feast Serving client-side SSL/TLS setting from the client configuration.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the Feast Serving client-side SSL/TLS setting in the configuration.

        NOTE: only the configuration is updated; a stub already cached by
        `_serving_service` is not rebuilt.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_SERVING_ENABLE_SSL_KEY, value)

    def version(self):
        """
        Collect version information for the SDK, Feast Serving and Feast Core.

        Returns:
            Dictionary with the keys "sdk", "serving" and "core". A service
            whose URL is not set is reported as the string "not configured";
            otherwise its entry holds the service "url" and "version".
        """
        import pkg_resources

        sdk_version = pkg_resources.get_distribution("feast").version
        versions = {
            "sdk": {
                "version": sdk_version
            },
            "serving": "not configured",
            "core": "not configured",
        }

        # Each service is only contacted when its URL is configured.
        if self.serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            serving_version = serving_info.version
            versions["serving"] = {
                "url": self.serving_url,
                "version": serving_version,
            }

        if self.core_url:
            core_info = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            core_version = core_info.version
            versions["core"] = {
                "url": self.core_url,
                "version": core_version,
            }

        return versions

    @property
    def project(self) -> Union[str, None]:
        """
        Currently active project, read from the client configuration.

        Returns:
            Project name
        """
        active_project = self._config.get(CONFIG_PROJECT_KEY)
        return active_project

    def set_project(self, project: Optional[str] = None):
        """
        Make the given Feast project the active one.

        Args:
            project: Project to set as active. When omitted (None), the
                default project from FEAST_DEFAULT_OPTIONS is used instead.
        """
        target_project = (project if project is not None else
                          FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY])
        self._config.set(CONFIG_PROJECT_KEY, target_project)

    def list_projects(self) -> List[str]:
        """
        Ask Feast Core for all active Feast projects.

        Returns:
            List of project names
        """
        list_rpc = self._core_service.ListProjects
        request = ListProjectsRequest()
        rpc_timeout = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
        response = list_rpc(
            request,
            timeout=rpc_timeout,
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        # Copy the response's `projects` field into a plain Python list.
        return list(response.projects)

    def create_project(self, project: str):
        """
        Ask Feast Core to create a project.

        Args:
            project: Name of project
        """
        create_rpc = self._core_service.CreateProject
        request = CreateProjectRequest(name=project)
        rpc_timeout = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
        create_rpc(
            request,
            timeout=rpc_timeout,
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the currently active one, the client is
        switched back to the default project.

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: If the Core call fails; carries the error details.
        """

        try:
            # Use the lazy `_core_service` property. Reading
            # `_core_service_stub` directly fails with AttributeError on None
            # when no earlier Core call has created the stub.
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Revert to the default project. The active project is stored in the
        # configuration (see the `project` property); this class has no
        # `_project` attribute, so the previous check raised AttributeError.
        if self.project == project:
            self._config.set(CONFIG_PROJECT_KEY,
                             FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY])

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Register one or more feature sets with Feast Core. Accepts either a
        single feature set or a list of them.

        Args:
            feature_sets: List of feature sets that will be registered

        Raises:
            ValueError: If an item is not a FeatureSet. Items before it in the
                list have already been applied at that point.
        """
        candidates = (feature_sets
                      if isinstance(feature_sets, list) else [feature_sets])
        for candidate in candidates:
            if not isinstance(candidate, FeatureSet):
                raise ValueError(
                    f"Could not determine feature set type to apply {candidate}")
            self._apply_feature_set(candidate)

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Register a single feature set with Feast Core and sync the local
        object with what Core returned.

        Args:
            feature_set: Feature set that will be registered

        Raises:
            grpc.RpcError: If the Core call fails; carries the error details.
        """

        feature_set.is_valid()
        feature_set_proto = feature_set.to_proto()
        # Fall back to the client's active project when the spec names none
        if not feature_set_proto.spec.project and self.project is not None:
            feature_set_proto.spec.project = self.project

        # Send the feature set to Feast Core
        try:
            apply_fs_response = self._core_service.ApplyFeatureSet(
                ApplyFeatureSetRequest(feature_set=feature_set_proto),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureSetResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Feature set as registered by Core
        applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set)

        # Report the outcome; each status is checked independently.
        status = apply_fs_response.status
        statuses = ApplyFeatureSetResponse.Status
        outcome_messages = (
            (statuses.CREATED, f'Feature set created: "{applied_fs.name}"'),
            (statuses.UPDATED, f'Feature set updated: "{applied_fs.name}"'),
            (statuses.NO_CHANGE,
             f"No change detected or applied: {feature_set.name}"),
        )
        for outcome, message in outcome_messages:
            if status == outcome:
                print(message)

        # Copy the returned feature set's state into the local feature set
        feature_set._update_from_feature_set(applied_fs)

    def list_feature_sets(
        self,
        project: Optional[str] = None,
        name: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core

        Args:
            project: Filter feature sets based on project name. Defaults to
                the client's active project, or "*" when none is set.
            name: Filter feature sets based on feature set name. Defaults
                to "*".
            labels: Filter feature sets based on labels. Defaults to no labels.

        Returns:
            List of feature sets, each with its `_client` set to this client
        """

        if project is None:
            project = self.project if self.project is not None else "*"

        if name is None:
            name = "*"

        # None replaces the former shared mutable `dict()` default; the
        # request is still built with an empty mapping.
        if labels is None:
            labels = {}

        # Named `list_filter` so the builtin `filter` is not shadowed
        list_filter = ListFeatureSetsRequest.Filter(project=project,
                                                    feature_set_name=name,
                                                    labels=labels)

        # Get latest feature sets from Feast Core
        feature_set_protos = self._core_service.ListFeatureSets(
            ListFeatureSetsRequest(filter=list_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureSetsResponse

        # Extract feature sets and return
        feature_sets = []
        for feature_set_proto in feature_set_protos.feature_sets:
            feature_set = FeatureSet.from_proto(feature_set_proto)
            feature_set._client = self
            feature_sets.append(feature_set)
        return feature_sets

    def get_feature_set(self,
                        name: str,
                        project: str = None) -> Union[FeatureSet, None]:
        """
        Fetch a single feature set from Feast Core.

        Args:
            project: Feast project that this feature set belongs to. Defaults
                to the client's active project.
            name: Name of feature set; surrounding whitespace is stripped

        Returns:
            The requested feature set

        Raises:
            ValueError: If no project is given and none is configured.
            grpc.RpcError: If the Core call fails; carries the error details.
        """

        if project is None:
            project = self.project
            if project is None:
                raise ValueError("No project has been configured.")

        try:
            core_response = self._core_service.GetFeatureSet(
                GetFeatureSetRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureSetResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        else:
            return FeatureSet.from_proto(core_response.feature_set)

    def list_features_by_ref(
            self,
            project: Optional[str] = None,
            entities: Optional[List[str]] = None,
            labels: Optional[Dict[str, str]] = None,
    ) -> Dict[FeatureRef, Feature]:
        """
        Returns the features matching the provided filters, keyed by reference.

        Args:
            project: Feast project that these features belong to. Defaults to
                the client's active project, or "default" when none is set.
            entities: Feast entities that these features are associated with.
                Defaults to no entities.
            labels: Feast labels that these features are associated with.
                Defaults to no labels.

        Returns:
            Dictionary of <feature references: features>

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = feast_client.list_features_by_ref(
            >>>     project="test_project", entities=["driver_id"],
            >>>     labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """
        if project is None:
            project = self.project if self.project is not None else "default"

        # None replaces the former shared mutable `list()` / `dict()`
        # defaults; the request is still built with empty containers.
        if entities is None:
            entities = []
        if labels is None:
            labels = {}

        # Named `list_filter` so the builtin `filter` is not shadowed
        list_filter = ListFeaturesRequest.Filter(project=project,
                                                 entities=entities,
                                                 labels=labels)

        feature_protos = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=list_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Reference strings are parsed with ignore_project=True.
        return {
            FeatureRef.from_str(ref_str, ignore_project=True):
            Feature.from_proto(feature_proto)
            for ref_str, feature_proto in feature_protos.features.items()
        }

    def list_entities(self) -> Dict[str, Entity]:
        """
        Collect the entities of every feature set returned by
        `list_feature_sets()`.

        When several feature sets declare an entity with the same name, the
        one seen last wins while the position of the first occurrence is kept.

        Returns:
            Dictionary of entities, indexed by name
        """
        entities_by_name = OrderedDict()
        for feature_set in self.list_feature_sets():
            entities_by_name.update(
                (entity.name, entity) for entity in feature_set.entities)
        return entities_by_name

    def get_batch_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        compute_statistics: bool = False,
        project: Optional[str] = None,
    ) -> RetrievalJob:
        """
        Deprecated. Please see get_historical_features.

        Emits a DeprecationWarning and forwards all arguments unchanged to
        get_historical_features, returning its result.
        """
        warnings.warn(
            "The method get_batch_features() is being deprecated. Please use the identical get_historical_features(). "
            "Feast 0.7 and onwards will not support get_batch_features().",
            DeprecationWarning,
            # Attribute the warning to the caller. Python's default filters
            # only show DeprecationWarning for code in __main__, so without
            # this it is attributed to this module and hidden.
            stacklevel=2,
        )
        return self.get_historical_features(feature_refs, entity_rows,
                                            compute_statistics, project)

    def get_historical_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        compute_statistics: bool = False,
        project: str = None,
    ) -> RetrievalJob:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_set:feature" where "feature_set" & "feature" refer to
                the feature set and feature names respectively.
                Only the feature name is required.
            entity_rows (Union[pd.DataFrame, str]):
                Either a Pandas dataframe containing entities and a 'datetime'
                column, or a path string ending in ".avro" or "*".
                Each entity in a feature set must be present as a column in the
                dataframe. The datetime column must contain timestamps in
                datetime64 format; a timezone-aware column is replaced in
                place (on the passed dataframe) by its timezone-naive form.
            compute_statistics (bool):
                Indicates whether Feast should compute statistics over the retrieved dataset.
            project: Specifies the project which contains the FeatureSets
                which the requested features belong to. Defaults to the
                client's active project.

        Returns:
            feast.job.RetrievalJob:
                Returns a retrieval job object that can be used to monitor retrieval
                progress asynchronously, and can be used to materialize the
                results.

        Raises:
            Exception: If the serving deployment is not of batch type, or if
                entity_rows has an unsupported type or path suffix.
            grpc.RpcError: If the batch retrieval call fails; carries the
                error details.

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["my_project/bookings_7d", "booking_14d"]
            >>> entity_rows = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.now() for _ in range(3)],
            >>>            "customer": [1001, 1002, 1003],
            >>>         }
            >>>     )
            >>> feature_retrieval_job = feast_client.get_historical_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        # Serving info tells us the store type and the staging location
        serving_info = self._serving_service.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: GetFeastServingInfoResponse

        is_batch_store = (
            serving_info.type == FeastServingType.FEAST_SERVING_TYPE_BATCH)
        if not is_batch_store:
            raise Exception(
                f'You are connected to a store "{self.serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, str):
            # String based source: only avro files or wildcard paths
            if not entity_rows.endswith((".avro", "*")):
                raise Exception(
                    "Only .avro and wildcard paths are accepted as entity_rows"
                )
        elif isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame: drop timezone info from the datetime column
            datetime_dtype = entity_rows["datetime"].dtype
            if isinstance(datetime_dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                naive_index = pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None)
                entity_rows["datetime"] = naive_index
        else:
            raise Exception(f"Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload the entity rows to the staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]

        effective_project = project if project is not None else self.project
        feature_references = _build_feature_references(
            feature_ref_strs=feature_refs,
            project=effective_project,
        )
        staged_source = DatasetSource(file_source=DatasetSource.FileSource(
            file_uris=staged_files,
            data_format=DataFormat.DATA_FORMAT_AVRO))
        batch_request = GetBatchFeaturesRequest(
            features=feature_references,
            dataset_source=staged_source,
            compute_statistics=compute_statistics,
        )

        # Start the retrieval; the returned job manages its life cycle
        try:
            batch_response = self._serving_service.GetBatchFeatures(
                batch_request, metadata=self._get_grpc_metadata())
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return RetrievalJob(
            batch_response.job,
            self._serving_service,
            auth_metadata_plugin=self._auth_metadata,
        )

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow,
                                Dict[str, Any]]],
        project: Optional[str] = None,
        omit_entities: bool = False,
    ) -> OnlineResponse:
        """
        Retrieves the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_set:feature" where "feature_set" & "feature" refer to
                the feature set and feature names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key is an entity and each value is
                feast.types.Value or Python native form.
            project: Optionally specify the project override. If specified, uses given project for retrieval.
                Overrides the projects specified in Feature References if also are specified.
                When omitted, the client's own project (self.project) is sent.
            omit_entities: If true will omit entity values in the returned feature data.
        Returns:
            OnlineResponse wrapping the response returned by Feast Serving.
            Each EntityRow provided will yield one record, which contains
            data fields with data value and field status metadata (if included).
        Raises:
            grpc.RpcError: If the GetOnlineFeatures call fails. The raised
                error carries the details string of the failed call and is
                chained to the original error.

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'daily_transactions': [1.1,1.2], 'customer_id': [0,1]}
        """

        try:
            response = self._serving_service.GetOnlineFeatures(
                GetOnlineFeaturesRequest(
                    omit_entities_in_response=omit_entities,
                    features=_build_feature_references(
                        feature_ref_strs=feature_refs),
                    entity_rows=_infer_online_entity_rows(entity_rows),
                    project=project if project is not None else self.project,
                ),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            # Chain explicitly so the original status code and traceback stay
            # reachable through __cause__ of the re-raised error.
            raise grpc.RpcError(e.details()) from e

        return OnlineResponse(response)

    def list_ingest_jobs(
        self,
        job_id: str = None,
        feature_set_ref: FeatureSetRef = None,
        store_name: str = None,
    ):
        """
        Return the ingestion jobs registered in Feast, optionally filtered.

        Args:
            job_id: Only return the ingestion job with this id
            feature_set_ref: Only return jobs targeting this feature set
                (given as a reference)
            store_name: Only return jobs targeting the store with this name

        Returns:
            List of IngestJobs built from the jobs in the Core response
        """
        # A reference is only converted to its proto form when one was given
        ref_proto = feature_set_ref.to_proto() if feature_set_ref else None

        request = ListIngestionJobsRequest(
            filter=ListIngestionJobsRequest.Filter(
                id=job_id,
                feature_set_reference=ref_proto,
                store_name=store_name,
            ))

        response = self._core_service.ListIngestionJobs(
            request,
            metadata=self._get_grpc_metadata(),
        )  # type: ignore

        # Wrap every returned job proto in an IngestJob bound to Core
        jobs = []
        for job_proto in response.jobs:  # type: ignore
            jobs.append(
                IngestJob(job_proto,
                          self._core_service,
                          auth_metadata_plugin=self._auth_metadata))
        return jobs

    def restart_ingest_job(self, job: IngestJob):
        """
        Restart ingestion job currently registered in Feast.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support restarting a job in a transitional (ie pending, suspending, aborting),
        terminal state (ie suspended or aborted) or unknown status

        Args:
            job: IngestJob to restart

        Raises:
            grpc.RpcError: If the RestartIngestionJob call fails. The raised
                error carries the details string of the failed call and is
                chained to the original error.
        """
        request = RestartIngestionJobRequest(id=job.id)
        try:
            self._core_service.RestartIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            # Keep the original error (status code, traceback) as __cause__
            raise grpc.RpcError(e.details()) from e

    def stop_ingest_job(self, job: IngestJob):
        """
        Stop ingestion job currently registered in Feast.
        Does nothing if the target job is already in a terminal state (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending, suspending, aborting)
        or in an unknown status

        Args:
            job: IngestJob to stop

        Raises:
            grpc.RpcError: If the StopIngestionJob call fails. The raised
                error carries the details string of the failed call and is
                chained to the original error.
        """
        request = StopIngestionJobRequest(id=job.id)
        try:
            self._core_service.StopIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            # Keep the original error (status code, traceback) as __cause__
            raise grpc.RpcError(e.details()) from e

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        source: Union[pd.DataFrame, str],
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        disable_progress_bar: bool = False,
        timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT,
    ) -> str:
        """
        Loads feature data into Feast for a specific feature set.

        Args:
            feature_set (typing.Union[str, feast.feature_set.FeatureSet]):
                Feature set object or the string name of the feature set

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            disable_progress_bar (bool):
                Disable printing of progress statistics.

            timeout (int):
                Timeout in seconds to wait for completion. The budget first
                covers waiting for the feature set to become ready; what is
                left of it is passed to every producer flush. None disables
                the limit on the readiness wait.

        Returns:
            str:
                ingestion id for this dataset

        Raises:
            TimeoutError: If the feature set is not ready within ``timeout``.
            Exception: If ``feature_set`` is neither a FeatureSet nor a str,
                or if the feature set's source type is not "Kafka".

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> fs_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.datetime.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>> client.ingest("driver", fs_df)
            >>>
            >>> driver_fs = client.get_feature_set(name="driver", project="project1")
            >>> client.ingest(driver_fs, fs_df)
        """

        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            project = feature_set.project
        elif isinstance(feature_set, str):
            if self.project is not None:
                project = self.project
            else:
                project = "default"
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        # Stage the source as temporary parquet file(s)
        dir_path, dest_path = _read_table_from_source(source, chunk_size,
                                                      max_workers)

        # From here on the temporary directory exists, so every remaining
        # step runs under try/finally: a failed parquet read or a timeout
        # while waiting for the feature set must not leave the files behind.
        try:
            pq_file = pq.ParquetFile(dest_path)

            row_count = pq_file.metadata.num_rows

            current_time = time.time()

            print("Waiting for feature set to be ready for ingestion...")
            while True:
                if timeout is not None and time.time(
                ) - current_time >= timeout:
                    raise TimeoutError(
                        "Timed out waiting for feature set to be ready")
                fetched_feature_set: Optional[
                    FeatureSet] = self.get_feature_set(name, project)
                if (fetched_feature_set is not None
                        and fetched_feature_set.status
                        == FeatureSetStatus.STATUS_READY):
                    feature_set = fetched_feature_set
                    break
                time.sleep(3)

            # Only the part of the budget not spent waiting is left for the
            # producer flushes below
            if timeout is not None:
                timeout = timeout - int(time.time() - current_time)

            # Kafka configs
            brokers = feature_set.get_kafka_source_brokers()
            topic = feature_set.get_kafka_source_topic()
            producer = get_producer(brokers, row_count, disable_progress_bar)

            # Loop optimization declarations
            produce = producer.produce
            flush = producer.flush
            ingestion_id = _generate_ingestion_id(feature_set)

            # Transform and push data to Kafka
            if feature_set.source.source_type == "Kafka":
                for chunk in get_feature_row_chunks(
                        file=dest_path,
                        row_groups=list(range(pq_file.num_row_groups)),
                        fs=feature_set,
                        ingestion_id=ingestion_id,
                        max_workers=max_workers,
                ):

                    # Push FeatureRow one chunk at a time to kafka
                    for serialized_row in chunk:
                        produce(topic=topic, value=serialized_row)

                    # Force a flush after each chunk
                    flush(timeout=timeout)

                    # Remove chunk from memory
                    del chunk

            else:
                raise Exception(
                    f"Could not determine source type for feature set "
                    f'"{feature_set.name}" with source type '
                    f'"{feature_set.source.source_type}"')

            # Print ingestion statistics
            producer.print_results()
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            shutil.rmtree(dir_path)

        return ingestion_id

    def get_statistics(
        self,
        feature_set_id: str,
        store: str,
        features: Optional[List[str]] = None,
        ingestion_ids: Optional[List[str]] = None,
        start_date: Optional[datetime.datetime] = None,
        end_date: Optional[datetime.datetime] = None,
        force_refresh: bool = False,
        project: Optional[str] = None,
    ) -> statistics_pb2.DatasetFeatureStatisticsList:
        """
        Retrieves the feature featureStatistics computed over the data in the batch
        stores.

        Args:
            feature_set_id: Feature set id to retrieve batch featureStatistics for.
                If it contains no "/", it is prefixed with the project.
            store: Name of the store to retrieve feature featureStatistics over. This
                store must be a historical store.
            features: Optional list of feature names to filter from the results.
                Omitting it is the same as passing an empty list.
            ingestion_ids: Optional list of ingestion ids by which to filter data
                before retrieving featureStatistics. Cannot be used with start_date
                and end_date.
                If multiple ingestion ids are provided, unaggregatable featureStatistics
                will be dropped.
            start_date: Optional start date over which to filter statistical data.
                Data from this date will be included.
                Cannot be used with ingestion_ids. If the provided period spans
                multiple days, unaggregatable featureStatistics will be dropped.
            end_date: Optional end date over which to filter statistical data.
                Data from this date will not be included.
                Cannot be used with ingestion_ids. If the provided period spans
                multiple days, unaggregatable featureStatistics will be dropped.
            force_refresh: Setting this flag to true will force a recalculation
                of featureStatistics and overwrite results currently in the cache, if any.
            project: Manual override for default project. When omitted, the
                client's project is used, or "default" if the client has none.
                An empty string disables the project prefix.

        Returns:
           Returns a tensorflow DatasetFeatureStatisticsList containing TFDV featureStatistics.

        Raises:
            ValueError: If ingestion_ids is combined with start_date or end_date.
        """

        if ingestion_ids is not None and (start_date is not None
                                          or end_date is not None):
            raise ValueError(
                "Only one of ingestion_ids or [start_date, end_date] can be provided."
            )

        # Resolve the project the same way ingest() does. Without this an
        # omitted project would be formatted into the id as "None/<id>".
        if project is None:
            project = self.project if self.project is not None else "default"

        if project != "" and "/" not in feature_set_id:
            feature_set_id = f"{project}/{feature_set_id}"

        request = GetFeatureStatisticsRequest(
            feature_set_id=feature_set_id,
            features=features if features is not None else [],
            store=store,
            force_refresh=force_refresh,
        )
        if ingestion_ids is not None:
            request.ingestion_ids.extend(ingestion_ids)
        else:
            # Timestamps are sent with whole-second precision
            if start_date is not None:
                request.start_date.CopyFrom(
                    Timestamp(seconds=int(start_date.timestamp())))
            if end_date is not None:
                request.end_date.CopyFrom(
                    Timestamp(seconds=int(end_date.timestamp())))

        # Attach the same metadata as every other call made by this client
        return self._core_service.GetFeatureStatistics(
            request,
            metadata=self._get_grpc_metadata(),
        ).dataset_feature_statistics_list

    def _get_grpc_metadata(self):
        """
        Build the metadata to attach to a gRPC request.

        Signed metadata is produced only when the auth option is enabled in
        the client configuration and an auth metadata plugin is set.

        Returns: Metadata from the auth plugin, or an empty tuple otherwise
        """
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if not (auth_enabled and self._auth_metadata):
            return ()
        return self._auth_metadata.get_signed_meta()
Beispiel #11
0
class Client:
    def __init__(self,
                 core_url: str = None,
                 serving_url: str = None,
                 verbose: bool = False):
        """
        Store the client settings; no connection is opened here.

        :param core_url: (str) Feast Core URL, may be left unset
        :param serving_url: (str) Feast Serving URL, may be left unset
        :param verbose: (bool) Stored on the client as ``_verbose``
        """
        self._verbose = verbose

        # Core: explicit URL, then channel and stub which start out unset
        self._core_url = core_url
        self.__core_channel: grpc.Channel = None
        self._core_service_stub: CoreServiceStub = None

        # Serving: explicit URL, then channel and stub which start out unset
        self._serving_url = serving_url
        self.__serving_channel: grpc.Channel = None
        self._serving_service_stub: ServingServiceStub = None

    @property
    def core_url(self) -> str:
        """
        Feast Core URL: the explicitly set value if any, otherwise the value
        of the environment variable named by FEAST_CORE_URL_ENV_KEY,
        otherwise an empty string.
        """
        explicit_url = self._core_url
        if explicit_url is None:
            return os.getenv(FEAST_CORE_URL_ENV_KEY, "")
        return explicit_url

    @core_url.setter
    def core_url(self, value: str):
        """Set the Feast Core URL explicitly."""
        self._core_url = value

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL: the explicitly set value if any, otherwise the
        value of the environment variable named by FEAST_SERVING_URL_ENV_KEY,
        otherwise an empty string.
        """
        if self._serving_url is not None:
            return self._serving_url
        env_url = os.getenv(FEAST_SERVING_URL_ENV_KEY)
        return "" if env_url is None else env_url

    @serving_url.setter
    def serving_url(self, value: str):
        """Set the Feast Serving URL explicitly."""
        self._serving_url = value

    def version(self):
        """
        Query Feast Core and Feast Serving for their versions.

        A failed version call is printed and leaves that service reported
        with an empty version and the status "not connected".

        :return: Dictionary containing Core and Serving versions and status
        """

        self._connect_core()
        self._connect_serving()

        core_version, core_status = "", "not connected"
        serving_version, serving_status = "", "not connected"

        try:
            core_reply = self._core_service_stub.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
            core_version = core_reply.version
            core_status = "connected"
        except grpc.RpcError as err:
            print(
                format_grpc_exception("GetFeastCoreVersion", err.code(),
                                      err.details()))

        try:
            serving_reply = self._serving_service_stub.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
            serving_version = serving_reply.version
            serving_status = "connected"
        except grpc.RpcError as err:
            print(
                format_grpc_exception("GetFeastServingInfo", err.code(),
                                      err.details()))

        core_info = {
            "url": self.core_url,
            "version": core_version,
            "status": core_status,
        }
        serving_info = {
            "url": self.serving_url,
            "version": serving_version,
            "status": serving_status,
        }
        return {"core": core_info, "serving": serving_info}

    def _connect_core(self, skip_if_connected: bool = True):
        """
        Connect to Core API

        :param skip_if_connected: (bool) Do not attempt to connect if a Core
         service stub has already been created
        :raises ValueError: if no Feast Core URL is set
        :raises ConnectionError: if the channel is not ready within
         GRPC_CONNECTION_TIMEOUT_DEFAULT
        """
        if skip_if_connected and self._core_service_stub:
            return

        if not self.core_url:
            raise ValueError("Please set Feast Core URL.")

        # The channel is created once and reused on later connection attempts
        if self.__core_channel is None:
            self.__core_channel = grpc.insecure_channel(self.core_url)

        try:
            grpc.channel_ready_future(self.__core_channel).result(
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError as e:
            # Raise instead of terminating the interpreter: a library must
            # leave the decision to exit to the calling application.
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {self.core_url}") from e
        else:
            self._core_service_stub = CoreServiceStub(self.__core_channel)

    def _connect_serving(self, skip_if_connected: bool = True):
        """
        Connect to Serving API

        :param skip_if_connected: (bool) Do not attempt to connect if a
         Serving service stub has already been created
        :raises ValueError: if no Feast Serving URL is set
        :raises ConnectionError: if the channel is not ready within
         GRPC_CONNECTION_TIMEOUT_DEFAULT
        """

        if skip_if_connected and self._serving_service_stub:
            return

        if not self.serving_url:
            raise ValueError("Please set Feast Serving URL.")

        # The channel is created once and reused on later connection attempts
        if self.__serving_channel is None:
            self.__serving_channel = grpc.insecure_channel(self.serving_url)

        try:
            grpc.channel_ready_future(self.__serving_channel).result(
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError as e:
            # Raise instead of terminating the interpreter: a library must
            # leave the decision to exit to the calling application.
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Serving gRPC server {self.serving_url}") from e
        else:
            self._serving_service_stub = ServingServiceStub(
                self.__serving_channel)

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Register one feature set, or each feature set of a list in order,
        with Feast Core.

        Items are applied one at a time; the first item that is not a
        FeatureSet raises ValueError, after any earlier items were applied.

        :param feature_sets: Union[List[FeatureSet], FeatureSet]
        """
        if isinstance(feature_sets, list):
            candidates = feature_sets
        else:
            candidates = [feature_sets]

        for candidate in candidates:
            if not isinstance(candidate, FeatureSet):
                raise ValueError(
                    f"Could not determine feature set type to apply {candidate}"
                )
            self._apply_feature_set(candidate)

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Validate a feature set and send it to Feast Core.

        On a CREATED response the local object is refreshed from the applied
        feature set; on NO_CHANGE only a message is printed. A gRPC error is
        printed, not raised.

        :param feature_set: (FeatureSet) Feature set to apply
        """
        self._connect_core()
        feature_set._client = self

        is_ok, reason = feature_set.is_valid()
        if not is_ok:
            raise Exception(reason)

        try:
            reply = self._core_service_stub.ApplyFeatureSet(
                ApplyFeatureSetRequest(feature_set=feature_set.to_proto()),
                timeout=GRPC_CONNECTION_TIMEOUT_APPLY,
            )  # type: ApplyFeatureSetResponse
            applied = FeatureSet.from_proto(reply.feature_set)

            if reply.status == ApplyFeatureSetResponse.Status.CREATED:
                print(
                    f'Feature set updated/created: "{applied.name}:{applied.version}".'
                )
                # Bring the caller's object in line with what Core stored
                feature_set._update_from_feature_set(applied, is_dirty=False)
            elif reply.status == ApplyFeatureSetResponse.Status.NO_CHANGE:
                print(f"No change detected in feature set {feature_set.name}")
        except grpc.RpcError as err:
            print(
                format_grpc_exception("ApplyFeatureSet", err.code(),
                                      err.details()))

    def list_feature_sets(self) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core
        :return: Returns a list of feature sets, each with this client
         attached as its ``_client``
        :raises Exception: if the ListFeatureSets call fails; the gRPC error
         is chained as the cause
        """
        self._connect_core()

        try:
            # Get latest feature sets from Feast Core
            feature_set_protos = self._core_service_stub.ListFeatureSets(
                ListFeatureSetsRequest())  # type: ListFeatureSetsResponse
        except grpc.RpcError as e:
            # Chain explicitly so the gRPC status stays reachable via __cause__
            raise Exception(
                format_grpc_exception("ListFeatureSets", e.code(),
                                      e.details())) from e

        # Store list of feature sets
        feature_sets = []
        for feature_set_proto in feature_set_protos.feature_sets:
            feature_set = FeatureSet.from_proto(feature_set_proto)
            feature_set._client = self
            feature_sets.append(feature_set)
        return feature_sets

    def get_feature_set(
            self,
            name: str,
            version: int = None,
            fail_if_missing: bool = False) -> Union[FeatureSet, None]:
        """
        Retrieve a single feature set from Feast Core
        :param name: (str) Name of feature set
        :param version: (int) Version of feature set
        :param fail_if_missing: (bool) Throws an exception if the feature set is not
         found, which includes the case where the GetFeatureSet call fails
        :return: Returns a single feature set, or None if it could not be
         retrieved and fail_if_missing is False
        :raises Exception: if fail_if_missing is True and no feature set
         could be retrieved

        """
        self._connect_core()
        try:
            # NOTE(review): a version of None is sent as the string "None";
            # confirm this is what Feast Core expects for "no version".
            get_feature_set_response = self._core_service_stub.GetFeatureSet(
                GetFeatureSetRequest(
                    name=name.strip(),
                    version=str(version)))  # type: GetFeatureSetResponse
            feature_set = get_feature_set_response.feature_set
        except grpc.RpcError as e:
            print(format_grpc_exception("GetFeatureSet", e.code(),
                                        e.details()))
            # Honour fail_if_missing on a failed call as well; otherwise the
            # caller silently receives None despite asking for an exception.
            if fail_if_missing:
                raise Exception(
                    f'Could not find feature set with name "{name}" and '
                    f'version "{version}"') from e
            return None

        if feature_set is not None:
            return FeatureSet.from_proto(feature_set)

        if fail_if_missing:
            raise Exception(
                f'Could not find feature set with name "{name}" and '
                f'version "{version}"')
        return None

    def list_entities(self) -> Dict[str, Entity]:
        """
        Collect the entities of every feature set returned by
        list_feature_sets, keyed by entity name. When several feature sets
        share an entity name, the one seen last is kept.
        :return: Dictionary of entity name to Entity
        """
        collected = OrderedDict()
        for feature_set in self.list_feature_sets():
            collected.update(
                (entity.name, entity) for entity in feature_set.entities)
        return collected

    def get_batch_features(self, feature_ids: List[str],
                           entity_rows: pd.DataFrame) -> Job:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_ids: List of feature ids that will be returned for each entity.
            Each feature id should have the following format "feature_set_name:version:feature_name".

            entity_rows: Pandas dataframe containing entities and a 'datetime' column. Each entity in
            a feature set must be present as a column in this dataframe. The datetime column must
            contain timestamps in datetime64 format. The dataframe passed in is not modified.

        Returns:
            Feast batch retrieval job: feast.job.Job.
            None is returned if a gRPC call fails; the error is printed.

        Raises:
            ValueError: If entity_rows fails validation (missing 'datetime'
            or entity column, or an unknown feature set).
            Exception: If the connected serving store is not a batch store.

        Example usage:
        ============================================================
        >>> from feast import Client
        >>> from datetime import datetime
        >>>
        >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
        >>> feature_ids = ["customer:1:bookings_7d"]
        >>> entity_rows = pd.DataFrame(
        >>>         {
        >>>            "datetime": [datetime.now() for _ in range(3)],
        >>>            "customer": [1001, 1002, 1003],
        >>>         }
        >>>     )
        >>> feature_retrieval_job = feast_client.get_batch_features(feature_ids, entity_rows)
        >>> df = feature_retrieval_job.to_dataframe()
        >>> print(df)
        """

        self._connect_serving()

        try:
            fs_request = _build_feature_set_request(feature_ids)

            # Validate entity rows based on entities in Feast Core
            self._validate_entity_rows_for_batch_retrieval(
                entity_rows, fs_request)

            # We want the timestamp column naming to be consistent with the
            # rest of Feast. rename() returns a new DataFrame, so the
            # caller's frame keeps its "datetime" column and can be reused
            # for another call.
            entity_rows = entity_rows.rename(
                columns={"datetime": "event_timestamp"})

            # Remove timezone from datetime column (on the renamed copy only)
            if isinstance(
                    entity_rows["event_timestamp"].dtype,
                    pd.core.dtypes.dtypes.DatetimeTZDtype,
            ):
                entity_rows["event_timestamp"] = pd.DatetimeIndex(
                    entity_rows["event_timestamp"]).tz_localize(None)

            # Retrieve serving information to determine store type and staging location
            serving_info = self._serving_service_stub.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT
            )  # type: GetFeastServingInfoResponse

            if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
                # Use the property so a URL taken from the environment is
                # reported instead of None
                raise Exception(
                    f'You are connected to a store "{self.serving_url}" which does not support batch retrieval'
                )

            # Export and upload entity row dataframe to staging location provided by Feast
            staged_file = export_dataframe_to_staging_location(
                entity_rows, serving_info.job_staging_location)  # type: str

            request = GetBatchFeaturesRequest(
                feature_sets=fs_request,
                dataset_source=DatasetSource(
                    file_source=DatasetSource.FileSource(
                        file_uris=[staged_file],
                        data_format=DataFormat.DATA_FORMAT_AVRO)),
            )

            # Retrieve Feast Job object to manage life cycle of retrieval
            response = self._serving_service_stub.GetBatchFeatures(request)
            return Job(response.job, self._serving_service_stub)

        except grpc.RpcError as e:
            print(
                format_grpc_exception("GetBatchFeatures", e.code(),
                                      e.details()))

    def _validate_entity_rows_for_batch_retrieval(self, entity_rows,
                                                  feature_sets_request):
        """
        Check that an entity_rows dataframe can be used for batch retrieval
        :param entity_rows: Pandas dataframe containing entities and datetime column. Each entity in a feature set
        must be present as a column in this dataframe.
        :param feature_sets_request: Iterable of requested feature sets; the
        name and version of each are used to look the feature set up
        :raises ValueError: if the "datetime" column is missing, a requested
        feature set cannot be found, or one of its entities has no column
        """
        columns = entity_rows.columns

        # The datetime column is mandatory
        if "datetime" not in columns:
            raise ValueError(
                f'Entity rows does not contain "datetime" column in columns {columns}'
            )

        # Every entity of every requested feature set needs its own column
        for requested in feature_sets_request:
            fs = self.get_feature_set(name=requested.name,
                                      version=requested.version)
            if fs is None:
                raise ValueError(
                    f'Feature set "{requested.name}:{requested.version}" could not be found'
                )
            absent = [
                entity_type for entity_type in fs.entities
                if entity_type.name not in columns
            ]
            if absent:
                # Report the first missing entity, in declaration order
                raise ValueError(
                    f'Dataframe does not contain entity "{absent[0].name}" column in columns "{columns}"'
                )

    def get_online_features(
        self,
        feature_ids: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
    ) -> GetOnlineFeaturesResponse:
        """
        Fetch the latest online feature data from Feast Serving
        :param feature_ids: List of feature Ids in the following format
                            [feature_set_name]:[version]:[feature_name]
                            example: ["feature_set_1:6:my_feature_1",
                                     "feature_set_1:6:my_feature_2",]

        :param entity_rows: List of GetFeaturesRequest.EntityRow where each row
                            contains entities. Timestamp should not be set for
                            online retrieval. All entity types within a feature
                            set must be provided for each entity key.
        :return: The response of the GetOnlineFeatures call, or None when the
                 call fails with a gRPC error (the error is printed)
        """
        self._connect_serving()

        try:
            request = GetOnlineFeaturesRequest(
                feature_sets=_build_feature_set_request(feature_ids),
                entity_rows=entity_rows,
            )
            return self._serving_service_stub.GetOnlineFeatures(
                request)  # type: GetOnlineFeaturesResponse
        except grpc.RpcError as err:
            print(
                format_grpc_exception("GetOnlineFeatures", err.code(),
                                      err.details()))
            return None

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        dataframe: pd.DataFrame,
        version: int = None,
        force_update: bool = False,
        max_workers: int = CPU_COUNT,
        disable_progress_bar: bool = False,
        chunk_size: int = 5000,
    ):
        """
        Loads data into Feast for a specific feature set.

        :param feature_set: (str, FeatureSet) Feature set object or the
         string name of the feature set (without a version)
        :param dataframe: Pandas dataframe to load into Feast for this
         feature set
        :param version: (int) Version of the feature set for which this
         ingestion should happen. When a FeatureSet object is given and no
         version is passed, the object's version is used
        :param force_update: (bool) Automatically update feature set based
         on data frame before ingesting data
        :param max_workers: Number of worker processes to use to encode the
         dataframe
        :param disable_progress_bar: Disable progress bar during ingestion
        :param chunk_size: Number of rows per chunk to encode before
         ingesting to Feast
        """
        if not isinstance(feature_set, (FeatureSet, str)):
            raise Exception(f"Feature set name must be provided")

        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            version = feature_set.version if version is None else version
        else:
            name = feature_set

        # Always work with the feature set as registered in Feast Core
        registered = self.get_feature_set(name, version, fail_if_missing=True)

        # Update the feature set based on dataframe schema
        if force_update:
            registered.infer_fields_from_df(dataframe,
                                            discard_unused_fields=True,
                                            replace_existing_features=True)
            self.apply(registered)

        # Kafka is the only source type that can be ingested into
        if registered.source.source_type != "Kafka":
            raise Exception(f"Could not determine source type for feature set "
                            f'"{registered.name}" with source type '
                            f'"{registered.source.source_type}"')

        ingest_kafka(
            feature_set=registered,
            dataframe=dataframe,
            max_workers=max_workers,
            disable_progress_bar=disable_progress_bar,
            chunk_size=chunk_size,
        )
Beispiel #12
0
class Client:
    def __init__(self, core_url=None, serving_url=None, verbose=False):
        """Set up a Feast client without opening any connection.

        A URL that is not passed explicitly is read from its environment
        variable (FEAST_CORE_URL_ENV_KEY / FEAST_SERVING_URL_ENV_KEY).
        Channels and service stubs start out as None; they are created by
        _connect_core / _connect_serving.

        Args:
            core_url (str, optional): feast core's grpc endpoint URL
                                  (e.g.: "my.feast.com:8433")
            serving_url (str, optional): feast serving's grpc endpoint URL
                                  (e.g.: "my.feast.com:8433")
            verbose (bool, optional): (default: False) initial value of the
                ``verbose`` property
        """
        self._core_url = (os.getenv(FEAST_CORE_URL_ENV_KEY)
                          if core_url is None else core_url)
        self._serving_url = (os.getenv(FEAST_SERVING_URL_ENV_KEY)
                             if serving_url is None else serving_url)

        # Nothing is connected yet.
        self.__core_channel = None
        self.__serving_channel = None
        self._core_service_stub = None
        self._job_service_stub = None
        self._dataset_service_stub = None
        self._serving_service_stub = None

        self._verbose = verbose
        self._table_downloader = TableDownloader()

    @property
    def core_url(self):
        if self._core_url is None:
            self._core_url = os.getenv(FEAST_CORE_URL_ENV_KEY)
            if self._core_url is None:
                raise ValueError(
                    "Core API URL not set. Either set the " +
                    "environment variable {} or set it explicitly.".format(
                        FEAST_CORE_URL_ENV_KEY))
        return self._core_url

    @core_url.setter
    def core_url(self, value):
        self._core_url = value

    @property
    def serving_url(self):
        if self._serving_url is None:
            self._serving_url = os.getenv(FEAST_SERVING_URL_ENV_KEY)
            if self._serving_url is None:
                raise ValueError(
                    "Serving API URL not set. Either set the " +
                    "environment variable {} or set it explicitly.".format(
                        FEAST_SERVING_URL_ENV_KEY))
        return self._serving_url

    @serving_url.setter
    def serving_url(self, value):
        self._serving_url = value

    @property
    def verbose(self):
        return self._verbose

    @verbose.setter
    def verbose(self, val):
        if not isinstance(val, bool):
            raise TypeError("verbose should be a boolean value")
        self._verbose = val

    def apply(self, obj):
        """Create or update one or many feast's resource
        (feature, entity, importer, storage).

        Args:
            obj (object): one or many feast's resource
            // create_entity (bool, optional):  (default: {None})
            // create_features (bool, optional): [description] (default: {None})
        """
        if isinstance(obj, list):
            ids = []
            for resource in obj:
                ids.append(self._apply(resource))
            return ids
        else:
            return self._apply(obj)

    def run(self,
            importer,
            name_override=None,
            apply_entity=False,
            apply_features=False):
        """
        Submit an import job to the job service.

        Optionally applies the importer's entity and features first, stages
        the importer's file when ``importer.require_staging`` is set, then
        submits the import spec.

        Args:
            importer (feast.sdk.importer.Importer): importer instance
            name_override (str, optional): Job name override
            apply_entity (bool, optional): (default: False) create/update
                entity inside importer
            apply_features (bool, optional): (default: False) create/update
                features inside importer

        Returns:
            (str) job ID of the import job
        """
        job_request = JobServiceTypes.SubmitImportJobRequest(
            importSpec=importer.spec)
        if name_override is not None:
            job_request.name = name_override

        if apply_entity:
            self._apply_entity(importer.entity)
        if apply_features:
            # importer.features is indexed by the keys it iterates over.
            for feature_key in importer.features:
                self._apply_feature(importer.features[feature_key])

        if importer.require_staging:
            remote_path = importer.remote_path
            print("Staging file to remote path {}".format(remote_path))
            importer.stage()

        spec_yaml = spec_to_yaml(importer.spec)
        print("Submitting job with spec:\n {}".format(spec_yaml))

        self._connect_core()
        job_response = self._job_service_stub.SubmitJob(job_request)
        print("Submitted job with id: {}".format(job_response.jobId))
        return job_response.jobId

    def create_dataset(self,
                       feature_set,
                       start_date,
                       end_date,
                       limit=None,
                       name_prefix=None):
        """
        Create a training dataset for a feature set.

        The arguments are validated with ``_check_create_dataset_args``, a
        CreateDatasetRequest bounded by ``start_date`` and ``end_date`` is
        sent to the dataset service, and the name and table URL from the
        response are returned.

        Args:
            feature_set (feast.sdk.resources.feature_set.FeatureSet):
                feature set representing the data wanted
            start_date (str): starting date of the training data in ISO 8601
                format (e.g.: "2018-12-31")
            end_date (str): end date of training data in ISO 8601 format (e.g.:
                "2018-12-31")
            limit (int, optional): (default: None) maximum number of row
                returned
            name_prefix (str, optional): (default: None) name prefix.

        Returns:
            feast.resources.feature_set.DatasetInfo: DatasetInfo containing
            the information of training dataset

        Raises:
            ValueError: if the arguments fail validation
        """
        self._check_create_dataset_args(feature_set, start_date, end_date,
                                        limit)

        feature_set_proto = feature_set.proto
        start_ts = _timestamp_from_datetime(_parse_date(start_date))
        end_ts = _timestamp_from_datetime(_parse_date(end_date))
        request = DatasetServiceTypes.CreateDatasetRequest(
            featureSet=feature_set_proto,
            startDate=start_ts,
            endDate=end_ts,
            limit=limit,
            namePrefix=name_prefix,
        )

        if self.verbose:
            print("creating training dataset for features: " +
                  str(feature_set.features))

        self._connect_core()
        response = self._dataset_service_stub.CreateDataset(request)

        if self.verbose:
            print("created dataset {}: {}".format(
                response.datasetInfo.name, response.datasetInfo.tableUrl))
        return DatasetInfo(response.datasetInfo.name,
                           response.datasetInfo.tableUrl)

    def get_serving_data(self, feature_set, entity_keys, ts_range=None):
        """Get feature value from feast serving API.

        If server_url is not provided, the value stored in the environment variable
        FEAST_SERVING_URL is used to connect to the serving server instead.

        Args:
            feature_set (feast.sdk.resources.feature_set.FeatureSet): feature set
                representing the data wanted
            entity_keys (:obj: `list` of :obj: `str): list of entity keys
            ts_range (:obj: `list` of str, optional): size 2 list of start
                and end time, in datetime type. It will
                filter out any feature value having event timestamp outside
                of the ts_range.

        Returns:
            pandas.DataFrame: DataFrame of results
        """
        start = None
        end = None
        if ts_range is not None:
            if len(ts_range) != 2:
                raise ValueError("ts_range must have len 2")
            start = ts_range[0]
            end = ts_range[1]
            if type(start) is not datetime or type(end) is not datetime:
                raise TypeError("start and end must be datetime type")

        request = self._build_serving_request(feature_set, entity_keys)
        self._connect_serving()
        return self._response_to_df(
            feature_set, self._serving_service_stub.QueryFeatures(request),
            start, end)

    def download_dataset(self,
                         dataset_info,
                         dest,
                         staging_location,
                         file_type=FileType.CSV):
        """
        Download a training dataset as a file.

        Delegates to the client's table downloader, passing the dataset's
        ``full_table_id``.

        Args:
            dataset_info (feast.sdk.resources.feature_set.DatasetInfo) :
                dataset_info to be downloaded
            dest (str): destination's file path
            staging_location (str): url to staging_location (currently
                support a folder in GCS)
            file_type (feast.sdk.resources.feature_set.FileType): (default:
                FileType.CSV) exported file format
        Returns:
            str: path to the downloaded file
        """
        table_id = dataset_info.full_table_id
        downloader = self._table_downloader
        return downloader.download_table_as_file(table_id, dest,
                                                 staging_location, file_type)

    def download_dataset_to_df(self, dataset_info, staging_location):
        """
        Download training dataset as Pandas Dataframe
        Args:
            dataset_info (feast.sdk.resources.feature_set.DatasetInfo) :
                dataset_info to be downloaded
            staging_location: url to staging_location (currently
                support a folder in GCS)

        Returns: pandas.DataFrame: dataframe of the training dataset

        """
        return self._table_downloader.download_table_as_df(
            dataset_info.full_table_id, staging_location)

    def close(self):
        """
        Close underlying connection to Feast's core and serving end points.
        """
        self.__core_channel.close()
        self.__core_channel = None
        self.__serving_channel.close()
        self.__serving_channel = None

    def _connect_core(self):
        """Connect to core api"""
        if self.__core_channel is None:
            self.__core_channel = grpc.insecure_channel(self.core_url)
            self._core_service_stub = CoreServiceStub(self.__core_channel)
            self._job_service_stub = JobServiceStub(self.__core_channel)
            self._dataset_service_stub = DatasetServiceStub(
                self.__core_channel)

    def _connect_serving(self):
        """Connect to serving api"""
        if self.__serving_channel is None:
            self.__serving_channel = grpc.insecure_channel(self.serving_url)
            self._serving_service_stub = ServingAPIStub(self.__serving_channel)

    def _build_serving_request(self, feature_set, entity_keys):
        """Build the QueryFeaturesRequest for a feature set and entity keys.

        Args:
            feature_set: object providing ``entity`` and ``features``
            entity_keys: entity ids to query

        Returns:
            QueryFeaturesRequest populated from the arguments
        """
        request_fields = dict(
            entityName=feature_set.entity,
            entityId=entity_keys,
            featureId=feature_set.features,
        )
        return QueryFeaturesRequest(**request_fields)

    def _response_to_df(self, feature_set, response, start=None, end=None):
        """Convert a serving response into a DataFrame, one row per entity.

        Args:
            feature_set: object providing ``entity`` (name of the entity
                column) and ``features`` (list of feature ids); together
                they define the leading columns of the result
            response: serving response exposing ``entityName`` and an
                ``entities`` mapping of entity id to an object whose
                ``features`` maps feature id to a value with ``value`` and
                ``timestamp`` fields
            start (datetime, optional): lower bound of the time filter
            end (datetime, optional): upper bound of the time filter

        Returns:
            pandas.DataFrame: one row per entity in the response. When both
            ``start`` and ``end`` are given, feature values whose timestamp
            lies outside ``[start, end]`` are left out of their row.
        """
        is_filter_time = start is not None and end is not None

        columns = [feature_set.entity] + list(feature_set.features)
        known_columns = set(columns)
        rows = []
        dtypes = {}
        for entity_id in response.entities:
            feature_map = response.entities[entity_id].features
            row = {response.entityName: entity_id}
            for feature_id in feature_map:
                feature_value = feature_map[feature_id]
                if is_filter_time:
                    ts = feature_value.timestamp.ToDatetime()
                    if ts < start or ts > end:
                        continue
                value = feature_value.value
                feast_valuetype = value.WhichOneof("val")
                # Remember the dtype per feature id (the cache is keyed by
                # feature id, so that is what has to be looked up).
                if feature_id not in dtypes:
                    dtypes[feature_id] = types.FEAST_VALUETYPE_TO_DTYPE[
                        feast_valuetype]
                row[feature_id] = getattr(value, feast_valuetype)

            # Keep columns that were not declared by the feature set, after
            # the declared ones, instead of silently dropping them.
            for key in row:
                if key not in known_columns:
                    known_columns.add(key)
                    columns.append(key)
            rows.append(row)

        # Build the frame once; appending row by row copies the whole frame
        # on every iteration and DataFrame.append no longer exists in
        # pandas 2.x.
        df = pd.DataFrame(rows, columns=columns)
        return df.astype(dtypes).reset_index(drop=True)

    def _apply(self, obj):
        """Applies a single object to feast core.

        Dispatches on the type of ``obj`` to the matching ``_apply_*``
        method.

        Args:
            obj (object): one of [Feature, Entity, FeatureGroup, Storage]

        Returns:
            the value returned by the matching ``_apply_*`` method

        Raises:
            TypeError: if ``obj`` is none of the handled types
        """
        if isinstance(obj, Feature):
            return self._apply_feature(obj)
        if isinstance(obj, Entity):
            return self._apply_entity(obj)
        if isinstance(obj, FeatureGroup):
            return self._apply_feature_group(obj)
        if isinstance(obj, Storage):
            return self._apply_storage(obj)

        # Adjacent literals rather than a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        # NOTE(review): the message lists Importer although no branch here
        # handles it explicitly - confirm whether that is intended.
        raise TypeError(
            "Apply can only be passed one of the following "
            "types: [Feature, Entity, FeatureGroup, Storage, Importer]")

    def _apply_feature(self, feature):
        """Apply the feature to the core API

        Args:
            feature (feast.sdk.resources.feature.Feature): feature to apply
        """
        self._connect_core()
        response = self._core_service_stub.ApplyFeature(feature.spec)
        if self.verbose:
            print("Successfully applied feature with id: {}\n---\n{}".format(
                response.featureId, feature))
        return response.featureId

    def _apply_entity(self, entity):
        """Apply the entity to the core API

        Args:
            entity (feast.sdk.resources.entity.Entity): entity to apply
        """
        self._connect_core()
        response = self._core_service_stub.ApplyEntity(entity.spec)
        if self.verbose:
            print("Successfully applied entity with name: {}\n---\n{}".format(
                response.entityName, entity))
        return response.entityName

    def _apply_feature_group(self, feature_group):
        """Apply the feature group to the core API

        Args:
            feature_group (feast.sdk.resources.feature_group.FeatureGroup):
                feature group to apply
        """
        self._connect_core()
        response = self._core_service_stub.ApplyFeatureGroup(
            feature_group.spec)
        if self.verbose:
            print("Successfully applied feature group with id: " +
                  "{}\n---\n{}".format(response.featureGroupId, feature_group))
        return response.featureGroupId

    def _apply_storage(self, storage):
        """Apply the storage to the core API

        Args:
            storage (feast.sdk.resources.storage.Storage): storage to apply
        """
        self._connect_core()
        response = self._core_service_stub.ApplyStorage(storage.spec)
        if self.verbose:
            print("Successfully applied storage with id: " +
                  "{}\n{}".format(response.storageId, storage))
        return response.storageId

    def _check_create_dataset_args(self, feature_set, start_date, end_date,
                                   limit):
        if len(feature_set.features) < 1:
            raise ValueError("feature set is empty")

        start = _parse_date(start_date)
        end = _parse_date(end_date)
        if end < start:
            raise ValueError("end_date is before start_date")

        if limit is not None and limit < 1:
            raise ValueError("limit is not a positive integer")
Beispiel #13
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self,
                 core_url: str = None,
                 serving_url: str = None,
                 verbose: bool = False):
        """
        The Feast Client should be initialized with at least one service url

        Args:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            verbose: Enable verbose logging
        """
        self._core_url = core_url
        self._serving_url = serving_url
        self._verbose = verbose
        self.__core_channel: grpc.Channel = None
        self.__serving_channel: grpc.Channel = None
        self._core_service_stub: CoreServiceStub = None
        self._serving_service_stub: ServingServiceStub = None

    @property
    def core_url(self) -> str:
        """
        Retrieve Feast Core URL
        """

        if self._core_url is not None:
            return self._core_url
        if os.getenv(FEAST_CORE_URL_ENV_KEY) is not None:
            return os.getenv(FEAST_CORE_URL_ENV_KEY)
        return ""

    @core_url.setter
    def core_url(self, value: str):
        """
        Set the Feast Core URL

        Returns:
            Feast Core URL string
        """
        self._core_url = value

    @property
    def serving_url(self) -> str:
        """
        Retrieve Serving Core URL
        """
        if self._serving_url is not None:
            return self._serving_url
        if os.getenv(FEAST_SERVING_URL_ENV_KEY) is not None:
            return os.getenv(FEAST_SERVING_URL_ENV_KEY)
        return ""

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Set the Feast Serving URL

        Returns:
            Feast Serving URL string
        """
        self._serving_url = value

    def version(self):
        """
        Returns version information from Feast Core and Feast Serving
        """
        result = {}

        if self.serving_url:
            self._connect_serving()
            serving_version = self._serving_service_stub.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT).version
            result["serving"] = {
                "url": self.serving_url,
                "version": serving_version
            }

        if self.core_url:
            self._connect_core()
            core_version = self._core_service_stub.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT).version
            result["core"] = {"url": self.core_url, "version": core_version}

        return result

    def _connect_core(self, skip_if_connected: bool = True):
        """
        Connect to Core API

        Args:
            skip_if_connected: Do not attempt to connect if already connected
        """
        if skip_if_connected and self._core_service_stub:
            return

        if not self.core_url:
            raise ValueError("Please set Feast Core URL.")

        if self.__core_channel is None:
            self.__core_channel = grpc.insecure_channel(self.core_url)

        try:
            grpc.channel_ready_future(self.__core_channel).result(
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {self.core_url} ")
        else:
            self._core_service_stub = CoreServiceStub(self.__core_channel)

    def _connect_serving(self, skip_if_connected=True):
        """
        Connect to Serving API

        Args:
            skip_if_connected: Do not attempt to connect if already connected
        """

        if skip_if_connected and self._serving_service_stub:
            return

        if not self.serving_url:
            raise ValueError("Please set Feast Serving URL.")

        if self.__serving_channel is None:
            self.__serving_channel = grpc.insecure_channel(self.serving_url)

        try:
            grpc.channel_ready_future(self.__serving_channel).result(
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Serving gRPC server {self.serving_url} ")
        else:
            self._serving_service_stub = ServingServiceStub(
                self.__serving_channel)

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Idempotently registers feature set(s) with Feast Core. Either a single
        feature set or a list can be provided.

        Args:
            feature_sets: List of feature sets that will be registered
        """
        if not isinstance(feature_sets, list):
            feature_sets = [feature_sets]
        for feature_set in feature_sets:
            if isinstance(feature_set, FeatureSet):
                self._apply_feature_set(feature_set)
                continue
            raise ValueError(
                f"Could not determine feature set type to apply {feature_set}")

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Registers a single feature set with Feast

        The feature set is bound to this client, validated with
        ``is_valid()``, sent to Feast Core, and finally updated in place
        from the feature set that Core returns.

        Args:
            feature_set: Feature set that will be registered
        """
        self._connect_core()
        feature_set._client = self

        feature_set.is_valid()

        # Send the feature set to Feast Core
        request = ApplyFeatureSetRequest(feature_set=feature_set.to_proto())
        response = self._core_service_stub.ApplyFeatureSet(
            request,
            timeout=GRPC_CONNECTION_TIMEOUT_APPLY,
        )  # type: ApplyFeatureSetResponse

        # Feature set as registered by Feast Core
        registered = FeatureSet.from_proto(response.feature_set)

        # Report what Feast Core did with the request
        if response.status == ApplyFeatureSetResponse.Status.CREATED:
            print(
                f'Feature set updated/created: "{registered.name}:{registered.version}"'
            )
        if response.status == ApplyFeatureSetResponse.Status.NO_CHANGE:
            print(f"No change detected or applied: {feature_set.name}")

        # Bring the caller's object in line with the registered feature set
        feature_set._update_from_feature_set(registered)

    def list_feature_sets(self) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core

        Each returned feature set has its ``_client`` set to this client.

        Returns:
            List of feature sets
        """
        self._connect_core()

        # Ask Feast Core for its feature sets
        response = self._core_service_stub.ListFeatureSets(
            ListFeatureSetsRequest())  # type: ListFeatureSetsResponse

        # Convert each proto and bind it to this client
        result = []
        for proto in response.feature_sets:
            converted = FeatureSet.from_proto(proto)
            converted._client = self
            result.append(converted)
        return result

    def get_feature_set(self,
                        name: str,
                        version: int = None) -> Union[FeatureSet, None]:
        """
        Retrieves a feature set from Feast Core.

        When no version is given, version 0 is requested.
        NOTE(review): version 0 presumably selects the latest version on
        the server side - confirm against Feast Core.

        Args:
            name: Name of feature set (surrounding whitespace is stripped)
            version: Version of feature set

        Returns:
            The feature set built from the response of Feast Core
        """
        self._connect_core()

        requested_version = 0 if version is None else int(version)
        request = GetFeatureSetRequest(name=name.strip(),
                                       version=requested_version)
        response = self._core_service_stub.GetFeatureSet(
            request)  # type: GetFeatureSetResponse
        return FeatureSet.from_proto(response.feature_set)

    def list_entities(self) -> Dict[str, Entity]:
        """
        Returns a dictionary of entities across all feature sets

        Returns:
            Dictionary of entities, indexed by name
        """
        entities_dict = OrderedDict()
        for fs in self.list_feature_sets():
            for entity in fs.entities:
                entities_dict[entity.name] = entity
        return entities_dict

    def get_batch_features(self, feature_ids: List[str],
                           entity_rows: Union[pd.DataFrame, str]) -> Job:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_ids (List[str]):
                List of feature ids that will be returned for each entity.
                Each feature id should have the following format
                "feature_set_name:version:feature_name".

            entity_rows (Union[pd.DataFrame, str]):
                Either a Pandas dataframe containing entities and a
                'datetime' column, or a path to an Avro source (a string
                ending in ".avro" or "*"). Each entity in a feature set must
                be present as a column. The caller's dataframe is not
                modified: when its datetime column is timezone-aware, the
                timezone is stripped on a copy.

        Returns:
            feast.job.Job:
                Returns a job object that can be used to monitor retrieval
                progress asynchronously, and can be used to materialize the
                results.

        Raises:
            Exception: if the connected serving deployment does not support
                batch retrieval, if ``entity_rows`` is a string that is
                neither an .avro nor a wildcard path, or if ``entity_rows``
                is neither a dataframe nor a string
            ValueError: if the entity rows fail column validation

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_ids = ["customer:1:bookings_7d"]
            >>> entity_rows = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.now() for _ in range(3)],
            >>>            "customer": [1001, 1002, 1003],
            >>>         }
            >>>     )
            >>> feature_retrieval_job = feast_client.get_batch_features(feature_ids, entity_rows)
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        self._connect_serving()

        fs_request = _build_feature_set_request(feature_ids)

        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            # Use the property: it also covers a URL that came from the
            # environment, where the private attribute is still None.
            raise Exception(
                f'You are connected to a store "{self.serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame detected
            # Validate entity rows based on entities in Feast Core
            self._validate_dataframe_for_batch_retrieval(
                entity_rows=entity_rows, feature_sets_request=fs_request)

            # Remove timezone from datetime column. assign() returns a new
            # frame, so the dataframe passed in by the caller is untouched.
            if isinstance(entity_rows["datetime"].dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                entity_rows = entity_rows.assign(datetime=pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None))
        elif isinstance(entity_rows, str):
            # String based source
            if entity_rows.endswith((".avro", "*")):
                # Validate Avro entity rows based on entities in Feast Core
                self._validate_avro_for_batch_retrieval(
                    source=entity_rows, feature_sets_request=fs_request)
            else:
                raise Exception(
                    "Only .avro and wildcard paths are accepted as entity_rows"
                )
        else:
            raise Exception(f"Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload entity row DataFrame to staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]

        request = GetBatchFeaturesRequest(
            feature_sets=fs_request,
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=staged_files,
                data_format=DataFormat.DATA_FORMAT_AVRO)),
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        response = self._serving_service_stub.GetBatchFeatures(request)
        return Job(response.job, self._serving_service_stub)

    def _validate_dataframe_for_batch_retrieval(self,
                                                entity_rows: pd.DataFrame,
                                                feature_sets_request):
        """
        Validate whether an the entity rows in a DataFrame contains the correct
        information for batch retrieval.

        Datetime column must be present in the DataFrame.

        Args:
            entity_rows (pd.DataFrame):
                Pandas DataFrame containing entities and datetime column. Each
                entity in a feature set must be present as a column in this
                DataFrame.

            feature_sets_request:
                Feature sets that will be requested.
        """

        self._validate_columns(columns=entity_rows.columns,
                               feature_sets_request=feature_sets_request,
                               datetime_field="datetime")

    def _validate_avro_for_batch_retrieval(self, source: str,
                                           feature_sets_request):
        """
        Validate whether the entity rows in an Avro source file contains the
        correct information for batch retrieval.

        Only gs:// and local files (file://) uri schemes are allowed.

        Avro file must have a column named "event_timestamp".

        No checks will be done if a GCS path is provided.

        Args:
            source (str):
                File path to Avro.

            feature_sets_request:
                Feature sets that will be requested.
        """
        p = urlparse(source)

        if p.scheme == "gs":
            # GCS path provided (Risk is delegated to user)
            # No validation if GCS path is provided
            return
        elif p.scheme == "file" or not p.scheme:
            # Local file (file://) provided
            file_path = os.path.abspath(os.path.join(p.netloc, p.path))
        else:
            raise Exception(
                f"Unsupported uri scheme provided {p.scheme}, only "
                f"local files (file://), and gs:// schemes are "
                f"allowed")

        with open(file_path, "rb") as f:
            reader = fastavro.reader(f)
            schema = json.loads(reader.metadata["avro.schema"])
            columns = [x["name"] for x in schema["fields"]]
            self._validate_columns(columns=columns,
                                   feature_sets_request=feature_sets_request,
                                   datetime_field="event_timestamp")

    def _validate_columns(self, columns: List[str], feature_sets_request,
                          datetime_field: str) -> None:
        """
        Check if the required column contains the correct values for batch
        retrieval.

        Args:
            columns (List[str]):
                List of columns to validate against feature_sets_request.

            feature_sets_request ():
                Feature sets that will be requested.

            datetime_field (str):
                Name of the datetime field that must be enforced and present as
                a column in the data source.

        Returns:
            None:
                None
        """
        # Ensure datetime column exists
        if datetime_field not in columns:
            raise ValueError(
                f'Entity rows does not contain "{datetime_field}" column in '
                f'columns {columns}')

        # Validate Avro columns based on feature set entities
        for feature_set in feature_sets_request:
            fs = self.get_feature_set(name=feature_set.name,
                                      version=feature_set.version)
            if fs is None:
                raise ValueError(
                    f'Feature set "{feature_set.name}:{feature_set.version}" '
                    f"could not be found")
            for entity_type in fs.entities:
                if entity_type.name not in columns:
                    raise ValueError(
                        f'Input does not contain entity'
                        f' "{entity_type.name}" column in columns "{columns}"')

    def get_online_features(
        self,
        feature_ids: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
    ) -> GetOnlineFeaturesResponse:
        """
        Fetch the latest online feature values from Feast Serving.

        Args:
            feature_ids: Feature ids, each written as
                [feature_set_name]:[version]:[feature_name]
                example:
                    ["feature_set_1:6:my_feature_1",
                    "feature_set_1:6:my_feature_2",]
            entity_rows: List of GetOnlineFeaturesRequest.EntityRow, each row
                carrying entities. Timestamp should not be set for online
                retrieval.

        Returns:
            The GetOnlineFeaturesResponse returned by the serving stub for
            the provided entity rows
        """

        self._connect_serving()

        # Resolve the RPC first, then assemble the request it is called with
        get_online = self._serving_service_stub.GetOnlineFeatures
        request = GetOnlineFeaturesRequest(
            feature_sets=_build_feature_set_request(feature_ids),
            entity_rows=entity_rows,
        )
        response = get_online(request)  # type: GetOnlineFeaturesResponse
        return response

    def ingest(self,
               feature_set: Union[str, FeatureSet],
               source: Union[pd.DataFrame, str],
               chunk_size: int = 10000,
               version: int = None,
               force_update: bool = False,
               max_workers: int = max(CPU_COUNT - 1, 1),
               disable_progress_bar: bool = False,
               timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT) -> None:
        """
        Loads feature data into Feast for a specific feature set.

        Args:
            feature_set (typing.Union[str, FeatureSet]):
                Feature set object or the string name of the feature set
                (without a version).

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            version (int):
                Feature set version.

            force_update (bool):
                Automatically update feature set based on source data prior to
                ingesting. This will also register changes to Feast. Requires
                ``feature_set`` to be a FeatureSet object.

            max_workers (int):
                Number of worker processes to use to encode values.

            disable_progress_bar (bool):
                Disable printing of progress statistics.

            timeout (int):
                Timeout in seconds to wait for completion.

        Returns:
            None:
                None

        Raises:
            ValueError:
                If ``force_update`` is requested but only a feature set name
                was supplied.
            TimeoutError:
                If the feature set is not ready for ingestion within
                ``timeout`` seconds.
            Exception:
                If ``feature_set`` is neither a FeatureSet nor a string, or
                if the feature set source type is not "Kafka".
        """

        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            if version is None:
                version = feature_set.version
        elif isinstance(feature_set, str):
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        # Updating needs the FeatureSet object itself; fail before any file
        # is written instead of crashing later on a str attribute lookup
        if force_update and not isinstance(feature_set, FeatureSet):
            raise ValueError(
                "force_update requires a FeatureSet object; a feature set "
                "name alone cannot be updated from the source data")

        # Read table and get row count
        tmp_table_name = _read_table_from_source(source, chunk_size,
                                                 max_workers)

        # Everything after the temporary file exists runs under try/finally
        # so the file is removed on every exit path, including the timeout
        try:
            pq_file = pq.ParquetFile(tmp_table_name)

            row_count = pq_file.metadata.num_rows

            # Update the feature set based on PyArrow table of first row group
            if force_update:
                feature_set.infer_fields_from_pa(
                    table=pq_file.read_row_group(0),
                    discard_unused_fields=True,
                    replace_existing_features=True)
                self.apply(feature_set)
            current_time = time.time()

            print("Waiting for feature set to be ready for ingestion...")
            while True:
                if (timeout is not None
                        and time.time() - current_time >= timeout):
                    raise TimeoutError(
                        "Timed out waiting for feature set to be ready")
                feature_set = self.get_feature_set(name, version)
                if (feature_set is not None and
                        feature_set.status == FeatureSetStatus.STATUS_READY):
                    break
                time.sleep(3)

            # The time spent waiting counts against the overall timeout
            if timeout is not None:
                timeout = timeout - int(time.time() - current_time)

            # Kafka configs
            brokers = feature_set.get_kafka_source_brokers()
            topic = feature_set.get_kafka_source_topic()
            producer = get_producer(brokers, row_count, disable_progress_bar)

            # Loop optimization declarations
            produce = producer.produce
            flush = producer.flush

            # Transform and push data to Kafka
            if feature_set.source.source_type == "Kafka":
                for chunk in get_feature_row_chunks(
                        file=tmp_table_name,
                        row_groups=list(range(pq_file.num_row_groups)),
                        fs=feature_set,
                        max_workers=max_workers):

                    # Push FeatureRow one chunk at a time to kafka
                    for serialized_row in chunk:
                        produce(topic=topic, value=serialized_row)

                    # Force a flush after each chunk
                    flush(timeout=timeout)

                    # Remove chunk from memory
                    del chunk

            else:
                raise Exception(
                    f"Could not determine source type for feature set "
                    f'"{feature_set.name}" with source type '
                    f'"{feature_set.source.source_type}"')

            # Print ingestion statistics
            producer.print_results()
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            os.remove(tmp_table_name)

        return None
Beispiel #14
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a Feast Client from configuration options.

        At least one service url should be supplied. See constants.py for the
        available configuration options. Frequently used options or arguments:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider - "google" or "oauth"
            When auth_provider is "oauth", these fields are mandatory -
            oauth_grant_type, oauth_client_id, oauth_client_secret,
            oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments merged over "options"
                (a keyword argument wins when both define the same key)
        """

        base_options = {} if options is None else options
        self._config = Config(options={**base_options, **kwargs})

        # Stubs and auth plugin start out unset
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # The auth metadata plugin is only built when auth is enabled
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

        self._configure_telemetry()

    @property
    def config(self) -> Config:
        """
        Configuration object held by this client.

        Returns:
            The Config instance stored on the client
        """
        current_config = self._config
        return current_config

    @property
    def _core_service(self):
        """
        Return the gRPC Feast Core Service Stub, building it on first use.

        The stub is stored on the client, so the channel is only created
        while no stub is set.

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        cfg = self._config
        core_channel = create_grpc_channel(
            url=cfg.get(opt.CORE_URL),
            enable_ssl=cfg.getboolean(opt.CORE_ENABLE_SSL),
            enable_auth=cfg.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=cfg.get(opt.CORE_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=cfg.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Return the gRPC Feast Serving Service Stub, building it on first use.

        When both `opentracing` and `grpcio-opentracing` can be imported, the
        channel is wrapped with an opentracing interceptor created from the
        global tracer.

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        cfg = self._config
        serving_channel = create_grpc_channel(
            url=cfg.get(opt.SERVING_URL),
            enable_ssl=cfg.getboolean(opt.SERVING_ENABLE_SSL),
            enable_auth=cfg.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=cfg.get(opt.SERVING_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=cfg.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )
        try:
            import opentracing
            from grpc_opentracing import open_tracing_client_interceptor
            from grpc_opentracing.grpcext import intercept_channel

            tracing_interceptor = open_tracing_client_interceptor(
                opentracing.global_tracer())
            serving_channel = intercept_channel(serving_channel,
                                                tracing_interceptor)
        except ImportError:
            # Tracing is optional: on ImportError the plain channel is used
            pass
        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    def _extra_grpc_params(self) -> Dict[str, Any]:
        """
        Collect the configured gRPC timeout and the gRPC metadata.

        Returns:
            Dict with a "timeout" entry (GRPC_CONNECTION_TIMEOUT option) and
            a "metadata" entry (result of _get_grpc_metadata())
        """
        return {
            "timeout": self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            "metadata": self._get_grpc_metadata(),
        }

    @property
    def core_url(self) -> str:
        """
        Feast Core URL read from the client configuration.

        Returns:
            Feast Core URL string
        """
        url = self._config.get(opt.CORE_URL)
        return url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration.

        Args:
            value: Feast Core URL
        """
        cfg = self._config
        cfg.set(opt.CORE_URL, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL read from the client configuration.

        Returns:
            Feast Serving URL string
        """
        url = self._config.get(opt.SERVING_URL)
        return url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration.

        Args:
            value: Feast Serving URL
        """
        cfg = self._config
        cfg.set(opt.SERVING_URL, value)

    @property
    def job_service_url(self) -> str:
        """
        Feast Job Service URL read from the client configuration.

        Returns:
            Feast Job Service URL string
        """
        url = self._config.get(opt.JOB_SERVICE_URL)
        return url

    @job_service_url.setter
    def job_service_url(self, value: str):
        """
        Store a new Feast Job Service URL in the client configuration.

        Args:
            value: Feast Job Service URL
        """
        cfg = self._config
        cfg.set(opt.JOB_SERVICE_URL, value)

    @property
    def core_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Core, read from the
        configuration as a boolean.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.CORE_ENABLE_SSL)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Core in the
        configuration.

        Args:
            value: True to enable client-side SSL/TLS
        """
        cfg = self._config
        cfg.set(opt.CORE_ENABLE_SSL, value)

    @property
    def serving_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Serving, read from the
        configuration as a boolean.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.SERVING_ENABLE_SSL)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Serving in the
        configuration.

        Args:
            value: True to enable client-side SSL/TLS
        """
        cfg = self._config
        cfg.set(opt.SERVING_ENABLE_SSL, value)

    @property
    def job_service_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for the Feast Job Service, read from the
        configuration as a boolean.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL)
        return ssl_enabled

    @job_service_secure.setter
    def job_service_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for the Feast Job Service in
        the configuration.

        Args:
            value: True to enable client-side SSL/TLS
        """
        cfg = self._config
        cfg.set(opt.JOB_SERVICE_ENABLE_SSL, value)

    def version(self, sdk_only=False):
        """
        Returns version information from the SDK, Feast Core and Feast Serving

        The SDK version is read from the installed "feast" distribution
        metadata. The standard library ``importlib.metadata`` is preferred;
        ``pkg_resources`` is only used where that module is unavailable, so
        the method keeps working in environments without setuptools.

        Args:
            sdk_only: If true, only the SDK version string is returned and no
                service is contacted.

        Returns:
            The SDK version string when ``sdk_only`` is set ("local build" if
            no "feast" distribution is installed). Otherwise a dict with the
            keys "sdk", "serving" and "core"; a service without a configured
            URL is reported as "not configured".
        """
        try:
            from importlib.metadata import PackageNotFoundError
            from importlib.metadata import version as _distribution_version
        except ImportError:  # importlib.metadata is missing before Python 3.8
            import pkg_resources

            try:
                sdk_version = pkg_resources.get_distribution("feast").version
            except pkg_resources.DistributionNotFound:
                sdk_version = "local build"
        else:
            try:
                sdk_version = _distribution_version("feast")
            except PackageNotFoundError:
                sdk_version = "local build"
        if sdk_only:
            return sdk_version

        result = {
            "sdk": {
                "version": sdk_version
            },
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_version = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            ).version
            result["serving"] = {
                "url": self.serving_url,
                "version": serving_version
            }

        if self.core_url:
            core_version = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            ).version
            result["core"] = {"url": self.core_url, "version": core_version}

        return result

    def _configure_telemetry(self):
        """
        Set up telemetry state according to the TELEMETRY configuration option.

        When telemetry is enabled, the usage counter is initialised and the
        telemetry id is read from ``~/.feast/telemetry``. If that file does
        not exist, a new id is generated, a notice is printed and the id is
        written to the file (creating ``~/.feast`` first if needed). When
        telemetry is disabled, an existing telemetry file is removed.
        """
        telemetry_filepath = join(expanduser("~"), ".feast", "telemetry")
        self._telemetry_enabled = (
            self._config.get(opt.TELEMETRY, "True") == "True"
        )  # written this way to turn the env var string into a boolean
        if self._telemetry_enabled:
            self._telemetry_counter = {"get_online_features": 0}
            if os.path.exists(telemetry_filepath):
                with open(telemetry_filepath, "r") as f:
                    self._telemetry_id = f.read()
            else:
                self._telemetry_id = str(uuid.uuid4())
                print(
                    "Feast is an open source project that collects anonymized usage statistics. To opt out or learn more see https://docs.feast.dev/v/master/advanced/telemetry"
                )
                # The ~/.feast directory may not exist yet; without it the
                # open() below fails with FileNotFoundError
                os.makedirs(os.path.dirname(telemetry_filepath),
                            exist_ok=True)
                with open(telemetry_filepath, "w") as f:
                    f.write(self._telemetry_id)
        else:
            if os.path.exists(telemetry_filepath):
                os.remove(telemetry_filepath)

    @property
    def project(self) -> str:
        """
        Currently active project taken from the client configuration.

        Returns:
            Project name

        Raises:
            ValueError: If the configuration holds no (or an empty) project
        """
        active_project = self._config.get(opt.PROJECT)
        if active_project:
            return active_project
        raise ValueError("No project has been configured.")

    def set_project(self, project: Optional[str] = None):
        """
        Change the currently active Feast project.

        Args:
            project: Project to make active. When left as None, the default
                project (``opt().PROJECT``) is stored instead.
        """
        target = opt().PROJECT if project is None else project
        self._config.set(opt.PROJECT, target)

    def list_projects(self) -> List[str]:
        """
        Ask Feast Core for all active Feast projects.

        Returns:
            List of project names

        """

        core = self._core_service
        projects_response = core.ListProjects(
            ListProjectsRequest(),
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        project_names = list(projects_response.projects)
        return project_names

    def create_project(self, project: str):
        """
        Send a CreateProject request for the given name to Feast Core.

        Args:
            project: Name of project
        """

        # Resolve the stub before building the request, as the call does
        core = self._core_service
        request = CreateProjectRequest(name=project)
        core.CreateProject(
            request,
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the currently active one, the client is
        reset to the default project.

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: Carrying the details of the failed Core call
        """

        try:
            # Go through the lazy property: the raw stub attribute is still
            # None when no Core call has been made on this client yet
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # revert to the default project; the active project lives in the
        # configuration (this class has no _project attribute)
        if self._config.get(opt.PROJECT) == project:
            self.set_project()

    def apply(
        self,
        objects: Union[List[Union[Entity, FeatureTable]], Entity,
                       FeatureTable],
        project: str = None,
    ):
        """
        Register entities and feature tables with Feast Core in an idempotent
        way. Accepts a single entity, a single feature table, or a list of
        them.

        Args:
            objects: List of entities and/or feature tables that will be registered
            project: Project to register the objects in; the client's active
                project is used when left as None

        Raises:
            ValueError: If an object is neither an Entity nor a FeatureTable

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            >>>     name="driver_entity",
            >>>     description="Driver entity for car rides",
            >>>     value_type=ValueType.STRING,
            >>>     labels={
            >>>         "key": "val"
            >>>     }
            >>> )
            >>> feast_client.apply(entity)
        """

        if self._telemetry_enabled:
            log_usage(
                "apply",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )
        if project is None:
            project = self.project

        # A lone object is handled as a one-element list
        pending = objects if isinstance(objects, list) else [objects]
        for item in pending:
            if isinstance(item, Entity):
                self._apply_entity(project, item)  # type: ignore
                continue
            if isinstance(item, FeatureTable):
                self._apply_feature_table(project, item)  # type: ignore
                continue
            raise ValueError(
                f"Could not determine object type to apply {item} with type {type(item)}. Type must be Entity or FeatureTable."
            )

    def apply_entity(self,
                     entities: Union[List[Entity], Entity],
                     project: str = None):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given entity.
        """
        warnings.warn(
            "The method apply_entity() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_entity().",
            DeprecationWarning,
        )

        if project is None:
            project = self.project

        # A lone entity is handled as a one-element list
        pending = entities if isinstance(entities, list) else [entities]
        for candidate in pending:
            if not isinstance(candidate, Entity):
                raise ValueError(
                    f"Could not determine entity type to apply {candidate}")
            self._apply_entity(project, candidate)  # type: ignore

    def _apply_entity(self, project: str, entity: Entity):
        """
        Register one entity with Feast and refresh the local object from the
        entity returned by Feast Core.

        Args:
            project: Project the entity is applied to
            entity: Entity that will be registered

        Raises:
            grpc.RpcError: Carrying the details of the failed Core call
        """

        entity.is_valid()
        spec = entity.to_spec_proto()

        # Send the entity spec to Feast Core
        try:
            response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project,
                                   spec=spec),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        # Update the caller's entity from the one Core returned
        entity._update_from_entity(Entity.from_proto(response.entity))

    def list_entities(self,
                      project: str = None,
                      labels: Optional[Dict[str, str]] = None) -> List[Entity]:
        """
        Retrieve a list of entities from Feast Core

        Args:
            project: Filter entities based on project name; the client's
                active project is used when left as None
            labels: User-defined labels that these entities are associated
                with; no label filter is applied when left as None

        Returns:
            List of entities
        """

        if project is None:
            project = self.project
        # A fresh dict per call instead of a shared mutable default argument
        if labels is None:
            labels = {}

        entity_filter = ListEntitiesRequest.Filter(project=project,
                                                   labels=labels)

        # Get latest entities from Feast Core
        entity_protos = self._core_service.ListEntities(
            ListEntitiesRequest(filter=entity_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        # Extract entities and return
        entities = []
        for entity_proto in entity_protos.entities:
            entity = Entity.from_proto(entity_proto)
            # Each returned entity keeps a reference to this client
            entity._client = self
            entities.append(entity)
        return entities

    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Look up a single entity in Feast Core.

        Args:
            project: Feast project that this entity belongs to; the client's
                active project is used when left as None
            name: Name of entity (surrounding whitespace is stripped)

        Returns:
            The specified entity

        Raises:
            grpc.RpcError: Carrying the details of the failed Core call
        """

        if self._telemetry_enabled:
            log_usage(
                "get_entity",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )
        if project is None:
            project = self.project

        try:
            response = self._core_service.GetEntity(
                GetEntityRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        return Entity.from_proto(response.entity)

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: str = None,
    ):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given feature table.
        """
        warnings.warn(
            "The method apply_feature_table() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_feature_table().",
            DeprecationWarning,
        )

        if project is None:
            project = self.project

        # A lone feature table is handled as a one-element list
        pending = (feature_tables
                   if isinstance(feature_tables, list) else [feature_tables])
        for candidate in pending:
            if not isinstance(candidate, FeatureTable):
                raise ValueError(
                    f"Could not determine feature table type to apply {candidate}"
                )
            self._apply_feature_table(project, candidate)  # type: ignore

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Register one feature table with Feast and refresh the local object
        from the feature table returned by Feast Core.

        Args:
            project: Project the feature table is applied to
            feature_table: Feature table that will be registered

        Raises:
            grpc.RpcError: Carrying the details of the failed Core call
        """

        feature_table.is_valid()
        table_spec = feature_table.to_spec_proto()

        # Send the feature table spec to Feast Core
        try:
            response = self._core_service.ApplyFeatureTable(
                ApplyFeatureTableRequest(
                    project=project,
                    table_spec=table_spec),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        # Update the caller's feature table from the one Core returned
        feature_table._update_from_feature_table(
            FeatureTable.from_proto(response.table))

    def list_feature_tables(
        self,
        project: str = None,
        labels: Optional[Dict[str, str]] = None
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from Feast Core

        Args:
            project: Filter feature tables based on project name; the
                client's active project is used when left as None
            labels: User-defined labels that these feature tables are
                associated with; no label filter is applied when left as None

        Returns:
            List of feature tables
        """

        if project is None:
            project = self.project
        # A fresh dict per call instead of a shared mutable default argument
        if labels is None:
            labels = {}

        table_filter = ListFeatureTablesRequest.Filter(project=project,
                                                       labels=labels)

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=table_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables and return
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            # Each returned feature table keeps a reference to this client
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables

    def get_feature_table(self,
                          name: str,
                          project: str = None) -> FeatureTable:
        """
        Look up a single feature table in Feast Core.

        Args:
            project: Feast project that this feature table belongs to; the
                client's active project is used when left as None
            name: Name of feature table (surrounding whitespace is stripped)

        Returns:
            The specified feature table

        Raises:
            grpc.RpcError: Carrying the details of the failed Core call
        """

        if self._telemetry_enabled:
            log_usage(
                "get_feature_table",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )
        if project is None:
            project = self.project

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        table = FeatureTable.from_proto(response.table)
        return table

    def delete_feature_table(self, name: str, project: str = None) -> None:
        """
        Send a DeleteFeatureTable request to Feast Core.

        Args:
            project: Feast project that this feature table belongs to; the
                client's active project is used when left as None
            name: Name of feature table (surrounding whitespace is stripped)

        Raises:
            grpc.RpcError: Carrying the details of the failed Core call
        """

        target_project = self.project if project is None else project

        try:
            self._core_service.DeleteFeatureTable(
                DeleteFeatureTableRequest(project=target_project,
                                          name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

    def list_features_by_ref(
            self,
            project: str = None,
            entities: Optional[List[str]] = None,
            labels: Optional[Dict[str, str]] = None,
    ) -> Dict[FeatureRef, Feature]:
        """
        Retrieve a dictionary of feature reference to feature from Feast Core based on filters provided.

        Args:
            project: Feast project that these features belongs to; the
                client's active project is used when left as None
            entities: Feast entity that these features are associated with;
                no entity filter is applied when left as None
            labels: Feast labels that these features are associated with;
                no label filter is applied when left as None

        Returns:
            Dictionary of <feature references: features>

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = feast_client.list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """

        if project is None:
            project = self.project
        # Fresh containers per call instead of shared mutable defaults
        if entities is None:
            entities = []
        if labels is None:
            labels = {}

        feature_filter = ListFeaturesRequest.Filter(project=project,
                                                    entities=entities,
                                                    labels=labels)

        feature_protos = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=feature_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Key each returned feature by its parsed feature reference
        return {
            FeatureRef.from_str(ref_str): Feature.from_proto(feature_proto)
            for ref_str, feature_proto in feature_protos.features.items()
        }

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: str = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT),
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The FeatureTable is always re-fetched by name via
        ``get_feature_table`` so that the registered batch source is used.
        The source data is written out locally and then uploaded to the
        FeatureTable's batch source (file or BigQuery).

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable. Defaults to
                ``self.project`` when None.

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            timeout (int):
                Timeout in seconds to wait for completion. Not referenced
                by the body of this method.

        Raises:
            TypeError: If ``feature_table`` is neither a string nor a
                FeatureTable.
            Exception: If the FeatureTable cannot be found, or if its batch
                source is a file source that is not in parquet format.

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [pd.datetime.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if self._telemetry_enabled:
            log_usage(
                "ingest",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )
        if project is None:
            project = self.project

        # Resolve the table name up front. An unsupported argument type used
        # to leave `name` unbound and surface later as a confusing NameError.
        if isinstance(feature_table, str):
            name = feature_table
        elif isinstance(feature_table, FeatureTable):
            name = feature_table.name
        else:
            raise TypeError(
                "feature_table must be a str or FeatureTable, got "
                f"{type(feature_table).__name__}")

        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project)
        if fetched_feature_table is None:
            raise Exception(f"FeatureTable, {name} cannot be found.")
        feature_table = fetched_feature_table

        batch_source = feature_table.batch_source
        is_file_source = isinstance(batch_source, FileSource)

        # Check 1) Only parquet file format for FeatureTable batch source is supported.
        # The format *instance* must be tested (not its type), and the error
        # applies when the format is NOT parquet; the previous condition could
        # never be true, so this guard was silently skipped.
        if is_file_source and not isinstance(
                batch_source.file_options.file_format, ParquetFormat):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}."
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            batch_source.event_timestamp_column,
            batch_source.field_mapping,
        )

        # dir_path is only set for the non-partitioned write; it is the
        # directory removed in the `finally` block below.
        dir_path = None
        with_partitions = False
        if is_file_source and batch_source.date_partition_column:
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                batch_source.date_partition_column,
                batch_source.event_timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                chunk_size,
                max_workers,
            )

        try:
            if is_file_source:
                # Drop any trailing wildcard from the configured file URL.
                file_url = batch_source.file_options.file_url.rstrip("*")
                _upload_to_file_source(file_url, with_partitions, dest_path,
                                       self._config)
            if isinstance(batch_source, BigQuerySource):
                bq_table_ref = batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    batch_source.event_timestamp_column)

                _upload_to_bq_source(bq_table_ref,
                                     feature_table_timestamp_column, dest_path)
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print(
            "Data has been successfully ingested into FeatureTable batch source."
        )

    def _get_grpc_metadata(self):
        """
        Build the metadata to attach to outgoing gRPC requests.

        Returns:
            The signed metadata produced by ``self._auth_metadata`` when the
            ENABLE_AUTH option is true and auth metadata is available;
            otherwise an empty tuple.
        """
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        # `and` short-circuits, so _auth_metadata is only inspected when
        # authentication is switched on.
        if not (auth_enabled and self._auth_metadata):
            return ()
        return self._auth_metadata.get_signed_meta()

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Dict[str, Any]],
        project: Optional[str] = None,
    ) -> OnlineResponse:
        """
        Fetch the latest online feature values from Feast Serving.

        Args:
            feature_refs: Feature references to return for every entity row.
                Each reference has the form "feature_table:feature", where
                "feature_table" and "feature" are the feature table and
                feature names respectively. Only the feature name is required.
            entity_rows: List of dictionaries, each mapping entity names to
                entity values.
            project: Optional project override. When given it is sent as the
                request's project; otherwise ``self.project`` is used.

        Returns:
            An OnlineResponse wrapping the raw serving response.

        Raises:
            grpc.RpcError: Re-raised with only the details of the original
                RPC error when the serving call fails.

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["sales:daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]}
        """

        if self._telemetry_enabled:
            # Usage is logged only when the call counter is a multiple of
            # 1000; the counter itself advances on every call.
            calls_so_far = self._telemetry_counter["get_online_features"]
            if calls_so_far % 1000 == 0:
                log_usage(
                    "get_online_features",
                    self._telemetry_id,
                    datetime.utcnow(),
                    self.version(sdk_only=True),
                )
            self._telemetry_counter["get_online_features"] += 1

        try:
            # Resolve the RPC first so evaluation order matches a direct call.
            fetch_online = self._serving_service.GetOnlineFeaturesV2
            request = GetOnlineFeaturesRequestV2(
                features=_build_feature_references(
                    feature_ref_strs=feature_refs),
                entity_rows=_infer_online_entity_rows(entity_rows),
                project=self.project if project is None else project,
            )
            raw_response = fetch_online(
                request,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        return OnlineResponse(raw_response)
Beispiel #15
0
class Client:
    def __init__(self, core_url=None, serving_url=None, verbose=False):
        """Set up a Feast client without opening any connection yet.

        A URL that is not passed explicitly is read from the environment
        variable named by FEAST_CORE_URL_ENV_KEY / FEAST_SERVING_URL_ENV_KEY.

        Args:
            core_url (str, optional): feast's grpc endpoint URL
                                  (e.g.: "my.feast.com:8433")
            serving_url (str, optional): feast serving's grpc endpoint URL
                                  (e.g.: "my.feast.com:8433")
            verbose (bool, optional): initial value of the verbose flag
                (default: False)
        """
        # Explicit arguments take precedence over the environment.
        self._core_url = (os.getenv(FEAST_CORE_URL_ENV_KEY)
                          if core_url is None else core_url)
        self._serving_url = (os.getenv(FEAST_SERVING_URL_ENV_KEY)
                             if serving_url is None else serving_url)

        # Channels and stubs start out unset.
        self.__core_channel = None
        self.__serving_channel = None
        self._core_service_stub = None
        self._job_service_stub = None
        self._dataset_service_stub = None
        self._serving_service_stub = None

        self._verbose = verbose
        self._table_downloader = TableDownloader()

    @property
    def core_url(self):
        """Core API URL.

        When no URL is stored, the environment variable named by
        FEAST_CORE_URL_ENV_KEY is consulted and its value is cached.

        Raises:
            ValueError: if no URL is stored and the variable is unset.
        """
        if self._core_url is not None:
            return self._core_url

        env_url = os.getenv(FEAST_CORE_URL_ENV_KEY)
        if env_url is None:
            raise ValueError(
                "Core API URL not set. Either set the " +
                "environment variable {} or set it explicitly.".format(
                    FEAST_CORE_URL_ENV_KEY))
        self._core_url = env_url
        return env_url

    @core_url.setter
    def core_url(self, value):
        """Store ``value`` as the Core API URL."""
        self._core_url = value

    @property
    def serving_url(self):
        """Serving API URL.

        When no URL is stored, the environment variable named by
        FEAST_SERVING_URL_ENV_KEY is consulted and its value is cached.

        Raises:
            ValueError: if no URL is stored and the variable is unset.
        """
        if self._serving_url is not None:
            return self._serving_url

        env_url = os.getenv(FEAST_SERVING_URL_ENV_KEY)
        if env_url is None:
            raise ValueError(
                "Serving API URL not set. Either set the " +
                "environment variable {} or set it explicitly.".format(
                    FEAST_SERVING_URL_ENV_KEY))
        self._serving_url = env_url
        return env_url

    @serving_url.setter
    def serving_url(self, value):
        """Store ``value`` as the Serving API URL."""
        self._serving_url = value

    @property
    def verbose(self):
        """Whether the client prints progress messages."""
        return self._verbose

    @verbose.setter
    def verbose(self, val):
        """Set the verbose flag.

        Raises:
            TypeError: if ``val`` is not a bool.
        """
        if isinstance(val, bool):
            self._verbose = val
        else:
            raise TypeError("verbose should be a boolean value")

    def apply(self, obj):
        """Create or update one or many Feast resources.

        Args:
            obj (object): a single resource, or a list of resources; each
                item is handed to ``_apply``.

        Returns:
            The value returned by ``_apply`` for a single resource, or a
            list of those values (in input order) when ``obj`` is a list.
        """
        if not isinstance(obj, list):
            return self._apply(obj)
        return [self._apply(resource) for resource in obj]

    def run(self,
            importer,
            name_override=None,
            apply_entity=False,
            apply_features=False):
        """
        Submit an import job built from the importer's spec.

        Args:
            importer (feast.sdk.importer.Importer): importer instance
            name_override (str, optional): Job name override
            apply_entity (bool, optional): (default: False) create/update
                entity inside importer
            apply_features (bool, optional): (default: False) create/update
                features inside importer

        Returns:
            (str) job ID of the import job
        """
        job_request = JobServiceTypes.SubmitImportJobRequest(
            importSpec=importer.spec)
        if name_override is not None:
            job_request.name = name_override

        # Optionally register the importer's entity and features first.
        if apply_entity:
            self._apply_entity(importer.entity)
        if apply_features:
            for importer_feature in importer.features:
                self._apply_feature(importer_feature)

        if importer.require_staging:
            staging_notice = "Staging file to remote path {}".format(
                importer.remote_path)
            print(staging_notice)
            importer.stage()

        spec_notice = "Submitting job with spec:\n {}".format(
            spec_to_yaml(importer.spec))
        print(spec_notice)

        self._connect_core()
        submit_response = self._job_service_stub.SubmitJob(job_request)
        print("Submitted job with id: {}".format(submit_response.jobId))
        return submit_response.jobId

    def create_dataset(self,
                       feature_set,
                       start_date,
                       end_date,
                       limit=None,
                       name_prefix=None):
        """
        Create a training dataset for a feature set, bounded by the given
        start and end dates.

        The arguments are validated with ``_check_create_dataset_args``
        before the request is sent through the dataset service stub.

        Args:
            feature_set (feast.sdk.resources.feature_set.FeatureSet):
                feature set representing the data wanted
            start_date (str): starting date of the training data in ISO 8601
                format (e.g.: "2018-12-31")
            end_date (str): end date of training data in ISO 8601 format (e.g.:
                "2018-12-31")
            limit (int, optional): (default: None) maximum number of row
                returned
            name_prefix (str, optional): (default: None) name prefix.

        Returns:
            DatasetInfo built from the name and table URL reported in the
            service response.
        """
        self._check_create_dataset_args(feature_set, start_date, end_date,
                                        limit)

        # Evaluate the request fields in declaration order.
        feature_set_proto = feature_set.proto
        start_ts = _timestamp_from_datetime(_parse_date(start_date))
        end_ts = _timestamp_from_datetime(_parse_date(end_date))
        create_request = DatasetServiceTypes.CreateDatasetRequest(
            featureSet=feature_set_proto,
            startDate=start_ts,
            endDate=end_ts,
            limit=limit,
            namePrefix=name_prefix)

        if self.verbose:
            print("creating training dataset for features: " +
                  str(feature_set.features))

        self._connect_core()
        create_response = self._dataset_service_stub.CreateDataset(
            create_request)

        if self.verbose:
            print("created dataset {}: {}".format(
                create_response.datasetInfo.name,
                create_response.datasetInfo.tableUrl))
        return DatasetInfo(create_response.datasetInfo.name,
                           create_response.datasetInfo.tableUrl)

    def get_serving_data(self,
                         feature_set,
                         entity_keys,
                         request_type=ServingRequestType.LAST,
                         ts_range=None,
                         limit=10):
        """Get data from the feast serving layer. You can either retrieve
        the latest value, or a list of the latest values, up to a provided
        limit.

        The serving endpoint comes from the ``serving_url`` property, which
        falls back to the environment variable named by
        FEAST_SERVING_URL_ENV_KEY when no URL was set explicitly.

        Args:
            feature_set (feast.sdk.resources.feature_set.FeatureSet): feature set
                representing the data wanted
            entity_keys (:obj: `list` of :obj: `str`): list of entity keys
            request_type (feast.sdk.utils.types.ServingRequestType):
                (default: feast.sdk.utils.types.ServingRequestType.LAST) type of
                request: one of [LIST, LAST]
            ts_range (:obj: `list` of str, optional): size 2 list of start
                timestamp and end timestamp, in ISO 8601 format. Only required if
                request_type is set to LIST. None (the default) is treated
                as an empty list.
            limit (int, optional): (default: 10) number of values to get. Only
                required if request_type is set to LIST

        Returns:
            pandas.DataFrame: DataFrame of results
        """
        # A None default replaces the former mutable `[]` default, which is
        # shared between calls; an explicit empty list still works.
        parsed_range = [
            _timestamp_from_datetime(dateutil.parser.parse(dt))
            for dt in (ts_range or [])
        ]
        request = self._build_serving_request(feature_set, entity_keys,
                                              request_type, parsed_range,
                                              limit)
        self._connect_serving()
        return self._response_to_df(
            feature_set, self._serving_service_stub.QueryFeatures(request))

    def download_dataset(self,
                         dataset_info,
                         dest,
                         staging_location,
                         file_type=FileType.CSV):
        """
        Download a training dataset as a file via the table downloader.

        Args:
            dataset_info (feast.sdk.resources.feature_set.DatasetInfo) :
                dataset_info to be downloaded; its ``table_id`` is used
            dest (str): destination's file path
            staging_location (str): url to staging_location (currently
                support a folder in GCS)
            file_type (feast.sdk.resources.feature_set.FileType): (default:
                FileType.CSV) exported file format

        Returns:
            str: path to the downloaded file
        """
        download_as_file = self._table_downloader.download_table_as_file
        table_id = dataset_info.table_id
        return download_as_file(table_id, dest, staging_location, file_type)

    def download_dataset_to_df(self, dataset_info, staging_location):
        """
        Download a training dataset as a Pandas DataFrame via the table
        downloader.

        Args:
            dataset_info (feast.sdk.resources.feature_set.DatasetInfo) :
                dataset_info to be downloaded; its ``table_id`` is used
            staging_location: url to staging_location (currently
                support a folder in GCS)

        Returns:
            pandas.DataFrame: dataframe of the training dataset
        """
        download_as_df = self._table_downloader.download_table_as_df
        table_id = dataset_info.table_id
        return download_as_df(table_id, staging_location)

    def close(self):
        """
        Close underlying connection to Feast's core and serving end points.

        Channels are only created on first use, so either one may still be
        None here. Such channels are skipped, which makes it safe to call
        this before connecting and to call it more than once.
        """
        if self.__core_channel is not None:
            self.__core_channel.close()
            # Resetting to None lets _connect_core open a fresh channel later.
            self.__core_channel = None
        if self.__serving_channel is not None:
            self.__serving_channel.close()
            self.__serving_channel = None

    def _connect_core(self):
        """Open the core channel and build its stubs, once.

        Does nothing when a core channel already exists. Otherwise an
        insecure gRPC channel to ``self.core_url`` is created and the core,
        job and dataset service stubs are bound to it.
        """
        if self.__core_channel is not None:
            return

        self.__core_channel = grpc.insecure_channel(self.core_url)
        channel = self.__core_channel
        self._core_service_stub = CoreServiceStub(channel)
        self._job_service_stub = JobServiceStub(channel)
        self._dataset_service_stub = DatasetServiceStub(channel)

    def _connect_serving(self):
        """Open the serving channel and build its stub, once.

        Does nothing when a serving channel already exists. Otherwise an
        insecure gRPC channel to ``self.serving_url`` is created and the
        serving stub is bound to it.
        """
        if self.__serving_channel is not None:
            return

        self.__serving_channel = grpc.insecure_channel(self.serving_url)
        channel = self.__serving_channel
        self._serving_service_stub = ServingAPIStub(channel)

    def _build_serving_request(self, feature_set, entity_keys, request_type,
                               ts_range, limit):
        """Helper function to build serving service request.

        Args:
            feature_set: feature set whose ``entity`` and ``features`` are
                put into the request
            entity_keys: entity ids to query
            request_type (ServingRequestType): applied to every requested
                feature
            ts_range: sequence whose first two items become the start and
                end of the timestamp range; only read for LIST requests
            limit: per-feature limit; only applied for LIST requests

        Returns:
            The populated QueryFeatures.Request.

        Raises:
            ValueError: if request_type is LIST and ``ts_range`` has fewer
                than two items.
        """
        request = QueryFeatures.Request(entityName=feature_set.entity,
                                        entityId=entity_keys)
        details = [
            RequestDetail(featureId=feat_id, type=request_type.value)
            for feat_id in feature_set.features
        ]

        if request_type == ServingRequestType.LIST:
            # Fail with a clear message instead of a bare IndexError when the
            # caller forgot to supply both ends of the range.
            if len(ts_range) < 2:
                raise ValueError(
                    "ts_range must contain a start and an end timestamp "
                    "when request_type is LIST")
            request.timestampRange.CopyFrom(
                TimestampRange(start=ts_range[0], end=ts_range[1]))
            for detail in details:
                detail.limit = limit

        request.requestDetails.extend(details)
        return request

    def _response_to_df(self, feature_set, response):
        """Flatten a serving response into one DataFrame.

        For every entity in the response, one frame is built per feature
        (columns: the response's entity name, the feature name and, when the
        feature carries a timestamp list, "timestamp"). The per-feature
        frames of an entity are outer-merged, and the per-entity frames are
        concatenated with a fresh index.

        Args:
            feature_set: used only to name the columns of the empty result
            response: serving response to convert

        Returns:
            pandas.DataFrame: the combined rows, or an empty frame with the
            entity, "timestamp" and feature columns when the response holds
            no entities.
        """
        per_entity_frames = []
        for entity_key in response.entities:
            features = response.entities[entity_key].features
            per_feature_frames = []
            for feature_name in features:
                feature_data = features[feature_name]
                value_list = feature_data.valueList
                # The populated oneof member holds the actual values.
                values = getattr(value_list,
                                 value_list.WhichOneof("valueList")).val
                records = []
                for position, value in enumerate(values):
                    record = {
                        response.entityName: entity_key,
                        feature_name: value
                    }
                    if feature_data.HasField("timestampList"):
                        seconds = \
                            feature_data.timestampList.val[position].seconds
                        record["timestamp"] = datetime.fromtimestamp(seconds)
                    records.append(record)
                per_feature_frames.append(pd.DataFrame(records))

            merged = per_feature_frames[0]
            for frame in per_feature_frames[1:]:
                merged = pd.merge(left=merged, right=frame, how='outer')
            per_entity_frames.append(merged)

        if not per_entity_frames:
            return pd.DataFrame(columns=[feature_set.entity, "timestamp"] +
                                feature_set.features)
        return pd.concat(per_entity_frames).reset_index(drop=True)

    def _apply(self, obj):
        """Applies a single object to feast core.

        Args:
            obj (object): one of [Feature, Entity, FeatureGroup, Storage]

        Returns:
            Whatever the type-specific ``_apply_*`` helper returns for
            ``obj``.

        Raises:
            TypeError: if ``obj`` is not an instance of a supported type.
        """
        if isinstance(obj, Feature):
            return self._apply_feature(obj)
        if isinstance(obj, Entity):
            return self._apply_entity(obj)
        if isinstance(obj, FeatureGroup):
            return self._apply_feature_group(obj)
        if isinstance(obj, Storage):
            return self._apply_storage(obj)
        # The message is built from adjacent literals so that no source
        # indentation leaks into it, and it lists only the types that are
        # actually dispatched above.
        raise TypeError(
            "Apply can only be passed one of the following types: "
            "[Feature, Entity, FeatureGroup, Storage]")

    def _apply_feature(self, feature):
        """Send a feature's spec to the core API.

        Args:
            feature (feast.sdk.resources.feature.Feature): feature to apply

        Returns:
            The ``featureId`` reported in the core response.
        """
        self._connect_core()
        apply_response = self._core_service_stub.ApplyFeature(feature.spec)
        if self.verbose:
            summary = "Successfully applied feature with id: {}\n---\n{}".format(
                apply_response.featureId, feature)
            print(summary)
        return apply_response.featureId

    def _apply_entity(self, entity):
        """Send an entity's spec to the core API.

        Args:
            entity (feast.sdk.resources.entity.Entity): entity to apply

        Returns:
            The ``entityName`` reported in the core response.
        """
        self._connect_core()
        apply_response = self._core_service_stub.ApplyEntity(entity.spec)
        if self.verbose:
            summary = "Successfully applied entity with name: {}\n---\n{}".format(
                apply_response.entityName, entity)
            print(summary)
        return apply_response.entityName

    def _apply_feature_group(self, feature_group):
        """Send a feature group's spec to the core API.

        Args:
            feature_group (feast.sdk.resources.feature_group.FeatureGroup):
                feature group to apply

        Returns:
            The ``featureGroupId`` reported in the core response.
        """
        self._connect_core()
        apply_response = self._core_service_stub.ApplyFeatureGroup(
            feature_group.spec)
        if self.verbose:
            summary = ("Successfully applied feature group with id: " +
                       "{}\n---\n{}".format(apply_response.featureGroupId,
                                            feature_group))
            print(summary)
        return apply_response.featureGroupId

    def _apply_storage(self, storage):
        """Send a storage's spec to the core API.

        Args:
            storage (feast.sdk.resources.storage.Storage): storage to apply

        Returns:
            The ``storageId`` reported in the core response.
        """
        self._connect_core()
        apply_response = self._core_service_stub.ApplyStorage(storage.spec)
        if self.verbose:
            summary = ("Successfully applied storage with id: " +
                       "{}\n{}".format(apply_response.storageId, storage))
            print(summary)
        return apply_response.storageId

    def _check_create_dataset_args(self, feature_set, start_date, end_date,
                                   limit):
        """Validate the arguments of ``create_dataset``.

        Raises:
            ValueError: if the feature set has no features, if the parsed
                end date precedes the parsed start date, or if ``limit`` is
                given and is smaller than 1.
        """
        if len(feature_set.features) == 0:
            raise ValueError("feature set is empty")

        # Both dates are parsed before they are compared.
        range_start = _parse_date(start_date)
        range_end = _parse_date(end_date)
        if range_end < range_start:
            raise ValueError("end_date is before start_date")

        limit_given = limit is not None
        if limit_given and limit < 1:
            raise ValueError("limit is not a positive integer")
Beispiel #16
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self,
                 core_url: str = None,
                 serving_url: str = None,
                 verbose: bool = False):
        """
        Store the client configuration; no connection is opened here.

        Args:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            verbose: Enable verbose logging
        """
        self._verbose = verbose
        self._core_url = core_url
        self._serving_url = serving_url

        # Core side: channel and stub start out unset.
        self.__core_channel: grpc.Channel = None
        self._core_service_stub: CoreServiceStub = None

        # Serving side: channel and stub start out unset.
        self.__serving_channel: grpc.Channel = None
        self._serving_service_stub: ServingServiceStub = None

    @property
    def core_url(self) -> str:
        """
        Feast Core URL.

        Returns:
            The explicitly stored URL if there is one, else the value of
            the environment variable named by FEAST_CORE_URL_ENV_KEY, else
            an empty string.
        """
        resolved = self._core_url
        if resolved is None:
            # Nothing stored on the instance: fall back to the environment.
            resolved = os.getenv(FEAST_CORE_URL_ENV_KEY)
        return "" if resolved is None else resolved

    @core_url.setter
    def core_url(self, value: str):
        """
        Set the Feast Core URL

        Args:
            value: Feast Core URL string
        """
        self._core_url = value

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL.

        Returns:
            The explicitly stored URL if there is one, else the value of
            the environment variable named by FEAST_SERVING_URL_ENV_KEY,
            else an empty string.
        """
        resolved = self._serving_url
        if resolved is None:
            # Nothing stored on the instance: fall back to the environment.
            resolved = os.getenv(FEAST_SERVING_URL_ENV_KEY)
        return "" if resolved is None else resolved

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Set the Feast Serving URL

        Args:
            value: Feast Serving URL string
        """
        self._serving_url = value

    def version(self):
        """
        Collect version information from Feast Serving and Feast Core.

        A service is only queried when its URL resolves to a non-empty
        string.

        Returns:
            Dictionary with an optional "serving" and an optional "core"
            entry, each holding the service's "url" and "version".
        """
        versions = {}

        if self.serving_url:
            self._connect_serving()
            serving_info = self._serving_service_stub.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
            serving_version = serving_info.version
            versions["serving"] = {
                "url": self.serving_url,
                "version": serving_version
            }

        if self.core_url:
            self._connect_core()
            core_info = self._core_service_stub.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
            core_version = core_info.version
            versions["core"] = {
                "url": self.core_url,
                "version": core_version
            }

        return versions

    def _connect_core(self, skip_if_connected: bool = True):
        """
        Connect to Core API

        An insecure channel to ``self.core_url`` is created if none exists,
        and the core service stub is only set once the channel reports
        ready within GRPC_CONNECTION_TIMEOUT_DEFAULT.

        Args:
            skip_if_connected: Do not attempt to connect if already connected

        Raises:
            ValueError: if no Core URL is available.
            ConnectionError: if the channel is not ready before the timeout.
        """
        if skip_if_connected and self._core_service_stub:
            return

        if not self.core_url:
            raise ValueError("Please set Feast Core URL.")

        if self.__core_channel is None:
            self.__core_channel = grpc.insecure_channel(self.core_url)

        try:
            channel_ready = grpc.channel_ready_future(self.__core_channel)
            channel_ready.result(timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {self.core_url} ")

        # Only reached when the channel became ready in time.
        self._core_service_stub = CoreServiceStub(self.__core_channel)

    def _connect_serving(self, skip_if_connected=True):
        """
        Connect to Serving API

        An insecure channel to ``self.serving_url`` is created if none
        exists, and the serving service stub is only set once the channel
        reports ready within GRPC_CONNECTION_TIMEOUT_DEFAULT.

        Args:
            skip_if_connected: Do not attempt to connect if already connected

        Raises:
            ValueError: if no Serving URL is available.
            ConnectionError: if the channel is not ready before the timeout.
        """
        if skip_if_connected and self._serving_service_stub:
            return

        if not self.serving_url:
            raise ValueError("Please set Feast Serving URL.")

        if self.__serving_channel is None:
            self.__serving_channel = grpc.insecure_channel(self.serving_url)

        try:
            channel_ready = grpc.channel_ready_future(self.__serving_channel)
            channel_ready.result(timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT)
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Serving gRPC server {self.serving_url} ")

        # Only reached when the channel became ready in time.
        self._serving_service_stub = ServingServiceStub(
            self.__serving_channel)

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Register one feature set, or a list of feature sets, with Feast Core.

        Items are applied in order; the first item that is not a FeatureSet
        stops processing with an error.

        Args:
            feature_sets: List of feature sets that will be registered

        Raises:
            ValueError: if an item is not a FeatureSet.
        """
        pending = (feature_sets
                   if isinstance(feature_sets, list) else [feature_sets])
        for feature_set in pending:
            if not isinstance(feature_set, FeatureSet):
                raise ValueError(
                    f"Could not determine feature set type to apply {feature_set}")
            self._apply_feature_set(feature_set)

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Register a single feature set with Feast Core and sync the local
        object with what Core returned.

        Args:
            feature_set: Feature set that will be registered
        """
        self._connect_core()
        feature_set._client = self
        feature_set.is_valid()

        # Send the feature set to Feast Core as an apply request
        apply_call = self._core_service_stub.ApplyFeatureSet
        apply_fs_response = apply_call(
            ApplyFeatureSetRequest(feature_set=feature_set.to_proto()),
            timeout=GRPC_CONNECTION_TIMEOUT_APPLY,
        )  # type: ApplyFeatureSetResponse

        # Feature set as returned by Core
        applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set)

        # Report what Core did with the request
        if apply_fs_response.status == ApplyFeatureSetResponse.Status.CREATED:
            created_message = (
                f'Feature set updated/created: "{applied_fs.name}:{applied_fs.version}"'
            )
            print(created_message)

        if apply_fs_response.status == ApplyFeatureSetResponse.Status.NO_CHANGE:
            unchanged_message = (
                f"No change detected or applied: {feature_set.name}")
            print(unchanged_message)

        # Copy the returned feature set into the caller's object
        feature_set.update_from_feature_set(applied_fs)

    def list_feature_sets(self) -> List[FeatureSet]:
        """
        Fetch all feature sets from Feast Core.

        Each returned feature set has its ``_client`` set to this client.

        Returns:
            List of feature sets
        """
        self._connect_core()

        listing = self._core_service_stub.ListFeatureSets(
            ListFeatureSetsRequest())  # type: ListFeatureSetsResponse

        # Convert the protos, then attach this client to every result
        found = [
            FeatureSet.from_proto(proto) for proto in listing.feature_sets
        ]
        for item in found:
            item._client = self
        return found

    def get_feature_set(self,
                        name: str,
                        version: int = None) -> Union[FeatureSet, None]:
        """
        Fetch one feature set from Feast Core.

        Args:
            name: Name of feature set; surrounding whitespace is stripped
            version: Version of feature set. When None, version 0 is sent
                in the request.

        Returns:
            The feature set built from the Core response
        """
        self._connect_core()

        get_call = self._core_service_stub.GetFeatureSet
        stripped_name = name.strip()
        requested_version = int(0 if version is None else version)
        lookup = get_call(
            GetFeatureSetRequest(
                name=stripped_name,
                version=requested_version))  # type: GetFeatureSetResponse
        return FeatureSet.from_proto(lookup.feature_set)

    def list_entities(self) -> Dict[str, Entity]:
        """
        Collect the entities of every feature set into a single mapping

        Feature sets are read through list_feature_sets(). When an entity
        name occurs more than once, the entity seen last is the one kept.

        Returns:
            Ordered dictionary of entities, keyed by entity name
        """
        return OrderedDict(
            (entity.name, entity)
            for feature_set in self.list_feature_sets()
            for entity in feature_set.entities)

    def get_batch_features(self, feature_ids: List[str],
                           entity_rows: pd.DataFrame) -> Job:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_ids: List of feature ids that will be returned for each
                entity. Each feature id should have the following format
                "feature_set_name:version:feature_name".
            entity_rows: Pandas dataframe containing entities and a 'datetime'
                column. Each entity in a feature set must be present as a column
                in this dataframe. If the datetime column is timezone-aware,
                its timezone is dropped (``tz_localize(None)``) on a copy
                before upload; the dataframe passed in is left untouched.

        Returns:
            Returns a job object that can be used to monitor retrieval progress
            asynchronously, and can be used to materialize the results

        Raises:
            ValueError: If entity_rows fails validation (missing "datetime"
                column, unknown feature set, or missing entity column)
            Exception: If the connected serving deployment does not report
                the batch serving type

        Examples:
            >>> import pandas as pd
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_ids = ["customer:1:bookings_7d"]
            >>> entity_rows = pd.DataFrame(
            ...         {
            ...            "datetime": [datetime.now() for _ in range(3)],
            ...            "customer": [1001, 1002, 1003],
            ...         }
            ...     )
            >>> feature_retrieval_job = feast_client.get_batch_features(feature_ids, entity_rows)
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        self._connect_serving()

        fs_request = _build_feature_set_request(feature_ids)

        # Validate entity rows based on entities in Feast Core
        self._validate_entity_rows_for_batch_retrieval(entity_rows, fs_request)

        # Remove timezone from datetime column. assign() returns a new
        # dataframe, so the caller's entity_rows is not modified in place.
        if isinstance(entity_rows["datetime"].dtype,
                      pd.core.dtypes.dtypes.DatetimeTZDtype):
            entity_rows = entity_rows.assign(datetime=pd.DatetimeIndex(
                entity_rows["datetime"]).tz_localize(None))

        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            raise Exception(
                f'You are connected to a store "{self._serving_url}" which '
                f"does not support batch retrieval ")

        # Export and upload entity row dataframe to staging location
        # provided by Feast
        staged_file = export_dataframe_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: str

        request = GetBatchFeaturesRequest(
            feature_sets=fs_request,
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=[staged_file],
                data_format=DataFormat.DATA_FORMAT_AVRO)),
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        response = self._serving_service_stub.GetBatchFeatures(request)
        return Job(response.job, self._serving_service_stub)

    def _validate_entity_rows_for_batch_retrieval(self, entity_rows,
                                                  feature_sets_request):
        """
        Check that an entity row dataframe can be used for batch retrieval

        Args:
            entity_rows: Pandas dataframe holding the entities and a
                "datetime" column. Every entity of every requested feature
                set must appear as a column.
            feature_sets_request: Requested feature sets; each item's name
                and version are used to look the feature set up

        Raises:
            ValueError: If the "datetime" column is missing, a requested
                feature set could not be found, or an entity column is
                absent from the dataframe
        """
        available_columns = entity_rows.columns

        # The datetime column is mandatory
        if "datetime" not in available_columns:
            raise ValueError(
                f'Entity rows does not contain "datetime" column in columns '
                f"{available_columns}")

        # Every entity of every requested feature set needs its own column
        for requested in feature_sets_request:
            resolved = self.get_feature_set(name=requested.name,
                                            version=requested.version)
            if resolved is None:
                raise ValueError(
                    f'Feature set "{requested.name}:{requested.version}" '
                    f"could not be found")
            for entity in resolved.entities:
                if entity.name in available_columns:
                    continue
                raise ValueError(
                    f'Dataframe does not contain entity "{entity.name}"'
                    f' column in columns "{available_columns}"')

    def get_online_features(
        self,
        feature_ids: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
    ) -> GetOnlineFeaturesResponse:
        """
        Request online feature data from Feast Serving

        Args:
            feature_ids: List of feature ids, each written as
                [feature_set_name]:[version]:[feature_name]
                example:
                    ["feature_set_1:6:my_feature_1",
                    "feature_set_1:6:my_feature_2",]
            entity_rows: List of GetOnlineFeaturesRequest.EntityRow, each
                carrying the entities to look up. Timestamp should not be
                set for online retrieval.

        Returns:
            The GetOnlineFeaturesResponse received from Feast Serving
        """

        self._connect_serving()

        online_rpc = self._serving_service_stub.GetOnlineFeatures
        request = GetOnlineFeaturesRequest(
            feature_sets=_build_feature_set_request(feature_ids),
            entity_rows=entity_rows,
        )
        response = online_rpc(request)  # type: GetOnlineFeaturesResponse
        return response

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        source: Union[pd.DataFrame, str],
        version: int = None,
        force_update: bool = False,
        max_workers: int = CPU_COUNT,
        disable_progress_bar: bool = False,
        chunk_size: int = 5000,
        timeout: int = None,
    ):
        """
        Loads feature data into Feast for a specific feature set.

        Args:
            feature_set: Name of feature set or a feature set object
            source: Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                * parquet
                * csv
                * json
            version: Feature set version. When a feature set object is
                passed and no version is given, the object's version is used
            force_update: Automatically update feature set based on source data
                prior to ingesting. This will also register changes to Feast.
                When feature_set is given by name, the feature set is first
                fetched from Feast Core so that it can be updated
            max_workers: Number of worker processes to use to encode values
            disable_progress_bar: Disable printing of progress statistics
            chunk_size: Maximum amount of rows to load into memory and ingest at
                a time
            timeout: Seconds to wait before ingestion times out

        Raises:
            Exception: If feature_set is neither a feature set object nor a
                string, or if the feature set's source type is not "Kafka"
        """
        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            if version is None:
                version = feature_set.version
        elif isinstance(feature_set, str):
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        table = _read_table_from_source(source)

        # Update the feature set based on DataFrame schema
        if force_update:
            # A bare name has no infer_fields_from_df(); resolve it to the
            # feature set object held by Feast Core before updating it.
            if isinstance(feature_set, str):
                feature_set = self.get_feature_set(name, version)

            # Use a small reference DataFrame to infer fields
            ref_df = table.to_batches(max_chunksize=20)[0].to_pandas()

            feature_set.infer_fields_from_df(ref_df,
                                             discard_unused_fields=True,
                                             replace_existing_features=True)
            self.apply(feature_set)

        # Always ingest against the definition held by Feast Core.
        # NOTE(review): `version` is not refreshed after apply(); if apply()
        # registers a new version, an explicitly versioned call still
        # targets the previous one - confirm this is intended.
        feature_set = self.get_feature_set(name, version)

        if feature_set.source.source_type == "Kafka":
            ingest_table_to_kafka(
                feature_set=feature_set,
                table=table,
                max_workers=max_workers,
                disable_pbar=disable_progress_bar,
                chunk_size=chunk_size,
                timeout=timeout,
            )
        else:
            raise Exception(f"Could not determine source type for feature set "
                            f'"{feature_set.name}" with source type '
                            f'"{feature_set.source.source_type}"')