Esempio n. 1
0
    def test_set_value(self):
        """
        Test that a value stored with ``set`` can be read back with ``getint``
        """
        import os

        fd, path = mkstemp()
        # mkstemp returns an already-open OS-level descriptor; close it so the
        # test does not leak a file handle.
        os.close(fd)
        try:
            config = Config(path=path)
            config.set("my_val", 1)

            assert config.getint("my_val") == 1
        finally:
            # Remove the temporary config file created for this test
            if os.path.exists(path):
                os.remove(path)
Esempio n. 2
0
 def test_type_casting_of_defaults(self):
     """
     Default values are cast to the type requested by the typed getters
     """
     import os

     fd, path = mkstemp()
     # mkstemp returns an already-open OS-level descriptor; close it so the
     # test does not leak a file handle.
     os.close(fd)
     try:
         config = Config(path=path)
         assert isinstance(config.getboolean("enable_auth"), bool)
         assert isinstance(config.getint("DATAPROC_EXECUTOR_INSTANCES"), int)
         assert isinstance(config.getfloat("DATAPROC_EXECUTOR_INSTANCES"),
                           float)
     finally:
         # Remove the temporary config file created for this test
         if os.path.exists(path):
             os.remove(path)
Esempio n. 3
0
    def test_type_casting(self):
        """
        Test type casting of string environment variables to other types
        """
        fd, path = mkstemp()
        # mkstemp returns an already-open OS-level descriptor; close it so the
        # test does not leak a file handle.
        os.close(fd)
        try:
            # The variables are set with a FEAST_ prefix and read back below
            # without it.
            os.environ["FEAST_INT_VAR"] = "1"
            os.environ["FEAST_FLOAT_VAR"] = "1.0"
            os.environ["FEAST_BOOLEAN_VAR"] = "True"
            config = Config(path=path)

            assert config.getint("INT_VAR") == 1
            assert config.getfloat("FLOAT_VAR") == 1.0
            assert config.getboolean("BOOLEAN_VAR") is True
        finally:
            # Remove the temporary config file created for this test
            if os.path.exists(path):
                os.remove(path)
Esempio n. 4
0
File: client.py Progetto: beoy/feast
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """

    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a Feast Client from configuration options.

        At least one service url should be supplied. See constants.py for
        the available configuration options. Commonly used ones are:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """

        base_options = dict() if options is None else options
        # Entries in kwargs override entries with the same key in "options"
        self._config = Config(options={**base_options, **kwargs})

        # Stubs start unset; the _core_service / _serving_service properties
        # create them on first access.
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # The auth metadata plugin is only built when auth is enabled
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(self._config)

    @property
    def _core_service(self):
        """
        Return the gRPC Feast Core Service Stub, building it on first use

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        # No stub cached yet: open a channel from the current configuration
        config = self._config
        core_channel = create_grpc_channel(
            url=config.get(CONFIG_CORE_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Return the gRPC Feast Serving Service Stub, building it on first use

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        # No stub cached yet: open a channel from the current configuration
        config = self._config
        serving_channel = create_grpc_channel(
            url=config.get(CONFIG_SERVING_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_SERVING_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as held in the client configuration

        Returns:
            Feast Core URL string
        """
        url = self._config.get(CONFIG_CORE_URL_KEY)
        return url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as held in the client configuration

        Returns:
            Feast Serving URL string
        """
        url = self._config.get(CONFIG_SERVING_URL_KEY)
        return url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Core, read from the configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Core

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_CORE_ENABLE_SSL_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Serving, read from the configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Serving

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_SERVING_ENABLE_SSL_KEY, value)

    def version(self):
        """
        Collect version information for the SDK, Feast Serving and Feast Core

        Returns:
            Dict with "sdk", "serving" and "core" entries. A service whose URL
            is not set is reported as "not configured"; otherwise its entry
            holds the "url" and the "version" returned by the service.
        """
        import pkg_resources

        sdk_version = pkg_resources.get_distribution("feast").version
        versions = {
            "sdk": {"version": sdk_version},
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            serving_version = serving_info.version
            versions["serving"] = {"url": self.serving_url, "version": serving_version}

        if self.core_url:
            core_info = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            core_version = core_info.version
            versions["core"] = {"url": self.core_url, "version": core_version}

        return versions

    @property
    def project(self) -> Union[str, None]:
        """
        Currently active project, read from the client configuration

        Returns:
            Project name

        Raises:
            ValueError: If the configured project value is empty or unset
        """
        if self._config.get(CONFIG_PROJECT_KEY):
            return self._config.get(CONFIG_PROJECT_KEY)
        raise ValueError("No project has been configured.")

    def set_project(self, project: Optional[str] = None):
        """
        Change the currently active Feast project

        Args:
            project: Project to set as active. If unset, will reset to the default project.
        """
        # Fall back to the default project from FEAST_DEFAULT_OPTIONS
        target = FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY] if project is None else project
        self._config.set(CONFIG_PROJECT_KEY, target)

    def list_projects(self) -> List[str]:
        """
        Fetch the project list from Feast Core

        Returns:
            List of project names

        """

        list_projects_rpc = self._core_service.ListProjects
        response = list_projects_rpc(
            ListProjectsRequest(),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        project_names = list(response.projects)
        return project_names

    def create_project(self, project: str):
        """
        Send a CreateProject request to Feast Core

        Args:
            project: Name of project
        """

        create_project_rpc = self._core_service.CreateProject
        create_project_rpc(
            CreateProjectRequest(name=project),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the currently configured project, the
        client is reset to the default project.

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: If the ArchiveProject call fails; the error carries
                the details of the original RPC error.
        """

        try:
            # Go through the _core_service property so the stub is created on
            # demand; the raw _core_service_stub attribute is None until then.
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details()) from e

        # Revert to the default project. The active project lives in the
        # config (see the project property / set_project), not in an
        # instance attribute.
        if self._config.get(CONFIG_PROJECT_KEY) == project:
            self._config.set(
                CONFIG_PROJECT_KEY, FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
            )

    def apply_entity(self, entities: Union[List[Entity], Entity], project: str = None):
        """
        Idempotently registers entities with Feast Core. Accepts a single
        entity or a list of entities.

        Args:
            entities: List of entities that will be registered
            project: Project to register the entities under. Defaults to the
                client's active project.

        Raises:
            ValueError: If an item is not an Entity instance

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            >>>     name="driver_entity",
            >>>     description="Driver entity for car rides",
            >>>     value_type=ValueType.STRING,
            >>>     labels={
            >>>         "key": "val"
            >>>     }
            >>> )
            >>> feast_client.apply_entity(entity)
        """

        target_project = self.project if project is None else project

        # Normalise a single entity into a one-element list
        entity_list = entities if isinstance(entities, list) else [entities]
        for candidate in entity_list:
            if not isinstance(candidate, Entity):
                raise ValueError(f"Could not determine entity type to apply {candidate}")
            self._apply_entity(target_project, candidate)  # type: ignore

    def _apply_entity(self, project: str, entity: Entity):
        """
        Registers a single entity with Feast and refreshes the local object
        from the entity returned by Feast Core

        Args:
            project: Project the entity is applied to
            entity: Entity that will be registered

        Raises:
            grpc.RpcError: If the ApplyEntity call fails
        """

        entity.is_valid()
        spec_proto = entity.to_spec_proto()

        # Wrap the spec in a request and send it to Feast Core
        try:
            response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project, spec=spec_proto),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        # Update the caller's entity from the one Feast Core returned
        returned_entity = Entity.from_proto(response.entity)
        entity._update_from_entity(returned_entity)

    def list_entities(
        self, project: str = None, labels: Optional[Dict[str, str]] = None
    ) -> List[Entity]:
        """
        Retrieve a list of entities from Feast Core

        Args:
            project: Filter entities based on project name. Defaults to the
                client's active project.
            labels: User-defined labels that these entities are associated with.
                Defaults to no labels.

        Returns:
            List of entities
        """

        if project is None:
            project = self.project

        # Use a fresh dict per call instead of a shared mutable default
        if labels is None:
            labels = dict()

        entity_filter = ListEntitiesRequest.Filter(project=project, labels=labels)

        # Get latest entities from Feast Core
        entity_protos = self._core_service.ListEntities(
            ListEntitiesRequest(filter=entity_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        # Extract entities, attach this client to each one, and return
        entities = []
        for entity_proto in entity_protos.entities:
            entity = Entity.from_proto(entity_proto)
            entity._client = self
            entities.append(entity)
        return entities

    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Fetch a single entity from Feast Core.

        Args:
            project: Feast project that this entity belongs to. Defaults to
                the client's active project.
            name: Name of entity; surrounding whitespace is stripped

        Returns:
            The specified entity

        Raises:
            grpc.RpcError: If the GetEntity call fails
        """

        target_project = self.project if project is None else project

        try:
            response = self._core_service.GetEntity(
                GetEntityRequest(project=target_project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        return Entity.from_proto(response.entity)

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: str = None,
    ):
        """
        Idempotently registers feature tables with Feast Core. Accepts a
        single feature table or a list of feature tables.

        Args:
            feature_tables: List of feature tables that will be registered
            project: Project to register the feature tables under. Defaults
                to the client's active project.

        Raises:
            ValueError: If an item is not a FeatureTable instance
        """

        target_project = self.project if project is None else project

        # Normalise a single feature table into a one-element list
        if isinstance(feature_tables, list):
            table_list = feature_tables
        else:
            table_list = [feature_tables]

        for candidate in table_list:
            if not isinstance(candidate, FeatureTable):
                raise ValueError(
                    f"Could not determine feature table type to apply {candidate}"
                )
            self._apply_feature_table(target_project, candidate)  # type: ignore

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Registers a single feature table with Feast and refreshes the local
        object from the feature table returned by Feast Core

        Args:
            project: Project the feature table is applied to
            feature_table: Feature table that will be registered

        Raises:
            grpc.RpcError: If the ApplyFeatureTable call fails
        """

        feature_table.is_valid()
        spec_proto = feature_table.to_spec_proto()

        # Wrap the spec in a request and send it to Feast Core
        try:
            response = self._core_service.ApplyFeatureTable(
                ApplyFeatureTableRequest(project=project, table_spec=spec_proto),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        # Update the caller's feature table from the one Feast Core returned
        returned_table = FeatureTable.from_proto(response.table)
        feature_table._update_from_feature_table(returned_table)

    def list_feature_tables(
        self, project: str = None, labels: Optional[Dict[str, str]] = None
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from Feast Core

        Args:
            project: Filter feature tables based on project name. Defaults to
                the client's active project.
            labels: Labels passed to the request filter. Defaults to no labels.

        Returns:
            List of feature tables
        """

        if project is None:
            project = self.project

        # Use a fresh dict per call instead of a shared mutable default
        if labels is None:
            labels = dict()

        table_filter = ListFeatureTablesRequest.Filter(project=project, labels=labels)

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=table_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables, attach this client to each one, and return
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables

    def get_feature_table(self, name: str, project: str = None) -> FeatureTable:
        """
        Fetch a single feature table from Feast Core.

        Args:
            project: Feast project that this feature table belongs to.
                Defaults to the client's active project.
            name: Name of feature table; surrounding whitespace is stripped

        Returns:
            The specified feature table

        Raises:
            grpc.RpcError: If the GetFeatureTable call fails
        """

        target_project = self.project if project is None else project

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=target_project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        table = FeatureTable.from_proto(response.table)
        return table

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: str = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = BATCH_INGESTION_PRODUCTION_TIMEOUT,
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The feature table is always re-fetched from Feast Core by name, the
        source is written to temporary parquet file(s), and those files are
        uploaded to the feature table's batch source.

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable. Defaults to the
                client's active project.

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            timeout (int):
                Timeout in seconds to wait for completion.
                NOTE(review): not referenced anywhere in this method body.

        Raises:
            Exception: If the feature table cannot be found, or its file
                batch source is not in parquet format.

        Examples:
            >>> import pandas as pd
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [pd.Timestamp.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if project is None:
            project = self.project

        # Resolve the table name for both accepted input forms. It used to be
        # bound only for FeatureTable inputs, so passing a string name failed
        # with UnboundLocalError on the lookup below.
        if isinstance(feature_table, FeatureTable):
            name = feature_table.name
        else:
            name = feature_table

        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project
        )
        if fetched_feature_table is not None:
            feature_table = fetched_feature_table
        else:
            raise Exception(f"FeatureTable, {name} cannot be found.")

        # Check 1) Only parquet file format for FeatureTable batch source is supported
        if (
            feature_table.batch_source
            and issubclass(type(feature_table.batch_source), FileSource)
            and "".join(
                feature_table.batch_source.file_options.file_format.split()
            ).lower()
            != "parquet"
        ):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}."
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            feature_table.batch_source.timestamp_column,
            feature_table.batch_source.field_mapping,
        )

        # dir_path is only set on the non-partitioned path; it is the
        # directory removed in the cleanup step below.
        dir_path = None
        with_partitions = False
        if (
            issubclass(type(feature_table.batch_source), FileSource)
            and feature_table.batch_source.date_partition_column
        ):
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                feature_table.batch_source.date_partition_column,
                feature_table.batch_source.timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names, pyarrow_table, chunk_size, max_workers,
            )

        try:
            if issubclass(type(feature_table.batch_source), FileSource):
                # Drops the final character of the configured file URL
                # (presumably a trailing wildcard — TODO confirm).
                file_url = feature_table.batch_source.file_options.file_url[:-1]
                _upload_to_file_source(file_url, with_partitions, dest_path)
            if issubclass(type(feature_table.batch_source), BigQuerySource):
                bq_table_ref = feature_table.batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    feature_table.batch_source.timestamp_column
                )

                _upload_to_bq_source(
                    bq_table_ref, feature_table_timestamp_column, dest_path
                )
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print("Data has been successfully ingested into FeatureTable batch source.")

    def _get_grpc_metadata(self):
        """
        Build the metadata tuple to attach to gRPC requests. This is primarily
        used when authentication is enabled but SSL/TLS is disabled.

        Returns: Signed metadata from the auth plugin when auth is enabled and
            a plugin is set, otherwise an empty tuple
        """
        auth_active = (
            self._config.getboolean(CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata
        )
        if not auth_active:
            return ()
        return self._auth_metadata.get_signed_meta()
Esempio n. 5
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a Feast Client from configuration options. At least one
        service url should be supplied.

        Args:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"
        """

        base_options = dict() if options is None else options
        # Entries in kwargs override entries with the same key in "options"
        self._config = Config(options={**base_options, **kwargs})

        # Channels and stubs start unset; _connect_core / _connect_serving
        # populate them.
        self.__core_channel: grpc.Channel = None
        self.__serving_channel: grpc.Channel = None
        self._core_service_stub: CoreServiceStub = None
        self._serving_service_stub: ServingServiceStub = None

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as held in the client configuration

        Returns:
            Feast Core URL string
        """
        url = self._config.get(CONFIG_CORE_URL_KEY)
        return url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as held in the client configuration

        Returns:
            Feast Serving URL string
        """
        url = self._config.get(CONFIG_SERVING_URL_KEY)
        return url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Core, read from the configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        secure = self._config.getboolean(CONFIG_CORE_SECURE_KEY)
        return secure

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Core

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_CORE_SECURE_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Serving, read from the configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        secure = self._config.getboolean(CONFIG_SERVING_SECURE_KEY)
        return secure

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Serving

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_SERVING_SECURE_KEY, value)

    def version(self):
        """
        Collect version information from Feast Core and Feast Serving

        Returns:
            Dict with a "serving" and/or "core" entry, each holding the "url"
            and "version" of that service. A service whose URL is not set is
            omitted.
        """
        versions = {}

        if self.serving_url:
            self._connect_serving()
            serving_info = self._serving_service_stub.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            serving_version = serving_info.version
            versions["serving"] = {"url": self.serving_url, "version": serving_version}

        if self.core_url:
            self._connect_core()
            core_info = self._core_service_stub.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            core_version = core_info.version
            versions["core"] = {"url": self.core_url, "version": core_version}

        return versions

    def _connect_core(self, skip_if_connected: bool = True):
        """
        Open a channel to the Core API and create the Core service stub

        Args:
            skip_if_connected: Do not attempt to connect if already connected

        Raises:
            ValueError: If no Feast Core URL is set
            ConnectionError: If the channel is not ready within the
                configured connection timeout
        """
        already_connected = skip_if_connected and self._core_service_stub
        if already_connected:
            return

        if not self.core_url:
            raise ValueError("Please set Feast Core URL.")

        # Create the channel once; later calls reuse it
        if self.__core_channel is None:
            use_tls = self.core_secure or self.core_url.endswith(":443")
            if use_tls:
                credentials = grpc.ssl_channel_credentials()
                self.__core_channel = grpc.secure_channel(self.core_url, credentials)
            else:
                self.__core_channel = grpc.insecure_channel(self.core_url)

        try:
            ready_future = grpc.channel_ready_future(self.__core_channel)
            ready_future.result(
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
            )
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Core gRPC server {self.core_url} ")

        # Only reached when the channel became ready
        self._core_service_stub = CoreServiceStub(self.__core_channel)

    def _connect_serving(self, skip_if_connected=True):
        """
        Open a channel to the Serving API and create the Serving service stub

        Args:
            skip_if_connected: Do not attempt to connect if already connected

        Raises:
            ValueError: If no Feast Serving URL is set
            ConnectionError: If the channel is not ready within the
                configured connection timeout
        """

        already_connected = skip_if_connected and self._serving_service_stub
        if already_connected:
            return

        if not self.serving_url:
            raise ValueError("Please set Feast Serving URL.")

        # Create the channel once; later calls reuse it
        if self.__serving_channel is None:
            use_tls = self.serving_secure or self.serving_url.endswith(":443")
            if use_tls:
                credentials = grpc.ssl_channel_credentials()
                self.__serving_channel = grpc.secure_channel(
                    self.serving_url, credentials)
            else:
                self.__serving_channel = grpc.insecure_channel(self.serving_url)

        try:
            ready_future = grpc.channel_ready_future(self.__serving_channel)
            ready_future.result(
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
            )
        except grpc.FutureTimeoutError:
            raise ConnectionError(
                f"Connection timed out while attempting to connect to Feast "
                f"Serving gRPC server {self.serving_url} ")

        # Only reached when the channel became ready
        self._serving_service_stub = ServingServiceStub(self.__serving_channel)

    @property
    def project(self) -> Union[str, None]:
        """
        Currently active project, read from the client configuration

        Returns:
            Project name
        """
        active_project = self._config.get(CONFIG_PROJECT_KEY)
        return active_project

    def set_project(self, project: str):
        """
        Change the currently active Feast project

        Args:
            project: Project to set as active
        """
        config = self._config
        config.set(CONFIG_PROJECT_KEY, project)

    def list_projects(self) -> List[str]:
        """
        Fetch the project list from Feast Core

        Returns:
            List of project names

        """
        self._connect_core()
        list_projects_rpc = self._core_service_stub.ListProjects
        response = list_projects_rpc(
            ListProjectsRequest(),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: ListProjectsResponse
        project_names = list(response.projects)
        return project_names

    def create_project(self, project: str):
        """
        Send a CreateProject request to Feast Core

        Args:
            project: Name of project
        """

        self._connect_core()
        create_project_rpc = self._core_service_stub.CreateProject
        create_project_rpc(
            CreateProjectRequest(name=project),
            timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the currently configured project, the
        configured project is cleared (set to an empty string).

        Args:
            project: Name of project to archive
        """

        self._connect_core()
        self._core_service_stub.ArchiveProject(
            ArchiveProjectRequest(name=project),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: ArchiveProjectResponse

        # The active project lives in the config (see the project property /
        # set_project); there is no _project attribute on the client.
        if self._config.get(CONFIG_PROJECT_KEY) == project:
            self._config.set(CONFIG_PROJECT_KEY, "")

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Register one or more feature sets with Feast Core. A single feature
        set is treated as a one-element list.

        Args:
            feature_sets: Feature set, or list of feature sets, to register

        Raises:
            ValueError: If an item is not a FeatureSet instance
        """
        if isinstance(feature_sets, list):
            candidates = feature_sets
        else:
            candidates = [feature_sets]

        for candidate in candidates:
            # Reject anything that is not a FeatureSet before touching Core
            if not isinstance(candidate, FeatureSet):
                raise ValueError(
                    f"Could not determine feature set type to apply {candidate}"
                )
            self._apply_feature_set(candidate)

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Register a single feature set with Feast Core and sync the local
        object with what Core returned

        Args:
            feature_set: Feature set that will be registered

        Raises:
            ValueError: If neither the feature set nor the client has a project
            grpc.RpcError: If the ApplyFeatureSet call fails
        """
        self._connect_core()

        feature_set.is_valid()
        spec_proto = feature_set.to_proto()

        # Fall back to the client's active project when the spec has none
        if not spec_proto.spec.project:
            if self.project is None:
                raise ValueError(
                    f"No project found in feature set {feature_set.name}. "
                    f"Please set the project within the feature set or within "
                    f"your Feast Client.")
            spec_proto.spec.project = self.project

        # Send the apply request to Feast Core
        apply_request = ApplyFeatureSetRequest(feature_set=spec_proto)
        try:
            response = self._core_service_stub.ApplyFeatureSet(
                apply_request,
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )  # type: ApplyFeatureSetResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())

        # Feature set as registered by Feast Core
        registered = FeatureSet.from_proto(response.feature_set)
        status = response.status

        # Report whether Core created/updated the feature set ...
        if status == ApplyFeatureSetResponse.Status.CREATED:
            print(
                f'Feature set updated/created: "{registered.name}:{registered.version}"'
            )

        # ... or left it untouched
        if status == ApplyFeatureSetResponse.Status.NO_CHANGE:
            print(f"No change detected or applied: {feature_set.name}")

        # Copy the returned state into the caller's feature set object
        feature_set._update_from_feature_set(registered)

    def list_feature_sets(self,
                          project: str = None,
                          name: str = None,
                          version: str = None) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core

        Any filter left as None is replaced with the wildcard "*", except
        project, which first falls back to the client's active project.

        Args:
            project: Filter feature sets based on project name
            name: Filter feature sets based on feature set name
            version: Filter feature sets based on version number

        Returns:
            List of feature sets, each with its client set to this client
        """
        self._connect_core()

        if project is None:
            project = "*" if self.project is None else self.project
        name = "*" if name is None else name
        version = "*" if version is None else version

        fs_filter = ListFeatureSetsRequest.Filter(project=project,
                                                  feature_set_name=name,
                                                  feature_set_version=version)
        list_request = ListFeatureSetsRequest(filter=fs_filter)

        # Fetch the matching feature sets from Feast Core
        response = self._core_service_stub.ListFeatureSets(
            list_request)  # type: ListFeatureSetsResponse

        # Wrap each returned proto and attach this client to it
        results = []
        for proto in response.feature_sets:
            wrapped = FeatureSet.from_proto(proto)
            wrapped._client = self
            results.append(wrapped)
        return results

    def get_feature_set(self,
                        name: str,
                        version: int = None,
                        project: str = None) -> Union[FeatureSet, None]:
        """
        Fetch a single feature set from Feast Core. When no version is given,
        version 0 is requested.

        Args:
            project: Feast project that this feature set belongs to; defaults
                to the client's active project
            name: Name of feature set (surrounding whitespace is stripped)
            version: Version of feature set

        Returns:
            The feature set built from the GetFeatureSet response

        Raises:
            ValueError: If no project is given and none is configured
            grpc.RpcError: If the GetFeatureSet call fails
        """
        self._connect_core()

        if project is None:
            project = self.project
            if project is None:
                raise ValueError("No project has been configured.")

        version = 0 if version is None else version

        fetch_request = GetFeatureSetRequest(project=project,
                                             name=name.strip(),
                                             version=int(version))
        try:
            response = self._core_service_stub.GetFeatureSet(
                fetch_request)  # type: GetFeatureSetResponse
        except grpc.RpcError as rpc_error:
            raise grpc.RpcError(rpc_error.details())
        return FeatureSet.from_proto(response.feature_set)

    def list_entities(self) -> Dict[str, Entity]:
        """
        Collect the entities of every feature set returned by
        list_feature_sets()

        Returns:
            Ordered dictionary of entities, indexed by name. When several
            feature sets share an entity name, the last one seen wins.
        """
        return OrderedDict(
            (entity.name, entity)
            for feature_set in self.list_feature_sets()
            for entity in feature_set.entities
        )

    def get_batch_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        default_project: str = None,
    ) -> RetrievalJob:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_refs (List[str]):
                List of feature references that will be returned for each entity.
                Each feature reference should have the following format
                "project/feature:version".

            entity_rows (Union[pd.DataFrame, str]):
                Either a Pandas dataframe containing entities and a 'datetime'
                column, or a string path ending in ".avro" or "*".
                Each entity in a feature set must be present as a column in the
                dataframe. The datetime column must contain timestamps in
                datetime64 format. A timezone-aware 'datetime' column is
                converted in place to timezone-naive values.
            default_project: Default project where feature values will be found.

        Returns:
            feast.job.RetrievalJob:
                Returns a retrieval job object that can be used to monitor
                retrieval progress asynchronously, and can be used to
                materialize the results.

        Raises:
            Exception: If the connected serving store is not a batch store, or
                if entity_rows is neither a supported path nor a DataFrame.

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["my_project/bookings_7d:1", "booking_14d"]
            >>> entity_rows = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.now() for _ in range(3)],
            >>>            "customer": [1001, 1002, 1003],
            >>>         }
            >>>     )
            >>> feature_retrieval_job = feast_client.get_batch_features(
            >>>     feature_refs, entity_rows, default_project="my_project")
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        self._connect_serving()

        feature_references = _build_feature_references(
            feature_refs=feature_refs, default_project=default_project)

        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            # Use the public `serving_url` property, as the rest of the class
            # does, so building this message cannot fail on a missing
            # `_serving_url` attribute and hide the real error.
            raise Exception(
                f'You are connected to a store "{self.serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame detected

            # Remove timezone from datetime column
            if isinstance(entity_rows["datetime"].dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                entity_rows["datetime"] = pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None)
        elif isinstance(entity_rows, str):
            # String based source
            if not entity_rows.endswith((".avro", "*")):
                raise Exception(
                    "Only .avro and wildcard paths are accepted as entity_rows"
                )
        else:
            raise Exception(f"Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload entity row DataFrame to staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]

        request = GetBatchFeaturesRequest(
            features=feature_references,
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=staged_files,
                data_format=DataFormat.DATA_FORMAT_AVRO)),
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        response = self._serving_service_stub.GetBatchFeatures(request)
        return RetrievalJob(response.job, self._serving_service_stub)

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[GetOnlineFeaturesRequest.EntityRow],
        default_project: Optional[str] = None,
    ) -> GetOnlineFeaturesResponse:
        """
        Retrieves the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references in the following format
                [project]/[feature_name]:[version]. Only the feature name
                is a required component in the reference.
                example:
                    ["my_project/my_feature_1:3",
                    "my_project3/my_feature_4:1",]
            entity_rows: List of GetFeaturesRequest.EntityRow where each row
                contains entities. Timestamp should not be set for online
                retrieval.
            default_project: Project used for references that carry no project
                name. It is only used when the client has no active project;
                otherwise the client's active project takes precedence.

        Returns:
            The GetOnlineFeatures response returned by Feast Serving
        """
        self._connect_serving()

        # The client's active project, when set, wins over the argument
        fallback_project = self.project if self.project else default_project
        references = _build_feature_references(
            feature_refs=feature_refs,
            default_project=fallback_project,
        )
        online_request = GetOnlineFeaturesRequest(
            features=references,
            entity_rows=entity_rows,
        )
        return self._serving_service_stub.GetOnlineFeatures(online_request)

    def list_ingest_jobs(
        self,
        job_id: str = None,
        feature_set_ref: FeatureSetRef = None,
        store_name: str = None,
    ):
        """
        List the ingestion jobs currently registered in Feast, with optional filters.
        Provides detailed metadata about each ingestion job.

        Args:
            job_id: Select specific ingestion job with the given job_id
            feature_set_ref: Filter ingestion jobs by target feature set (via reference)
            store_name: Filter ingestion jobs by target feast store's name

        Returns:
            List of IngestJobs matching the given filters
        """
        self._connect_core()

        # Honour the caller's feature set filter instead of discarding it.
        # NOTE(review): assumes FeatureSetRef exposes to_proto() like the other
        # SDK wrappers used in this client — confirm against feature_set.py.
        feature_set_ref_proto = (feature_set_ref.to_proto()
                                 if feature_set_ref is not None else None)

        # construct list request
        list_filter = ListIngestionJobsRequest.Filter(
            id=job_id,
            feature_set_reference=feature_set_ref_proto,
            store_name=store_name,
        )
        request = ListIngestionJobsRequest(filter=list_filter)

        # make list request & unpack response
        response = self._core_service_stub.ListIngestionJobs(request)
        return [
            IngestJob(proto, self._core_service_stub)
            for proto in response.jobs
        ]

    def restart_ingest_job(self, job: IngestJob):
        """
        Restart an ingestion job currently registered in Feast.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support restarting a job in a transitional (ie pending, suspending, aborting),
        terminal state (ie suspended or aborted) or unknown status

        Args:
            job: IngestJob to restart

        Raises:
            grpc.RpcError: If the RestartIngestionJob call fails
        """
        self._connect_core()
        restart_request = RestartIngestionJobRequest(id=job.id)
        try:
            self._core_service_stub.RestartIngestionJob(restart_request)
        except grpc.RpcError as rpc_error:
            # Re-raise carrying only the server-provided details
            raise grpc.RpcError(rpc_error.details())

    def stop_ingest_job(self, job: IngestJob):
        """
        Stop an ingestion job currently registered in Feast
        Does nothing if the target job is already in a terminal state (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending, suspending, aborting)
        or in an unknown status

        Args:
            job: IngestJob to stop

        Raises:
            grpc.RpcError: If the StopIngestionJob call fails
        """
        self._connect_core()
        stop_request = StopIngestionJobRequest(id=job.id)
        try:
            self._core_service_stub.StopIngestionJob(stop_request)
        except grpc.RpcError as rpc_error:
            # Re-raise carrying only the server-provided details
            raise grpc.RpcError(rpc_error.details())

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        source: Union[pd.DataFrame, str],
        chunk_size: int = 10000,
        version: int = None,
        force_update: bool = False,
        max_workers: int = max(CPU_COUNT - 1, 1),
        disable_progress_bar: bool = False,
        timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT,
    ) -> None:
        """
        Loads feature data into Feast for a specific feature set.

        Args:
            feature_set (typing.Union[str, feast.feature_set.FeatureSet]):
                Feature set object or the string name of the feature set
                (without a version).

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            version (int):
                Feature set version.

            force_update (bool):
                Automatically update feature set based on source data prior to
                ingesting. This will also register changes to Feast. Requires
                feature_set to be a FeatureSet object.

            max_workers (int):
                Number of worker processes to use to encode values.

            disable_progress_bar (bool):
                Disable printing of progress statistics.

            timeout (int):
                Timeout in seconds to wait for completion.

        Returns:
            None:
                None

        Raises:
            ValueError: If force_update is requested with only a feature set
                name.
            TimeoutError: If the feature set is not ready within the timeout.
            Exception: If feature_set is neither a FeatureSet nor a str, or if
                the feature set's source type is not "Kafka".
        """

        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            if version is None:
                version = feature_set.version
        elif isinstance(feature_set, str):
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        # Inferring fields needs a FeatureSet object; fail before any
        # temporary files are written rather than with an AttributeError on str.
        if force_update and not isinstance(feature_set, FeatureSet):
            raise ValueError(
                "force_update requires a FeatureSet object, but only a "
                "feature set name was provided")

        # Read table and get row count
        dir_path, dest_path = _read_table_from_source(source, chunk_size,
                                                      max_workers)

        # Everything below works on the temporary parquet file(s) created
        # above, so one try/finally guarantees they are removed even when
        # reading metadata, applying the feature set or waiting for readiness
        # fails.
        try:
            pq_file = pq.ParquetFile(dest_path)

            row_count = pq_file.metadata.num_rows

            # Update the feature set based on PyArrow table of first row group
            if force_update:
                feature_set.infer_fields_from_pa(
                    table=pq_file.read_row_group(0),
                    discard_unused_fields=True,
                    replace_existing_features=True,
                )
                self.apply(feature_set)
            current_time = time.time()

            print("Waiting for feature set to be ready for ingestion...")
            while True:
                if timeout is not None and time.time() - current_time >= timeout:
                    raise TimeoutError(
                        "Timed out waiting for feature set to be ready")
                feature_set = self.get_feature_set(name, version)
                if (feature_set is not None
                        and feature_set.status == FeatureSetStatus.STATUS_READY):
                    break
                time.sleep(3)

            # Only the time left after waiting is available for flushing
            if timeout is not None:
                timeout = timeout - int(time.time() - current_time)

            # Kafka configs
            brokers = feature_set.get_kafka_source_brokers()
            topic = feature_set.get_kafka_source_topic()
            producer = get_producer(brokers, row_count, disable_progress_bar)

            # Loop optimization declarations
            produce = producer.produce
            flush = producer.flush

            # Transform and push data to Kafka
            if feature_set.source.source_type == "Kafka":
                for chunk in get_feature_row_chunks(
                        file=dest_path,
                        row_groups=list(range(pq_file.num_row_groups)),
                        fs=feature_set,
                        max_workers=max_workers,
                ):

                    # Push FeatureRow one chunk at a time to kafka
                    for serialized_row in chunk:
                        produce(topic=topic, value=serialized_row)

                    # Force a flush after each chunk
                    flush(timeout=timeout)

                    # Remove chunk from memory
                    del chunk

            else:
                raise Exception(
                    f"Could not determine source type for feature set "
                    f'"{feature_set.name}" with source type '
                    f'"{feature_set.source.source_type}"')

            # Print ingestion statistics
            producer.print_results()
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            shutil.rmtree(dir_path)
Esempio n. 6
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build the client configuration and prepare lazily-created gRPC stubs.

        The Feast Client should be initialized with at least one service url
        Please see constants.py for configuration options. Commonly used options
        or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that will be used as
                configuration options along with "options"; on a key clash the
                keyword argument wins
        """

        base_options = dict() if options is None else options
        merged_options = {**base_options, **kwargs}
        self._config = Config(options=merged_options)

        # Stubs start empty; the service properties create them on first use
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._job_service_stub: Optional[JobServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # Only build the auth metadata plugin when auth is switched on
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _core_service(self):
        """
        Return the gRPC Feast Core Service Stub, creating it on first access

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        cfg = self._config
        core_channel = create_grpc_channel(
            url=cfg.get(opt.CORE_URL),
            enable_ssl=cfg.getboolean(opt.CORE_ENABLE_SSL),
            enable_auth=cfg.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=cfg.get(opt.CORE_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=cfg.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Return the gRPC Feast Serving Service Stub, creating it on first
        access. If both `opentracing` and `grpcio-opentracing` can be imported,
        the channel is wrapped with an opentracing interceptor based on the
        global tracer.

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        cfg = self._config
        serving_channel = create_grpc_channel(
            url=cfg.get(opt.SERVING_URL),
            enable_ssl=cfg.getboolean(opt.SERVING_ENABLE_SSL),
            enable_auth=cfg.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=cfg.get(opt.SERVING_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=cfg.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )
        try:
            import opentracing
            from grpc_opentracing import open_tracing_client_interceptor
            from grpc_opentracing.grpcext import intercept_channel

            tracer = opentracing.global_tracer()
            serving_channel = intercept_channel(
                serving_channel, open_tracing_client_interceptor(tracer))
        except ImportError:
            # Tracing is optional: keep the plain channel when unavailable
            pass
        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    @property
    def _use_job_service(self) -> bool:
        """
        Whether a job service URL exists in the client configuration
        """
        has_job_service_url = self._config.exists(opt.JOB_SERVICE_URL)
        return has_job_service_url

    @property
    def _job_service(self):
        """
        Creates or returns the gRPC Feast Job Service Stub

        Returns: JobServiceStub, or None when no job service URL is configured
        """
        # Don't try to initialize job service stub if the job service is disabled
        if not self._use_job_service:
            return None

        if not self._job_service_stub:
            channel = create_grpc_channel(
                url=self._config.get(opt.JOB_SERVICE_URL),
                enable_ssl=self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL),
                enable_auth=self._config.getboolean(opt.ENABLE_AUTH),
                ssl_server_cert_path=self._config.get(
                    opt.JOB_SERVICE_SERVER_SSL_CERT),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            )
            # Store the stub on the attribute that the check above reads, so
            # the channel is created once and reused on later accesses.
            self._job_service_stub = JobServiceStub(channel)
        return self._job_service_stub

    def _extra_grpc_params(self) -> Dict[str, Any]:
        """
        Keyword arguments shared by gRPC calls: the configured connection
        timeout and the client's gRPC metadata
        """
        return {
            "timeout": self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            "metadata": self._get_grpc_metadata(),
        }

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as stored in the client configuration

        Returns:
            Feast Core URL string
        """
        configured_url = self._config.get(opt.CORE_URL)
        return configured_url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(opt.CORE_URL, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as stored in the client configuration

        Returns:
            Feast Serving URL string
        """
        configured_url = self._config.get(opt.SERVING_URL)
        return configured_url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(opt.SERVING_URL, value)

    @property
    def job_service_url(self) -> str:
        """
        Feast Job Service URL as stored in the client configuration

        Returns:
            Feast Job Service URL string
        """
        configured_url = self._config.get(opt.JOB_SERVICE_URL)
        return configured_url

    @job_service_url.setter
    def job_service_url(self, value: str):
        """
        Store a new Feast Job Service URL in the client configuration

        Args:
            value: Feast Job Service URL
        """
        config = self._config
        config.set(opt.JOB_SERVICE_URL, value)

    @property
    def core_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Core, read from the configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.CORE_ENABLE_SSL)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Core

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.CORE_ENABLE_SSL, value)

    @property
    def serving_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for Feast Serving, read from the
        configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.SERVING_ENABLE_SSL)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for Feast Serving

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.SERVING_ENABLE_SSL, value)

    @property
    def job_service_secure(self) -> bool:
        """
        Client-side SSL/TLS setting for the Feast Job Service, read from the
        configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL)
        return ssl_enabled

    @job_service_secure.setter
    def job_service_secure(self, value: bool):
        """
        Store the client-side SSL/TLS setting for the Feast Job Service

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.JOB_SERVICE_ENABLE_SSL, value)

    def version(self):
        """
        Collect version information for the SDK, Feast Serving and Feast Core

        Returns:
            Dictionary with "sdk", "serving" and "core" entries. The serving
            and core entries stay "not configured" when the matching URL is
            not set; otherwise each holds the URL and the version reported by
            that service.
        """
        import pkg_resources

        sdk_version = pkg_resources.get_distribution("feast").version
        result = {
            "sdk": {"version": sdk_version},
            "serving": "not configured",
            "core": "not configured",
        }

        if self.serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
            result["serving"] = dict(url=self.serving_url,
                                     version=serving_info.version)

        if self.core_url:
            core_info = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
            result["core"] = dict(url=self.core_url,
                                  version=core_info.version)

        return result

    @property
    def project(self) -> str:
        """
        Currently active project, read from the client configuration

        Returns:
            Project name

        Raises:
            ValueError: If the configured project is missing or empty
        """
        configured_project = self._config.get(opt.PROJECT)
        if configured_project:
            return configured_project
        raise ValueError("No project has been configured.")

    def set_project(self, project: Optional[str] = None):
        """
        Store the active Feast project in the client configuration

        Args:
            project: Project to set as active. When None, the value of
                `opt().PROJECT` is stored instead (presumably the default
                project).
        """
        target_project = opt().PROJECT if project is None else project
        self._config.set(opt.PROJECT, target_project)

    def list_projects(self) -> List[str]:
        """
        Ask Feast Core for its projects

        Returns:
            List of project names taken from the ListProjects response
        """
        # _extra_grpc_params() supplies the same timeout and metadata kwargs
        projects_response = self._core_service.ListProjects(
            ListProjectsRequest(),
            **self._extra_grpc_params(),
        )  # type: ListProjectsResponse
        return [*projects_response.projects]

    def create_project(self, project: str):
        """
        Send a CreateProject request to Feast Core

        Args:
            project: Name of the project to create
        """
        create_request = CreateProjectRequest(name=project)
        # _extra_grpc_params() supplies the same timeout and metadata kwargs
        self._core_service.CreateProject(
            create_request,
            **self._extra_grpc_params(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the client's active project, the active
        project is reset through set_project().

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: If the ArchiveProject call fails
        """

        try:
            # Go through the lazy `_core_service` property: the raw
            # `_core_service_stub` attribute is None until first use.
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # revert to the default project
        # The active project lives in the config, not in a `_project`
        # attribute. Read it directly instead of via `self.project`, which
        # raises when no project is configured and would fail an archive
        # that already succeeded.
        if self._config.get(opt.PROJECT) == project:
            self.set_project()

    def apply(
        self,
        objects: Union[List[Union[Entity, FeatureTable]], Entity,
                       FeatureTable],
        project: str = None,
    ):
        """
        Register entities and feature tables with Feast Core. Accepts a single
        entity or feature table, or a list mixing both.

        Args:
            objects: List of entities and/or feature tables that will be registered
            project: Project to register the objects in; defaults to the
                client's active project

        Raises:
            ValueError: If an item is neither an Entity nor a FeatureTable

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            >>>     name="driver_entity",
            >>>     description="Driver entity for car rides",
            >>>     value_type=ValueType.STRING,
            >>>     labels={
            >>>         "key": "val"
            >>>     }
            >>> )
            >>> feast_client.apply(entity)
        """

        target_project = self.project if project is None else project
        items = objects if isinstance(objects, list) else [objects]

        for item in items:
            if isinstance(item, Entity):
                self._apply_entity(target_project, item)  # type: ignore
                continue
            if isinstance(item, FeatureTable):
                self._apply_feature_table(target_project, item)  # type: ignore
                continue
            raise ValueError(
                f"Could not determine object type to apply {item} with type {type(item)}. Type must be Entity or FeatureTable."
            )

    def apply_entity(self,
                     entities: Union[List[Entity], Entity],
                     project: str = None):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given entity through
        ``_apply_entity``.

        Args:
            entities: A single Entity or a list of entities to register
            project: Project to register the entities in. When omitted,
                ``self.project`` is used.

        Raises:
            ValueError: If an element is not an Entity. Elements that precede
                it have already been applied.
        """
        warnings.warn(
            "The method apply_entity() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_entity().",
            DeprecationWarning,
        )

        target_project = self.project if project is None else project
        batch = entities if isinstance(entities, list) else [entities]

        for candidate in batch:
            if not isinstance(candidate, Entity):
                raise ValueError(
                    f"Could not determine entity type to apply {candidate}")
            self._apply_entity(target_project, candidate)  # type: ignore

    def _apply_entity(self, project: str, entity: Entity):
        """
        Registers a single entity with Feast and syncs the local object with
        what Feast Core returned.

        Args:
            project: Project the entity is registered in
            entity: Entity that will be registered; updated in place from the
                entity contained in the response

        Raises:
            grpc.RpcError: If the ApplyEntity call fails. The raised error
                carries only the details string of the original error.
        """

        entity.is_valid()
        spec = entity.to_spec_proto()

        # Send the spec to Feast Core
        try:
            response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project,
                                   spec=spec),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Copy the state of the entity returned by Core onto the caller's
        # object.
        entity._update_from_entity(Entity.from_proto(response.entity))

    def list_entities(self,
                      project: str = None,
                      labels: Optional[Dict[str, str]] = None) -> List[Entity]:
        """
        Retrieve a list of entities from Feast Core

        Args:
            project: Filter entities based on project name. When omitted,
                ``self.project`` is used.
            labels: User-defined labels that these entities are associated
                with. When omitted, no label filter is sent.

        Returns:
            List of entities. Each returned entity has its ``_client``
            attribute set to this client.
        """

        if project is None:
            project = self.project
        # A fresh dict per call replaces the former shared `dict()` default.
        if labels is None:
            labels = {}

        entity_filter = ListEntitiesRequest.Filter(project=project,
                                                   labels=labels)

        # Get latest entities from Feast Core
        entity_protos = self._core_service.ListEntities(
            ListEntitiesRequest(filter=entity_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        # Extract entities and return
        entities = []
        for entity_proto in entity_protos.entities:
            entity = Entity.from_proto(entity_proto)
            entity._client = self
            entities.append(entity)
        return entities

    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Fetches one entity from Feast Core.

        Args:
            name: Name of entity; surrounding whitespace is stripped before
                the request is sent
            project: Feast project that this entity belongs to. When omitted,
                ``self.project`` is used.

        Returns:
            The entity built from the response.

        Raises:
            grpc.RpcError: If the GetEntity call fails. The raised error
                carries only the details string of the original error.
        """

        target_project = self.project if project is None else project

        try:
            response = self._core_service.GetEntity(
                GetEntityRequest(project=target_project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return Entity.from_proto(response.entity)

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: str = None,
    ):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given feature table
        through ``_apply_feature_table``.

        Args:
            feature_tables: A single FeatureTable or a list of feature tables
                to register
            project: Project to register the feature tables in. When omitted,
                ``self.project`` is used.

        Raises:
            ValueError: If an element is not a FeatureTable. Elements that
                precede it have already been applied.
        """
        warnings.warn(
            "The method apply_feature_table() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_feature_table().",
            DeprecationWarning,
        )

        target_project = self.project if project is None else project
        batch = (feature_tables
                 if isinstance(feature_tables, list) else [feature_tables])

        for candidate in batch:
            if not isinstance(candidate, FeatureTable):
                raise ValueError(
                    f"Could not determine feature table type to apply {candidate}"
                )
            self._apply_feature_table(target_project,
                                      candidate)  # type: ignore

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Registers a single feature table with Feast and syncs the local object
        with what Feast Core returned.

        Args:
            project: Project the feature table is registered in
            feature_table: Feature table that will be registered; updated in
                place from the table contained in the response

        Raises:
            grpc.RpcError: If the ApplyFeatureTable call fails. The raised
                error carries only the details string of the original error.
        """

        feature_table.is_valid()
        table_spec = feature_table.to_spec_proto()

        # Send the spec to Feast Core
        try:
            response = self._core_service.ApplyFeatureTable(
                ApplyFeatureTableRequest(
                    project=project,
                    table_spec=table_spec),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Copy the state of the table returned by Core onto the caller's
        # object.
        feature_table._update_from_feature_table(
            FeatureTable.from_proto(response.table))

    def list_feature_tables(
        self,
        project: str = None,
        labels: Optional[Dict[str, str]] = None,
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from Feast Core

        Args:
            project: Filter feature tables based on project name. When
                omitted, ``self.project`` is used.
            labels: User-defined labels that these feature tables are
                associated with. When omitted, no label filter is sent.

        Returns:
            List of feature tables. Each returned feature table has its
            ``_client`` attribute set to this client.
        """

        if project is None:
            project = self.project
        # A fresh dict per call replaces the former shared `dict()` default.
        if labels is None:
            labels = {}

        table_filter = ListFeatureTablesRequest.Filter(project=project,
                                                       labels=labels)

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=table_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables and return
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables

    def get_feature_table(self,
                          name: str,
                          project: str = None) -> FeatureTable:
        """
        Fetches one feature table from Feast Core.

        Args:
            name: Name of feature table; surrounding whitespace is stripped
                before the request is sent
            project: Feast project that this feature table belongs to. When
                omitted, ``self.project`` is used.

        Returns:
            The feature table built from the response.

        Raises:
            grpc.RpcError: If the GetFeatureTable call fails. The raised error
                carries only the details string of the original error.
        """

        target_project = self.project if project is None else project

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=target_project,
                                       name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return FeatureTable.from_proto(response.table)

    def delete_feature_table(self, name: str, project: str = None) -> None:
        """
        Asks Feast Core to delete a feature table.

        Args:
            name: Name of feature table; surrounding whitespace is stripped
                before the request is sent
            project: Feast project that this feature table belongs to. When
                omitted, ``self.project`` is used.

        Raises:
            grpc.RpcError: If the DeleteFeatureTable call fails. The raised
                error carries only the details string of the original error.
        """

        target_project = self.project if project is None else project

        try:
            self._core_service.DeleteFeatureTable(
                DeleteFeatureTableRequest(project=target_project,
                                          name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def list_features_by_ref(
            self,
            project: str = None,
            entities: Optional[List[str]] = None,
            labels: Optional[Dict[str, str]] = None,
    ) -> Dict[FeatureRef, Feature]:
        """
        Retrieve a dictionary of feature reference to feature from Feast Core based on filters provided.

        Args:
            project: Feast project that these features belongs to. When
                omitted, ``self.project`` is used.
            entities: Feast entity that these features are associated with.
                When omitted, no entity filter is sent.
            labels: Feast labels that these features are associated with.
                When omitted, no label filter is sent.

        Returns:
            Dictionary of <feature references: features>

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = feast_client.list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """

        if project is None:
            project = self.project
        # Fresh containers per call replace the former shared `list()` /
        # `dict()` defaults.
        if entities is None:
            entities = []
        if labels is None:
            labels = {}

        feature_filter = ListFeaturesRequest.Filter(project=project,
                                                    entities=entities,
                                                    labels=labels)

        feature_protos = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=feature_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Key each returned feature by its parsed feature reference.
        return {
            FeatureRef.from_str(ref_str): Feature.from_proto(feature_proto)
            for ref_str, feature_proto in feature_protos.features.items()
        }

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: str = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT),
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The feature table is always re-fetched from Feast Core by name, the
        source is written to temporary parquet file(s), and those are uploaded
        to the feature table's batch source (file or BigQuery).

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable. When omitted,
                ``self.project`` is used.

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            timeout (int):
                Timeout in seconds to wait for completion.
                NOTE(review): this value is not referenced anywhere in the
                method body.

        Raises:
            ValueError: If ``feature_table`` is neither a string nor a
                FeatureTable.
            Exception: If the feature table cannot be found, has no batch
                source, or has a file batch source whose format is not
                parquet.

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [pd.Timestamp.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if project is None:
            project = self.project

        if isinstance(feature_table, str):
            name = feature_table
        elif isinstance(feature_table, FeatureTable):
            name = feature_table.name
        else:
            # Previously this fell through and failed later with an unbound
            # `name`; fail early with an explicit message instead.
            raise ValueError(
                f"Could not determine feature table to ingest into from {feature_table} "
                f"with type {type(feature_table)}. Must be a feature table name or FeatureTable."
            )

        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project)
        if fetched_feature_table is None:
            raise Exception(f"FeatureTable, {name} cannot be found.")
        feature_table = fetched_feature_table
        batch_source = feature_table.batch_source

        # Check 1) Only parquet file format for FeatureTable batch source is supported
        # The temporary files written below are parquet, so a file source
        # declared with any other format must be rejected. The previous check
        # tested `isinstance(type(file_format), ParquetFormat)`, which is
        # always False, so it never fired.
        has_unsupported_file_format = (
            isinstance(batch_source, FileSource) and not isinstance(
                batch_source.file_options.file_format, ParquetFormat))
        if batch_source is None or has_unsupported_file_format:
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}."
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            batch_source.event_timestamp_column,
            batch_source.field_mapping,
        )

        dir_path = None
        with_partitions = False
        if (isinstance(batch_source, FileSource)
                and batch_source.date_partition_column):
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                batch_source.date_partition_column,
                batch_source.event_timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                chunk_size,
                max_workers,
            )

        try:
            if isinstance(batch_source, FileSource):
                # Drop any trailing wildcard from the configured URL before
                # uploading.
                file_url = batch_source.file_options.file_url.rstrip("*")
                _upload_to_file_source(file_url, with_partitions, dest_path,
                                       self._config)
            if isinstance(batch_source, BigQuerySource):
                bq_table_ref = batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    batch_source.event_timestamp_column)

                _upload_to_bq_source(bq_table_ref,
                                     feature_table_timestamp_column, dest_path)
        finally:
            # Remove parquet file(s) that were created earlier
            # NOTE(review): in the partitioned case `dir_path` stays None, so
            # nothing is removed here — confirm whether `dest_path` needs
            # cleaning up as well.
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print(
            "Data has been successfully ingested into FeatureTable batch source."
        )

    def _get_grpc_metadata(self):
        """
        Builds the metadata to attach to gRPC requests. This is primarily
        used when authentication is enabled but SSL/TLS is disabled.

        Returns: The signed metadata from the auth plugin when the
            ENABLE_AUTH option is on and a plugin is set; otherwise an empty
            tuple.
        """
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if not (auth_enabled and self._auth_metadata):
            return ()
        return self._auth_metadata.get_signed_meta()

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Dict[str, Any]],
        project: Optional[str] = None,
    ) -> OnlineResponse:
        """
        Retrieves the latest online feature data from Feast Serving.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair.
            project: Optional project override. When given, it is sent with the
                request instead of ``self.project``.

        Returns:
            An OnlineResponse wrapping the GetOnlineFeaturesV2 response.

        Raises:
            grpc.RpcError: If the call to Feast Serving fails. The raised error
                carries only the details string of the original error.

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["sales:daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]}
        """

        try:
            serving = self._serving_service
            request = GetOnlineFeaturesRequestV2(
                features=_build_feature_references(
                    feature_ref_strs=feature_refs),
                entity_rows=_infer_online_entity_rows(entity_rows),
                project=self.project if project is None else project,
            )
            raw_response = serving.GetOnlineFeaturesV2(
                request,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return OnlineResponse(raw_response)

    def get_historical_features(
        self,
        feature_refs: List[str],
        entity_source: Union[pd.DataFrame, FileSource, BigQuerySource],
        output_location: Optional[str] = None,
    ) -> RetrievalJob:
        """
        Launch a historical feature retrieval job.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
            entity_source (Union[pd.DataFrame, FileSource, BigQuerySource]): Source for the entity rows.
                If entity_source is a Panda DataFrame, the dataframe will be staged
                to become accessible by spark workers.
                If one of feature tables' source is in BigQuery - entities will be upload to BQ.
                Otherwise to remote file storage (derived from configured staging location).
                It is also assumed that the column event_timestamp is present
                in the dataframe, and is of type datetime without timezone information.

                The user needs to make sure that the source (or staging location, if entity_source is
                a Panda DataFrame) is accessible from the Spark cluster that will be used for the
                retrieval job.
            output_location: Specifies the path in a bucket to write the exported feature data files.
                When omitted, a new uuid4-named location under the configured
                HISTORICAL_FEATURE_OUTPUT_LOCATION is used.

        Returns:
                Returns a retrieval job object that can be used to monitor retrieval
                progress asynchronously, and can be used to materialize the
                results.

        Raises:
            AssertionError: If any retrieved feature table's batch source has
                no `created_timestamp_column`.

        Examples:
            >>> from feast import Client
            >>> from feast.data_format import ParquetFormat
            >>> from datetime import datetime
            >>> feast_client = Client(core_url="localhost:6565")
            >>> feature_refs = ["bookings:bookings_7d", "bookings:booking_14d"]
            >>> entity_source = FileSource("event_timestamp", ParquetFormat(), "gs://some-bucket/customer")
            >>> feature_retrieval_job = feast_client.get_historical_features(
            >>>     feature_refs, entity_source)
            >>> output_file_uri = feature_retrieval_job.get_output_file_uri()
                "gs://some-bucket/output/"
        """
        feature_tables = self._get_feature_tables_from_feature_refs(
            feature_refs, self.project)

        # Raised explicitly (rather than via `assert`) so the validation is
        # not stripped when Python runs with -O; the exception type is kept.
        if not all(ft.batch_source.created_timestamp_column
                   for ft in feature_tables):
            raise AssertionError(
                "All BatchSources attached to retrieved FeatureTables "
                "must have specified `created_timestamp_column` to be used in "
                "historical dataset generation.")

        if output_location is None:
            output_location = os.path.join(
                self._config.get(opt.HISTORICAL_FEATURE_OUTPUT_LOCATION),
                str(uuid.uuid4()),
            )
        output_format = self._config.get(opt.HISTORICAL_FEATURE_OUTPUT_FORMAT)
        feature_sources = [
            feature_table.batch_source for feature_table in feature_tables
        ]

        if isinstance(entity_source, pd.DataFrame):
            # A dataframe has to be staged first: next to the first BigQuery
            # feature source when there is one, otherwise on the configured
            # staging location.
            first_bq_source = next(
                (source for source in feature_sources
                 if isinstance(source, BigQuerySource)),
                None,
            )
            if first_bq_source is not None:
                source_ref = table_reference_from_string(
                    first_bq_source.bigquery_options.table_ref)
                entity_source = stage_entities_to_bq(entity_source,
                                                     source_ref.project,
                                                     source_ref.dataset_id)
            else:
                entity_source = stage_entities_to_fs(
                    entity_source,
                    staging_location=self._config.get(
                        opt.SPARK_STAGING_LOCATION),
                    config=self._config,
                )

        if self._use_job_service:
            response = self._job_service.GetHistoricalFeatures(
                GetHistoricalFeaturesRequest(
                    feature_refs=feature_refs,
                    entity_source=entity_source.to_proto(),
                    project=self.project,
                    output_format=output_format,
                    output_location=output_location,
                ),
                **self._extra_grpc_params(),
            )
            # The callable itself (not its result) is handed to the job.
            return RemoteRetrievalJob(
                self._job_service,
                self._extra_grpc_params,
                response.id,
                output_file_uri=response.output_file_uri,
            )
        else:
            return start_historical_feature_retrieval_job(
                client=self,
                project=self.project,
                entity_source=entity_source,
                feature_tables=feature_tables,
                output_format=output_format,
                output_path=output_location,
            )

    def get_historical_features_df(
        self,
        feature_refs: List[str],
        entity_source: Union[FileSource, BigQuerySource],
    ):
        """
        Run historical feature retrieval through a Spark session.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
            entity_source (Union[FileSource, BigQuerySource]): Source for the entity rows.
                The user needs to make sure that the source is accessible from the Spark cluster
                that will be used for the retrieval job.

        Returns:
                Returns the historical feature retrieval result in the form of Spark dataframe.

        Examples:
            >>> from feast import Client
            >>> from feast.data_format import ParquetFormat
            >>> from datetime import datetime
            >>> from pyspark.sql import SparkSession
            >>> spark = SparkSession.builder.getOrCreate()
            >>> feast_client = Client(core_url="localhost:6565")
            >>> feature_refs = ["bookings:bookings_7d", "bookings:booking_14d"]
            >>> entity_source = FileSource("event_timestamp", ParquetFormat(), "gs://some-bucket/customer")
            >>> df = feast_client.get_historical_features_df(
            >>>     feature_refs, entity_source)
        """
        # Resolve the referenced feature tables within the client's project.
        selected_tables = self._get_feature_tables_from_feature_refs(
            feature_refs, self.project)

        spark_df = start_historical_feature_retrieval_spark_session(
            client=self,
            project=self.project,
            entity_source=entity_source,
            feature_tables=selected_tables,
        )
        return spark_df

    def _get_feature_tables_from_feature_refs(self, feature_refs: List[str],
                                              project: Optional[str]):
        feature_refs_grouped_by_table = [
            (feature_table_name, list(grouped_feature_refs))
            for feature_table_name, grouped_feature_refs in groupby(
                feature_refs, lambda x: x.split(":")[0])
        ]

        feature_tables = []
        for feature_table_name, grouped_feature_refs in feature_refs_grouped_by_table:
            feature_table = self.get_feature_table(feature_table_name, project)
            feature_names = [f.split(":")[-1] for f in grouped_feature_refs]
            feature_table.features = [
                f for f in feature_table.features if f.name in feature_names
            ]
            feature_tables.append(feature_table)
        return feature_tables

    def start_offline_to_online_ingestion(
        self,
        feature_table: FeatureTable,
        start: datetime,
        end: datetime,
    ) -> SparkJob:
        """
        Launch Ingestion Job from Batch Source to Online Store for given featureTable

        When the client is set to use the job service, the job is started
        through it; otherwise it is started via the module-level launcher.

        Args:
            feature_table: FeatureTable which will be ingested
            start: lower datetime boundary
            end: upper datetime boundary

        Returns:
            Spark Job Proxy object
        """
        if self._use_job_service:
            request = StartOfflineToOnlineIngestionJobRequest(
                project=self.project,
                table_name=feature_table.name,
            )
            # The date range goes into the request's timestamp fields.
            request.start_date.FromDatetime(start)
            request.end_date.FromDatetime(end)
            response = self._job_service.StartOfflineToOnlineIngestionJob(
                request)
            return RemoteBatchIngestionJob(
                self._job_service,
                self._extra_grpc_params,
                response.id,
            )

        return start_offline_to_online_ingestion(
            client=self,
            project=self.project,
            feature_table=feature_table,
            start=start,
            end=end,
        )

    def start_stream_to_online_ingestion(
        self,
        feature_table: FeatureTable,
        extra_jars: Optional[List[str]] = None,
        project: str = None,
    ) -> SparkJob:
        """
        Launch a streaming ingestion job into the online store for the given
        feature table.

        Args:
            feature_table: FeatureTable whose stream will be ingested
            extra_jars: Extra jars handed to the locally launched job; an
                empty list is used when omitted. Not forwarded when the job
                service is used.
            project: Project to run the job for. When omitted (or empty),
                ``self.project`` is used.

        Returns:
            Spark Job Proxy object
        """
        # Resolve the project once so both launch paths honour the `project`
        # argument; the job-service path previously always used self.project.
        target_project = project or self.project

        if not self._use_job_service:
            return start_stream_to_online_ingestion(
                client=self,
                project=target_project,
                feature_table=feature_table,
                extra_jars=extra_jars or [],
            )
        else:
            request = StartStreamToOnlineIngestionJobRequest(
                project=target_project,
                table_name=feature_table.name,
            )
            response = self._job_service.StartStreamToOnlineIngestionJob(
                request)
            return RemoteStreamIngestionJob(self._job_service,
                                            self._extra_grpc_params,
                                            response.id)

    def list_jobs(self, include_terminated: bool) -> List[SparkJob]:
        """
        List jobs, either through the job service or via the module-level
        ``list_jobs`` helper, depending on ``self._use_job_service``.

        Args:
            include_terminated: Forwarded as-is to the job listing call

        Returns:
            List of job objects
        """
        if self._use_job_service:
            request = ListJobsRequest(include_terminated=include_terminated)
            response = self._job_service.ListJobs(request)

            # Wrap every job of the response into its remote job object.
            remote_jobs = []
            for job in response.jobs:
                remote_jobs.append(
                    get_remote_job_from_proto(self._job_service,
                                              self._extra_grpc_params, job))
            return remote_jobs

        return list_jobs(include_terminated, self)

    def get_job_by_id(self, job_id: str) -> SparkJob:
        """
        Look up a job by its id, either through the job service or via the
        module-level ``get_job_by_id`` helper, depending on
        ``self._use_job_service``.

        Args:
            job_id: Id of the job to look up

        Returns:
            The matching job object
        """
        if self._use_job_service:
            request = GetJobRequest(job_id=job_id)
            response = self._job_service.GetJob(request)
            return get_remote_job_from_proto(self._job_service,
                                             self._extra_grpc_params,
                                             response.job)

        return get_job_by_id(job_id, self)

    def stage_dataframe(
        self,
        df: pd.DataFrame,
        event_timestamp_column: str,
    ) -> FileSource:
        """
        Stage a dataframe by delegating to the module-level
        ``stage_dataframe`` helper with this client's configuration.

        Args:
            df: Dataframe to stage
            event_timestamp_column: Name of the event timestamp column,
                forwarded unchanged to the helper

        Returns:
            The FileSource returned by the helper
        """
        staged_source = stage_dataframe(df, event_timestamp_column,
                                        self._config)
        return staged_source
Esempio n. 7
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a Feast client from configuration options.

        The Feast Client should be initialized with at least one service url.
        Please see constants.py for configuration options. Commonly used
        options or arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments merged on top of
                "options" (keyword arguments win on key collisions)
        """
        base_options = dict() if options is None else options
        self._config = Config(options={**base_options, **kwargs})

        # Lazily-populated collaborators; each is created on first use by
        # the matching property.
        self._registry_impl: Optional[Registry] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._core_service_stub: Optional[CoreServiceStub] = None

        # The auth metadata plugin is only built when auth is switched on.
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

        self._configure_telemetry()

    @property
    def config(self) -> Config:
        """Configuration object backing this client."""
        cfg = self._config
        return cfg

    @property
    def _core_service(self):
        """
        Return the gRPC Feast Core Service stub, creating it on first access.

        The channel is built from the Core URL, SSL, auth and timeout
        settings held in the client configuration.

        Returns: CoreServiceStub
        """
        if self._core_service_stub:
            return self._core_service_stub

        cfg = self._config
        core_channel = create_grpc_channel(
            url=cfg.get(opt.CORE_URL),
            enable_ssl=cfg.getboolean(opt.CORE_ENABLE_SSL),
            enable_auth=cfg.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=cfg.get(opt.CORE_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=cfg.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _use_object_store_registry(self) -> bool:
        """Whether a registry path option exists in the configuration."""
        has_registry_path = self._config.exists(opt.REGISTRY_PATH)
        return has_registry_path

    @property
    def _registry(self):
        """
        Return the Registry built from the configured registry path,
        constructing and caching it on first access.
        """
        registry = self._registry_impl
        if registry is None:
            registry = Registry(self._config.get(opt.REGISTRY_PATH))
            self._registry_impl = registry
        return registry

    @property
    def _serving_service(self):
        """
        Return the gRPC Feast Serving Service stub, creating it on first
        access. If both `opentracing` and `grpcio-opentracing` can be
        imported, the channel is wrapped with an opentracing interceptor
        based on the global tracer.

        Returns: ServingServiceStub
        """
        if self._serving_service_stub:
            return self._serving_service_stub

        cfg = self._config
        serving_channel = create_grpc_channel(
            url=cfg.get(opt.SERVING_URL),
            enable_ssl=cfg.getboolean(opt.SERVING_ENABLE_SSL),
            enable_auth=cfg.getboolean(opt.ENABLE_AUTH),
            ssl_server_cert_path=cfg.get(opt.SERVING_SERVER_SSL_CERT),
            auth_metadata_plugin=self._auth_metadata,
            timeout=cfg.getint(opt.GRPC_CONNECTION_TIMEOUT),
        )

        # Tracing is strictly optional: when the packages are missing the
        # plain channel is used as-is.
        try:
            import opentracing
            from grpc_opentracing import open_tracing_client_interceptor
            from grpc_opentracing.grpcext import intercept_channel

            tracing_interceptor = open_tracing_client_interceptor(
                opentracing.global_tracer())
            serving_channel = intercept_channel(serving_channel,
                                                tracing_interceptor)
        except ImportError:
            pass

        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    def _extra_grpc_params(self) -> Dict[str, Any]:
        """
        Build the keyword arguments attached to gRPC calls.

        Returns:
            Dict with ``timeout`` (the configured gRPC connection timeout)
            and ``metadata`` (the result of ``_get_grpc_metadata``).
        """
        grpc_timeout = self._config.getint(opt.GRPC_CONNECTION_TIMEOUT)
        grpc_metadata = self._get_grpc_metadata()
        return {"timeout": grpc_timeout, "metadata": grpc_metadata}

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as stored in the client configuration.

        Returns:
            Feast Core URL string
        """
        url = self._config.get(opt.CORE_URL)
        return url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration.

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(opt.CORE_URL, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as stored in the client configuration.

        Returns:
            Feast Serving URL string
        """
        url = self._config.get(opt.SERVING_URL)
        return url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration.

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(opt.SERVING_URL, value)

    @property
    def job_service_url(self) -> str:
        """
        Feast Job Service URL as stored in the client configuration.

        Returns:
            Feast Job Service URL string
        """
        url = self._config.get(opt.JOB_SERVICE_URL)
        return url

    @job_service_url.setter
    def job_service_url(self, value: str):
        """
        Store a new Feast Job Service URL in the client configuration.

        Args:
            value: Feast Job Service URL
        """
        config = self._config
        config.set(opt.JOB_SERVICE_URL, value)

    @property
    def core_secure(self) -> bool:
        """
        Feast Core client-side SSL/TLS setting, read as a boolean.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.CORE_ENABLE_SSL)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the Feast Core client-side SSL/TLS setting.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.CORE_ENABLE_SSL, value)

    @property
    def serving_secure(self) -> bool:
        """
        Feast Serving client-side SSL/TLS setting, read as a boolean.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.SERVING_ENABLE_SSL)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the Feast Serving client-side SSL/TLS setting.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.SERVING_ENABLE_SSL, value)

    @property
    def job_service_secure(self) -> bool:
        """
        Feast Job Service client-side SSL/TLS setting, read as a boolean.

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(opt.JOB_SERVICE_ENABLE_SSL)
        return ssl_enabled

    @job_service_secure.setter
    def job_service_secure(self, value: bool):
        """
        Store the Feast Job Service client-side SSL/TLS setting.

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(opt.JOB_SERVICE_ENABLE_SSL, value)

    def version(self, sdk_only=False):
        """
        Report version information for the SDK and, unless ``sdk_only`` is
        set, for Feast Serving and Feast Core.

        Args:
            sdk_only: When truthy, return only the SDK version string.

        Returns:
            The SDK version string when ``sdk_only`` is truthy; otherwise a
            dict with "sdk", "serving" and "core" entries. "serving" and
            "core" stay "not configured" unless the corresponding URL is set
            (and, for core, the object store registry is not in use).
        """
        import pkg_resources

        try:
            sdk_version = pkg_resources.get_distribution("feast").version
        except pkg_resources.DistributionNotFound:
            # Not installed as a distribution (e.g. running from a checkout).
            sdk_version = "local build"

        if sdk_only:
            return sdk_version

        result = dict()
        result["sdk"] = {"version": sdk_version}
        result["serving"] = "not configured"
        result["core"] = "not configured"

        if self.serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
            serving_version = serving_info.version
            result["serving"] = dict(url=self.serving_url,
                                     version=serving_version)

        # Core is only queried when the registry is not object-store backed
        # and a Core URL is configured.
        if self._use_object_store_registry or not self.core_url:
            return result

        core_info = self._core_service.GetFeastCoreVersion(
            GetFeastCoreVersionRequest(),
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )
        core_version = core_info.version
        result["core"] = dict(url=self.core_url, version=core_version)
        return result

    def _configure_telemetry(self):
        """
        Initialise telemetry state from the configuration.

        Sets ``self._telemetry_enabled``. When telemetry is enabled, also
        sets ``self._telemetry_counter`` and ``self._telemetry_id``: the id
        is read from ``~/.feast/telemetry`` if that file exists, otherwise a
        new UUID is generated, a notice is printed and the id is written to
        that file. When telemetry is disabled, an existing telemetry file is
        removed.
        """
        telemetry_filepath = join(expanduser("~"), ".feast", "telemetry")
        self._telemetry_enabled = (
            self._config.get(opt.TELEMETRY, "True") == "True"
        )  # written this way to turn the env var string into a boolean

        if not self._telemetry_enabled:
            if os.path.exists(telemetry_filepath):
                os.remove(telemetry_filepath)
            return

        self._telemetry_counter = {"get_online_features": 0}
        if os.path.exists(telemetry_filepath):
            with open(telemetry_filepath, "r") as f:
                self._telemetry_id = f.read()
            return

        self._telemetry_id = str(uuid.uuid4())
        print(
            "Feast is an open source project that collects anonymized usage statistics. To opt out or learn more see https://docs.feast.dev/v/master/advanced/telemetry"
        )
        # The ~/.feast directory may not exist yet; without this the write
        # below fails with FileNotFoundError on a fresh machine.
        os.makedirs(os.path.dirname(telemetry_filepath), exist_ok=True)
        with open(telemetry_filepath, "w") as f:
            f.write(self._telemetry_id)

    @property
    def project(self) -> str:
        """
        Currently active project, as stored in the client configuration.

        Returns:
            Project name

        Raises:
            ValueError: If the configured project value is empty or unset.
        """
        if self._config.get(opt.PROJECT):
            return self._config.get(opt.PROJECT)
        raise ValueError("No project has been configured.")

    def set_project(self, project: Optional[str] = None):
        """
        Change the active Feast project stored in the client configuration.

        Args:
            project: Project to set as active. If None, the value of
                ``opt().PROJECT`` is used instead.
        """
        new_project = opt().PROJECT if project is None else project
        self._config.set(opt.PROJECT, new_project)

    def list_projects(self) -> List[str]:
        """
        List the projects returned by Feast Core.

        Returns:
            List of project names

        Raises:
            NotImplementedError: When the object store registry is in use.
        """
        if self._use_object_store_registry:
            raise NotImplementedError(
                "Projects are not implemented for object store registry.")

        reply = self._core_service.ListProjects(
            ListProjectsRequest(),
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse
        project_names = list(reply.projects)
        return project_names

    def create_project(self, project: str):
        """
        Create a Feast project through Feast Core.

        Args:
            project: Name of project

        Raises:
            NotImplementedError: When the object store registry is in use.
        """
        if self._use_object_store_registry:
            raise NotImplementedError(
                "Projects are not implemented for object store registry.")

        create_request = CreateProjectRequest(name=project)
        self._core_service.CreateProject(
            create_request,
            timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the one currently configured on this
        client, the configured project is reset to the default
        (``opt().PROJECT``).

        Args:
            project: Name of project to archive

        Raises:
            NotImplementedError: When the object store registry is in use.
            grpc.RpcError: Re-raised with the details of the failed call.
        """

        if self._use_object_store_registry:
            raise NotImplementedError(
                "Projects are not implemented for object store registry.")

        try:
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Revert to the default project. The active project lives in the
        # configuration (there is no ``self._project`` attribute), so read
        # and write it there.
        if self._config.get(opt.PROJECT) == project:
            self._config.set(opt.PROJECT, opt().PROJECT)

    def apply(
        self,
        objects: Union[List[Union[Entity, FeatureTable]], Entity,
                       FeatureTable],
        project: str = None,
    ):
        """
        Register entities and/or feature tables. Accepts either a single
        Entity / FeatureTable or a list of them; each item is dispatched to
        the matching private apply method.

        Args:
            objects: Entity, FeatureTable, or list of these to register
            project: Project to register into; defaults to the client's
                active project when None

        Raises:
            ValueError: If an item is neither an Entity nor a FeatureTable.

        Examples:
            >>> from feast import Client
            >>> from feast.entity import Entity
            >>> from feast.value_type import ValueType
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> entity = Entity(
            >>>     name="driver_entity",
            >>>     description="Driver entity for car rides",
            >>>     value_type=ValueType.STRING,
            >>>     labels={
            >>>         "key": "val"
            >>>     }
            >>> )
            >>> feast_client.apply(entity)
        """
        if self._telemetry_enabled:
            log_usage(
                "apply",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )

        target_project = self.project if project is None else project
        batch = objects if isinstance(objects, list) else [objects]

        for obj in batch:
            if isinstance(obj, Entity):
                self._apply_entity(target_project, obj)  # type: ignore
                continue
            if isinstance(obj, FeatureTable):
                self._apply_feature_table(target_project,
                                          obj)  # type: ignore
                continue
            raise ValueError(
                f"Could not determine object type to apply {obj} with type {type(obj)}. Type must be Entity or FeatureTable."
            )

    def apply_entity(self,
                     entities: Union[List[Entity], Entity],
                     project: str = None):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given entity in the
        given project (or the client's active project when None).

        Raises:
            ValueError: If an item is not an Entity.
        """
        warnings.warn(
            "The method apply_entity() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_entity().",
            DeprecationWarning,
        )

        target_project = self.project if project is None else project
        entity_batch = entities if isinstance(entities, list) else [entities]

        for entity in entity_batch:
            if not isinstance(entity, Entity):
                raise ValueError(
                    f"Could not determine entity type to apply {entity}")
            self._apply_entity(target_project, entity)  # type: ignore

    def _apply_entity(self, project: str, entity: Entity):
        """
        Register one entity, via the object store registry when configured
        and via Feast Core otherwise.

        Args:
            project: Project the entity is registered under
            entity: Entity that will be registered; on the Feast Core path
                it is updated in place from the entity returned by Core

        Raises:
            grpc.RpcError: Re-raised with the details of the failed call.
        """
        if self._use_object_store_registry:
            return self._registry.apply_entity(entity, project)

        entity.is_valid()
        spec_proto = entity.to_spec_proto()

        # Send the spec to Feast Core
        try:
            response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project,
                                   spec=spec_proto),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Copy the state returned by Core back onto the caller's entity
        returned_entity = Entity.from_proto(response.entity)
        entity._update_from_entity(returned_entity)

    def list_entities(self,
                      project: str = None,
                      labels: Optional[Dict[str, str]] = None) -> List[Entity]:
        """
        Retrieve a list of entities from the object store registry when
        configured, otherwise from Feast Core.

        Args:
            project: Filter entities based on project name; defaults to the
                client's active project when None
            labels: User-defined labels that these entities are associated
                with; None means no label filter. Only applied on the Feast
                Core path.

        Returns:
            List of entities
        """
        if project is None:
            project = self.project

        if self._use_object_store_registry:
            return self._registry.list_entities(project)

        # A fresh dict per call avoids sharing one default object between
        # invocations.
        label_filter = {} if labels is None else labels
        entity_filter = ListEntitiesRequest.Filter(project=project,
                                                   labels=label_filter)

        # Get latest entities from Feast Core
        entity_protos = self._core_service.ListEntities(
            ListEntitiesRequest(filter=entity_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListEntitiesResponse

        # Extract entities and attach this client to each one
        entities = []
        for entity_proto in entity_protos.entities:
            entity = Entity.from_proto(entity_proto)
            entity._client = self
            entities.append(entity)
        return entities

    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Fetch a single entity by name.

        Args:
            project: Feast project that this entity belongs to; defaults to
                the client's active project when None
            name: Name of entity (stripped of surrounding whitespace on the
                Feast Core path)

        Returns:
            The specified entity

        Raises:
            grpc.RpcError: Re-raised with the details of the failed call.
        """
        if self._telemetry_enabled:
            log_usage(
                "get_entity",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )

        target_project = self.project if project is None else project

        if self._use_object_store_registry:
            return self._registry.get_entity(name, target_project)

        try:
            response = self._core_service.GetEntity(
                GetEntityRequest(project=target_project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return Entity.from_proto(response.entity)

    def apply_feature_table(
        self,
        feature_tables: Union[List[FeatureTable], FeatureTable],
        project: str = None,
    ):
        """
        Deprecated. Please see apply().

        Emits a DeprecationWarning, then registers each given feature table
        in the given project (or the client's active project when None).

        Raises:
            ValueError: If an item is not a FeatureTable.
        """
        warnings.warn(
            "The method apply_feature_table() is being deprecated. Please use apply() instead. Feast 0.10 and onwards will not support apply_feature_table().",
            DeprecationWarning,
        )

        target_project = self.project if project is None else project
        table_batch = (feature_tables if isinstance(feature_tables, list) else
                       [feature_tables])

        for feature_table in table_batch:
            if not isinstance(feature_table, FeatureTable):
                raise ValueError(
                    f"Could not determine feature table type to apply {feature_table}"
                )
            self._apply_feature_table(target_project,
                                      feature_table)  # type: ignore

    def _apply_feature_table(self, project: str, feature_table: FeatureTable):
        """
        Register one feature table, via the object store registry when
        configured and via Feast Core otherwise.

        Args:
            project: Project the feature table is registered under
            feature_table: Feature table that will be registered; on the
                Feast Core path it is updated in place from the table
                returned by Core

        Raises:
            grpc.RpcError: Re-raised with the details of the failed call.
        """
        if self._use_object_store_registry:
            return self._registry.apply_feature_table(feature_table, project)

        feature_table.is_valid()
        table_spec_proto = feature_table.to_spec_proto()

        # Send the spec to Feast Core
        try:
            response = self._core_service.ApplyFeatureTable(
                ApplyFeatureTableRequest(
                    project=project,
                    table_spec=table_spec_proto),  # type: ignore
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Copy the state returned by Core back onto the caller's table
        returned_table = FeatureTable.from_proto(response.table)
        feature_table._update_from_feature_table(returned_table)

    def list_feature_tables(
        self,
        project: str = None,
        labels: Optional[Dict[str, str]] = None,
    ) -> List[FeatureTable]:
        """
        Retrieve a list of feature tables from the object store registry
        when configured, otherwise from Feast Core.

        Args:
            project: Filter feature tables based on project name; defaults
                to the client's active project when None
            labels: User-defined labels that these feature tables are
                associated with; None means no label filter. Only applied on
                the Feast Core path.

        Returns:
            List of feature tables
        """
        if project is None:
            project = self.project

        if self._use_object_store_registry:
            return self._registry.list_feature_tables(project)

        # A fresh dict per call avoids sharing one default object between
        # invocations.
        label_filter = {} if labels is None else labels
        table_filter = ListFeatureTablesRequest.Filter(project=project,
                                                       labels=label_filter)

        # Get latest feature tables from Feast Core
        feature_table_protos = self._core_service.ListFeatureTables(
            ListFeatureTablesRequest(filter=table_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureTablesResponse

        # Extract feature tables and attach this client to each one
        feature_tables = []
        for feature_table_proto in feature_table_protos.tables:
            feature_table = FeatureTable.from_proto(feature_table_proto)
            feature_table._client = self
            feature_tables.append(feature_table)
        return feature_tables

    def get_feature_table(self,
                          name: str,
                          project: str = None) -> FeatureTable:
        """
        Fetch a single feature table by name.

        Args:
            project: Feast project that this feature table belongs to;
                defaults to the client's active project when None
            name: Name of feature table (stripped of surrounding whitespace
                on the Feast Core path)

        Returns:
            The specified feature table

        Raises:
            grpc.RpcError: Re-raised with the details of the failed call.
        """
        if self._telemetry_enabled:
            log_usage(
                "get_feature_table",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )

        target_project = self.project if project is None else project

        if self._use_object_store_registry:
            return self._registry.get_feature_table(name, target_project)

        try:
            response = self._core_service.GetFeatureTable(
                GetFeatureTableRequest(project=target_project,
                                       name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureTableResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return FeatureTable.from_proto(response.table)

    def delete_feature_table(self, name: str, project: str = None) -> None:
        """
        Delete a feature table, via the object store registry when
        configured and via Feast Core otherwise.

        Args:
            project: Feast project that this feature table belongs to;
                defaults to the client's active project when None
            name: Name of feature table (stripped of surrounding whitespace
                on the Feast Core path)

        Raises:
            grpc.RpcError: Re-raised with the details of the failed call.
        """
        target_project = self.project if project is None else project

        if self._use_object_store_registry:
            return self._registry.delete_feature_table(name, target_project)

        try:
            delete_request = DeleteFeatureTableRequest(project=target_project,
                                                       name=name.strip())
            self._core_service.DeleteFeatureTable(
                delete_request,
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def list_features_by_ref(
            self,
            project: str = None,
            entities: Optional[List[str]] = None,
            labels: Optional[Dict[str, str]] = None,
    ) -> Dict[FeatureRef, Feature]:
        """
        Retrieve a dictionary of feature reference to feature from Feast Core based on filters provided.

        Args:
            project: Feast project that these features belongs to; defaults
                to the client's active project when None
            entities: Feast entity that these features are associated with;
                None means no entity filter
            labels: Feast labels that these features are associated with;
                None means no label filter

        Returns:
            Dictionary of <feature references: features>

        Raises:
            NotImplementedError: When the object store registry is in use.

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = feast_client.list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """

        if self._use_object_store_registry:
            raise NotImplementedError(
                "This function is not implemented for object store registry.")

        if project is None:
            project = self.project

        # Fresh containers per call avoid sharing default objects between
        # invocations.
        entity_filter = [] if entities is None else entities
        label_filter = {} if labels is None else labels
        feature_filter = ListFeaturesRequest.Filter(project=project,
                                                    entities=entity_filter,
                                                    labels=label_filter)

        feature_protos = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=feature_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Map each parsed feature reference to its parsed feature
        return {
            FeatureRef.from_str(ref_str): Feature.from_proto(feature_proto)
            for ref_str, feature_proto in feature_protos.features.items()
        }

    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: str = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT),
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The feature table is always re-fetched by name before ingestion. The
        source is written to temporary parquet output and then uploaded to
        the feature table's batch source (file source or BigQuery source).

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable; defaults to the
                client's active project when None

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            timeout (int):
                Timeout in seconds to wait for completion.
                NOTE(review): this value is not referenced anywhere in this
                method body — confirm whether it should be wired through.

        Raises:
            ValueError: If ``feature_table`` is neither a string nor a
                FeatureTable.
            Exception: If the feature table cannot be found, or its file
                batch source does not use the parquet format.

        Examples:
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [pd.Timestamp.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if self._telemetry_enabled:
            log_usage(
                "ingest",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )
        if project is None:
            project = self.project

        # Resolve the table name up front; anything else is a caller error
        # (previously this fell through to an unbound local).
        if isinstance(feature_table, str):
            name = feature_table
        elif isinstance(feature_table, FeatureTable):
            name = feature_table.name
        else:
            raise ValueError(
                f"feature_table must be a feature table name or a FeatureTable, got {type(feature_table)}."
            )

        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project)
        if fetched_feature_table is None:
            raise Exception(f"FeatureTable, {name} cannot be found.")
        feature_table = fetched_feature_table
        batch_source = feature_table.batch_source

        # Check 1) Only parquet file format for FeatureTable batch source is supported.
        # The format instance itself must be tested (not its type), and the
        # error applies when the format is NOT parquet.
        if (batch_source and isinstance(batch_source, FileSource)
                and not isinstance(batch_source.file_options.file_format,
                                   ParquetFormat)):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}."
                f"Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            batch_source.event_timestamp_column,
            batch_source.field_mapping,
        )

        dir_path = None
        with_partitions = False
        if (isinstance(batch_source, FileSource)
                and batch_source.date_partition_column):
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                batch_source.date_partition_column,
                batch_source.event_timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                chunk_size,
                max_workers,
            )

        try:
            if isinstance(batch_source, FileSource):
                file_url = batch_source.file_options.file_url.rstrip("*")
                _upload_to_file_source(file_url, with_partitions, dest_path,
                                       self._config)
            if isinstance(batch_source, BigQuerySource):
                bq_table_ref = batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    batch_source.event_timestamp_column)

                _upload_to_bq_source(bq_table_ref,
                                     feature_table_timestamp_column, dest_path)
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print(
            "Data has been successfully ingested into FeatureTable batch source."
        )

    def _get_grpc_metadata(self):
        """
        Build the metadata tuple attached to gRPC requests. This is primarily
        used when authentication is enabled but SSL/TLS is disabled.

        Returns: The signed metadata from the auth plugin when auth is
            enabled and a plugin is present; otherwise an empty tuple
        """
        auth_enabled = self._config.getboolean(opt.ENABLE_AUTH)
        if not (auth_enabled and self._auth_metadata):
            return ()
        return self._auth_metadata.get_signed_meta()

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Dict[str, Any]],
        project: Optional[str] = None,
    ) -> OnlineResponse:
        """
        Fetch the latest online feature data from Feast Serving.
        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_table:feature" where "feature_table" & "feature" refer to
                the feature and feature table names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair.
            project: Optional project override. When None, the client's
                active project is used for retrieval.
        Returns:
            OnlineResponse wrapping the GetOnlineFeaturesV2 response.
        Raises:
            grpc.RpcError: Re-raised with the details of the failed call.
        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["sales:daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'sales:daily_transactions': [1.1,1.2], 'sales:customer_id': [0,1]}
        """

        if self._telemetry_enabled:
            counter = self._telemetry_counter
            # Usage is only reported on every 1000th call (starting with
            # the first), but every call is counted.
            if counter["get_online_features"] % 1000 == 0:
                log_usage(
                    "get_online_features",
                    self._telemetry_id,
                    datetime.utcnow(),
                    self.version(sdk_only=True),
                )
            counter["get_online_features"] += 1

        try:
            rpc = self._serving_service.GetOnlineFeaturesV2
            request = GetOnlineFeaturesRequestV2(
                features=_build_feature_references(
                    feature_ref_strs=feature_refs),
                entity_rows=_infer_online_entity_rows(entity_rows),
                project=self.project if project is None else project,
            )
            raw_response = rpc(
                request,
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return OnlineResponse(raw_response)
Esempio n. 8
0
class Client:
    """
    Feast Client: Used for creating, managing, and retrieving features.
    """
    def __init__(self, options: Optional[Dict[str, str]] = None, **kwargs):
        """
        Build a Feast Client from configuration options.

        The client should be given at least one service url. See constants.py
        for the available configuration options. Commonly used options or
        arguments include:
            core_url: Feast Core URL. Used to manage features
            serving_url: Feast Serving URL. Used to retrieve features
            project: Sets the active project. This field is optional.
            core_secure: Use client-side SSL/TLS for Core gRPC API
            serving_secure: Use client-side SSL/TLS for Serving gRPC API
            enable_auth: Enable authentication and authorization
            auth_provider: Authentication provider – "google" or "oauth"
            if auth_provider is "oauth", the following fields are mandatory –
            oauth_grant_type, oauth_client_id, oauth_client_secret, oauth_audience, oauth_token_request_url

        Args:
            options: Configuration options to initialize client with
            **kwargs: Additional keyword arguments that are merged into
                "options"; on a key collision the keyword argument wins
        """

        base_options = dict() if options is None else options
        merged_options = {**base_options, **kwargs}
        self._config = Config(options=merged_options)

        # Stubs start out empty; the _core_service / _serving_service
        # properties create them on first access.
        self._core_service_stub: Optional[CoreServiceStub] = None
        self._serving_service_stub: Optional[ServingServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # The auth metadata plugin is only built when auth is switched on
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if auth_enabled:
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _core_service(self):
        """
        Returns the gRPC Feast Core Service Stub, creating and caching it
        on first access.

        Returns: CoreServiceStub
        """
        # Reuse the cached stub when one already exists
        if self._core_service_stub:
            return self._core_service_stub

        config = self._config
        core_channel = create_grpc_channel(
            url=config.get(CONFIG_CORE_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._core_service_stub = CoreServiceStub(core_channel)
        return self._core_service_stub

    @property
    def _serving_service(self):
        """
        Returns the gRPC Feast Serving Service Stub, creating and caching it
        on first access.

        Returns: ServingServiceStub
        """
        # Reuse the cached stub when one already exists
        if self._serving_service_stub:
            return self._serving_service_stub

        config = self._config
        serving_channel = create_grpc_channel(
            url=config.get(CONFIG_SERVING_URL_KEY),
            enable_ssl=config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY),
            enable_auth=config.getboolean(CONFIG_ENABLE_AUTH_KEY),
            ssl_server_cert_path=config.get(
                CONFIG_SERVING_SERVER_SSL_CERT_KEY),
            auth_metadata_plugin=self._auth_metadata,
            timeout=config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )
        self._serving_service_stub = ServingServiceStub(serving_channel)
        return self._serving_service_stub

    @property
    def core_url(self) -> str:
        """
        Feast Core URL as stored in the client configuration

        Returns:
            Feast Core URL string
        """
        configured_url = self._config.get(CONFIG_CORE_URL_KEY)
        return configured_url

    @core_url.setter
    def core_url(self, value: str):
        """
        Store a new Feast Core URL in the client configuration

        Args:
            value: Feast Core URL
        """
        config = self._config
        config.set(CONFIG_CORE_URL_KEY, value)

    @property
    def serving_url(self) -> str:
        """
        Feast Serving URL as stored in the client configuration

        Returns:
            Feast Serving URL string
        """
        configured_url = self._config.get(CONFIG_SERVING_URL_KEY)
        return configured_url

    @serving_url.setter
    def serving_url(self, value: str):
        """
        Store a new Feast Serving URL in the client configuration

        Args:
            value: Feast Serving URL
        """
        config = self._config
        config.set(CONFIG_SERVING_URL_KEY, value)

    @property
    def core_secure(self) -> bool:
        """
        Feast Core client-side SSL/TLS setting from the client configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY)
        return ssl_enabled

    @core_secure.setter
    def core_secure(self, value: bool):
        """
        Store the Feast Core client-side SSL/TLS setting in the configuration

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_CORE_ENABLE_SSL_KEY, value)

    @property
    def serving_secure(self) -> bool:
        """
        Feast Serving client-side SSL/TLS setting from the client configuration

        Returns:
            Whether client-side SSL/TLS is enabled
        """
        ssl_enabled = self._config.getboolean(CONFIG_SERVING_ENABLE_SSL_KEY)
        return ssl_enabled

    @serving_secure.setter
    def serving_secure(self, value: bool):
        """
        Store the Feast Serving client-side SSL/TLS setting in the configuration

        Args:
            value: True to enable client-side SSL/TLS
        """
        config = self._config
        config.set(CONFIG_SERVING_ENABLE_SSL_KEY, value)

    def version(self):
        """
        Collects version information for the SDK, Feast Serving and Feast Core.

        Returns:
            Dictionary with the keys "sdk", "serving" and "core". The
            "serving" and "core" entries hold a url/version dictionary when
            the matching url is configured, otherwise the string
            "not configured".
        """
        import pkg_resources

        sdk_version = pkg_resources.get_distribution("feast").version
        result = {
            "sdk": {"version": sdk_version},
            "serving": "not configured",
            "core": "not configured",
        }

        # Serving is only queried when a serving url is configured
        if self.serving_url:
            serving_info = self._serving_service.GetFeastServingInfo(
                GetFeastServingInfoRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            result["serving"] = {
                "url": self.serving_url,
                "version": serving_info.version,
            }

        # Core is only queried when a core url is configured
        if self.core_url:
            core_info = self._core_service.GetFeastCoreVersion(
                GetFeastCoreVersionRequest(),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )
            result["core"] = {
                "url": self.core_url,
                "version": core_info.version,
            }

        return result

    @property
    def project(self) -> Union[str, None]:
        """
        Currently active project as stored in the client configuration

        Returns:
            Project name
        """
        active_project = self._config.get(CONFIG_PROJECT_KEY)
        return active_project

    def set_project(self, project: Optional[str] = None):
        """
        Change the currently active Feast project

        Args:
            project: Project to set as active. When omitted (None), the
                project from FEAST_DEFAULT_OPTIONS is used instead.
        """
        target_project = (project if project is not None else
                          FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY])
        self._config.set(CONFIG_PROJECT_KEY, target_project)

    def list_projects(self) -> List[str]:
        """
        Fetch the projects reported by the Feast Core ListProjects call

        Returns:
            List of project names
        """

        core = self._core_service
        request = ListProjectsRequest()
        grpc_timeout = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
        response = core.ListProjects(
            request,
            timeout=grpc_timeout,
            metadata=self._get_grpc_metadata(),
        )  # type: ListProjectsResponse

        project_names = list(response.projects)
        return project_names

    def create_project(self, project: str):
        """
        Sends a CreateProject request to Feast Core. The response is not
        returned to the caller.

        Args:
            project: Name of project
        """

        core = self._core_service
        request = CreateProjectRequest(name=project)
        grpc_timeout = self._config.getint(
            CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY)
        core.CreateProject(
            request,
            timeout=grpc_timeout,
            metadata=self._get_grpc_metadata(),
        )  # type: CreateProjectResponse

    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        If the archived project is the client's currently active project, the
        active project is reset to the default from FEAST_DEFAULT_OPTIONS.

        Args:
            project: Name of project to archive

        Raises:
            grpc.RpcError: If the ArchiveProject call fails. The error is
                re-raised carrying the details of the original error.
        """

        try:
            # Go through the lazy property rather than the raw stub attribute:
            # the stub is None until the property has been accessed once.
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Revert to the default project. The active project is stored in the
        # configuration (see the `project` property), not on an attribute.
        if self.project == project:
            self._config.set(CONFIG_PROJECT_KEY,
                             FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY])

    def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]):
        """
        Registers feature set(s) with Feast Core, one at a time and in the
        order given. Accepts either a single feature set or a list.

        Args:
            feature_sets: List of feature sets that will be registered

        Raises:
            ValueError: When an item is not a FeatureSet. Items that come
                before it in the list have already been applied.
        """
        candidates = (feature_sets
                      if isinstance(feature_sets, list) else [feature_sets])
        for candidate in candidates:
            if not isinstance(candidate, FeatureSet):
                raise ValueError(
                    f"Could not determine feature set type to apply {candidate}"
                )
            self._apply_feature_set(candidate)

    def _apply_feature_set(self, feature_set: FeatureSet):
        """
        Registers one feature set with Feast Core and syncs the local object
        with what Core returned.

        Args:
            feature_set: Feature set that will be registered

        Raises:
            grpc.RpcError: If the ApplyFeatureSet call fails. The error is
                re-raised carrying the details of the original error.
        """

        feature_set.is_valid()
        spec_proto = feature_set.to_proto()

        # Fall back to the client's project when the spec does not carry one
        if len(spec_proto.spec.project) == 0 and self.project is not None:
            spec_proto.spec.project = self.project

        # Send the feature set to Feast Core
        try:
            core = self._core_service
            apply_fs_response = core.ApplyFeatureSet(
                ApplyFeatureSetRequest(feature_set=spec_proto),
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyFeatureSetResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Feature set as stored by Feast Core
        applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set)

        # Report the outcome; each status is checked independently
        outcome_messages = (
            (ApplyFeatureSetResponse.Status.CREATED,
             f'Feature set created: "{applied_fs.name}"'),
            (ApplyFeatureSetResponse.Status.UPDATED,
             f'Feature set updated: "{applied_fs.name}"'),
            (ApplyFeatureSetResponse.Status.NO_CHANGE,
             f"No change detected or applied: {feature_set.name}"),
        )
        for status, message in outcome_messages:
            if apply_fs_response.status == status:
                print(message)

        # Copy the returned feature set into the caller's local feature set
        feature_set._update_from_feature_set(applied_fs)

    def list_feature_sets(
        self,
        project: str = None,
        name: str = None,
        labels: Optional[Dict[str, str]] = None) -> List[FeatureSet]:
        """
        Retrieve a list of feature sets from Feast Core

        Args:
            project: Filter feature sets based on project name. Defaults to
                the client's active project, or "*" when none is configured.
            name: Filter feature sets based on feature set name. Defaults
                to "*".
            labels: Filter feature sets based on labels. Defaults to an
                empty dictionary.

        Returns:
            List of feature sets
        """

        if project is None:
            project = self.project if self.project is not None else "*"

        if name is None:
            name = "*"

        # None instead of a shared dict() default: a mutable default is
        # created once and reused across every call.
        if labels is None:
            labels = dict()

        list_filter = ListFeatureSetsRequest.Filter(project=project,
                                                    feature_set_name=name,
                                                    labels=labels)

        # Get latest feature sets from Feast Core
        feature_set_protos = self._core_service.ListFeatureSets(
            ListFeatureSetsRequest(filter=list_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeatureSetsResponse

        # Convert each proto and attach this client to the feature set
        feature_sets = []
        for feature_set_proto in feature_set_protos.feature_sets:
            feature_set = FeatureSet.from_proto(feature_set_proto)
            feature_set._client = self
            feature_sets.append(feature_set)
        return feature_sets

    def get_feature_set(self,
                        name: str,
                        project: str = None) -> Union[FeatureSet, None]:
        """
        Fetches a single feature set from Feast Core.

        Args:
            project: Feast project that this feature set belongs to. Falls
                back to the client's active project when omitted.
            name: Name of feature set; surrounding whitespace is stripped

        Returns:
            The feature set built from the Feast Core response

        Raises:
            ValueError: When no project is given and none is configured
            grpc.RpcError: If the GetFeatureSet call fails. The error is
                re-raised carrying the details of the original error.
        """

        if project is None:
            project = self.project
            if project is None:
                raise ValueError("No project has been configured.")

        try:
            core = self._core_service
            request = GetFeatureSetRequest(project=project, name=name.strip())
            get_feature_set_response = core.GetFeatureSet(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: GetFeatureSetResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        fetched = FeatureSet.from_proto(get_feature_set_response.feature_set)
        return fetched

    def list_features_by_ref(
            self,
            project: str = None,
            entities: Optional[List[str]] = None,
            labels: Optional[Dict[str, str]] = None,
    ) -> Dict[FeatureRef, Feature]:
        """
        Returns a dictionary of features based on filters provided.

        Args:
            project: Feast project that these features belongs to. Defaults
                to the client's active project, or "default" when none is
                configured.
            entities: Feast entity that these features are associated with.
                Defaults to an empty list.
            labels: Feast labels that these features are associated with.
                Defaults to an empty dictionary.

        Returns:
            Dictionary of <feature references: features>

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565")
            >>> features = feast_client.list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"})
            >>> print(features)
        """
        if project is None:
            project = self.project if self.project is not None else "default"

        # None instead of shared list()/dict() defaults: mutable defaults are
        # created once and reused across every call.
        if entities is None:
            entities = list()
        if labels is None:
            labels = dict()

        list_filter = ListFeaturesRequest.Filter(project=project,
                                                 entities=entities,
                                                 labels=labels)

        feature_protos = self._core_service.ListFeatures(
            ListFeaturesRequest(filter=list_filter),
            metadata=self._get_grpc_metadata(),
        )  # type: ListFeaturesResponse

        # Key each returned feature by its parsed reference
        features_dict = {}
        for ref_str, feature_proto in feature_protos.features.items():
            feature_ref = FeatureRef.from_str(ref_str, ignore_project=True)
            features_dict[feature_ref] = Feature.from_proto(feature_proto)

        return features_dict

    def list_entities(self) -> Dict[str, Entity]:
        """
        Gathers the entities of every feature set returned by
        list_feature_sets(). When a name occurs more than once, the entity
        seen last is kept.

        Returns:
            Ordered dictionary of entities, indexed by name
        """
        entities_by_name = OrderedDict(
            (entity.name, entity)
            for feature_set in self.list_feature_sets()
            for entity in feature_set.entities)
        return entities_by_name

    def get_batch_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        compute_statistics: bool = False,
        project: str = None,
    ) -> RetrievalJob:
        """
        Deprecated alias: emits a DeprecationWarning and forwards all
        arguments to get_historical_features.
        """
        deprecation_message = (
            "The method get_batch_features() is being deprecated. Please use the identical get_historical_features(). "
            "Feast 0.7 and onwards will not support get_batch_features().")
        warnings.warn(deprecation_message, DeprecationWarning)

        retrieval_job = self.get_historical_features(feature_refs,
                                                     entity_rows,
                                                     compute_statistics,
                                                     project)
        return retrieval_job

    def get_historical_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        compute_statistics: bool = False,
        project: str = None,
    ) -> RetrievalJob:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_set:feature" where "feature_set" & "feature" refer to
                the feature and feature set names respectively.
                Only the feature name is required.
            entity_rows (Union[pd.DataFrame, str]):
                Pandas dataframe containing entities and a 'datetime' column.
                Each entity in a feature set must be present as a column in this
                dataframe. The datetime column must contain timestamps in
                datetime64 format. A timezone-aware datetime column is
                converted to timezone-naive on a copy; the dataframe passed
                in is left unchanged.
                Alternatively a string path ending in ".avro" or "*".
            compute_statistics (bool):
                Indicates whether Feast should compute statistics over the retrieved dataset.
            project: Specifies the project which contain the FeatureSets
                which the requested features belong to. Defaults to the
                client's active project.

        Returns:
            feast.job.RetrievalJob:
                Returns a retrival job object that can be used to monitor retrieval
                progress asynchronously, and can be used to materialize the
                results.

        Raises:
            Exception: If the connected serving store is not a batch store,
                if a string entity_rows is neither an ".avro" nor a wildcard
                path, or if entity_rows is neither a DataFrame nor a str.
            grpc.RpcError: If the GetBatchFeatures call fails. The error is
                re-raised carrying the details of the original error.

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>> import pandas as pd
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["my_project/bookings_7d", "booking_14d"]
            >>> entity_rows = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.now() for _ in range(3)],
            >>>            "customer": [1001, 1002, 1003],
            >>>         }
            >>>     )
            >>> feature_retrieval_job = feast_client.get_historical_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            metadata=self._get_grpc_metadata(),
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            raise Exception(
                f'You are connected to a store "{self.serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame detected

            # Remove timezone from datetime column. assign() returns a new
            # DataFrame, so the caller's frame keeps its timezone-aware
            # column instead of being modified as a side effect.
            if isinstance(entity_rows["datetime"].dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                entity_rows = entity_rows.assign(datetime=pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None))
        elif isinstance(entity_rows, str):
            # String based source
            if not entity_rows.endswith((".avro", "*")):
                raise Exception(
                    "Only .avro and wildcard paths are accepted as entity_rows"
                )
        else:
            raise Exception(f"Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload entity row DataFrame to staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]
        request = GetBatchFeaturesRequest(
            features=_build_feature_references(
                feature_ref_strs=feature_refs,
                project=project if project is not None else self.project,
            ),
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=staged_files,
                data_format=DataFormat.DATA_FORMAT_AVRO)),
            compute_statistics=compute_statistics,
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        try:
            response = self._serving_service.GetBatchFeatures(
                request, metadata=self._get_grpc_metadata())
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return RetrievalJob(
            response.job,
            self._serving_service,
            auth_metadata_plugin=self._auth_metadata,
        )

    def get_online_features(
        self,
        feature_refs: List[str],
        entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow,
                                Dict[str, Any]]],
        project: Optional[str] = None,
        omit_entities: bool = False,
    ) -> OnlineResponse:
        """
        Fetches the latest online feature data from Feast Serving

        Args:
            feature_refs: List of feature references that will be returned for each entity.
                Each feature reference should have the following format:
                "feature_set:feature" where "feature_set" & "feature" refer to
                the feature and feature set names respectively.
                Only the feature name is required.
            entity_rows: A list of dictionaries where each key is an entity and each value is
                feast.types.Value or Python native form.
            project: Optional project override. When given, this project is used for retrieval;
                otherwise the client's active project is sent.
            omit_entities: If true will omit entity values in the returned feature data.
        Returns:
            OnlineResponse wrapping the GetOnlineFeatures response from
            Feast Serving.

        Raises:
            grpc.RpcError: If the GetOnlineFeatures call fails. The error is
                re-raised carrying the details of the original error.

        Examples:
            >>> from feast import Client
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["daily_transactions"]
            >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}]
            >>>
            >>> online_response = feast_client.get_online_features(
            >>>     feature_refs, entity_rows, project="my_project")
            >>> online_response_dict = online_response.to_dict()
            >>> print(online_response_dict)
            {'daily_transactions': [1.1,1.2], 'customer_id': [0,1]}
        """

        try:
            serving = self._serving_service
            request = GetOnlineFeaturesRequest(
                omit_entities_in_response=omit_entities,
                features=_build_feature_references(
                    feature_ref_strs=feature_refs),
                entity_rows=_infer_online_entity_rows(entity_rows),
                project=project if project is not None else self.project,
            )
            raw_response = serving.GetOnlineFeatures(
                request,
                metadata=self._get_grpc_metadata(),
            )
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        return OnlineResponse(raw_response)

    def list_ingest_jobs(
        self,
        job_id: str = None,
        feature_set_ref: FeatureSetRef = None,
        store_name: str = None,
    ):
        """
        Lists ingestion jobs registered in Feast, optionally narrowed by
        filters, and wraps each returned job in an IngestJob.

        Args:
            job_id: Select specific ingestion job with the given job_id
            feature_set_ref: Filter ingestion jobs by target feature set (via reference)
            store_name: Filter ingestion jobs by target feast store's name

        Returns:
            List of IngestJobs matching the given filters
        """
        # The reference is only converted to a proto when one was supplied
        feature_set_ref_proto = (feature_set_ref.to_proto()
                                 if feature_set_ref else None)
        request = ListIngestionJobsRequest(
            filter=ListIngestionJobsRequest.Filter(
                id=job_id,
                feature_set_reference=feature_set_ref_proto,
                store_name=store_name,
            ))

        # Issue the list request
        response = self._core_service.ListIngestionJobs(
            request,
            metadata=self._get_grpc_metadata(),
        )  # type: ignore

        # Wrap every returned job proto
        ingest_jobs = []
        for job_proto in response.jobs:  # type: ignore
            ingest_jobs.append(
                IngestJob(job_proto,
                          self._core_service,
                          auth_metadata_plugin=self._auth_metadata))

        return ingest_jobs

    def restart_ingest_job(self, job: IngestJob):
        """
        Asks Feast Core to restart an ingestion job, identified by job.id.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support restarting a job in a transitional (ie pending, suspending, aborting),
        terminal state (ie suspended or aborted) or unknown status

        Args:
            job: IngestJob to restart

        Raises:
            grpc.RpcError: If the RestartIngestionJob call fails. The error
                is re-raised carrying the details of the original error.
        """
        restart_request = RestartIngestionJobRequest(id=job.id)
        try:
            core = self._core_service
            core.RestartIngestionJob(
                restart_request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def stop_ingest_job(self, job: IngestJob):
        """
        Asks Feast Core to stop an ingestion job, identified by job.id.
        Does nothing if the target job is already in a terminal state (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending, suspending, aborting)
        or in an unknown status

        Args:
            job: IngestJob to stop

        Raises:
            grpc.RpcError: If the StopIngestionJob call fails. The error is
                re-raised carrying the details of the original error.
        """
        stop_request = StopIngestionJobRequest(id=job.id)
        try:
            core = self._core_service
            core.StopIngestionJob(
                stop_request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

    def ingest(
        self,
        feature_set: Union[str, FeatureSet],
        source: Union[pd.DataFrame, str],
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        disable_progress_bar: bool = False,
        timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT,
    ) -> str:
        """
        Loads feature data into Feast for a specific feature set.

        The source is first written to temporary parquet file(s). The method
        then polls Feast Core until the feature set is ready, pushes the rows
        to the feature set's Kafka source chunk by chunk, and finally removes
        the temporary file(s) whether ingestion succeeded or failed.

        Args:
            feature_set (typing.Union[str, feast.feature_set.FeatureSet]):
                Feature set object or the string name of the feature set.
                For a string, the client's active project is used, or
                "default" when none is configured.

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            chunk_size (int):
                Amount of rows to load and ingest at a time.

            max_workers (int):
                Number of worker processes to use to encode values.

            disable_progress_bar (bool):
                Disable printing of progress statistics.

            timeout (int):
                Timeout in seconds to wait for completion. The time spent
                waiting for the feature set to become ready is subtracted
                before the remainder is used as the per-chunk flush timeout.

        Returns:
            str:
                ingestion id for this dataset

        Raises:
            TimeoutError: If the feature set is not ready within the timeout.
            Exception: If feature_set is neither a FeatureSet nor a str, or
                if the feature set's source type is not "Kafka".

        Examples:
            >>> from datetime import datetime
            >>> import pandas as pd
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> fs_df = pd.DataFrame(
            >>>         {
            >>>            "datetime": [datetime.now()],
            >>>            "driver": [1001],
            >>>            "rating": [4.3],
            >>>         }
            >>>     )
            >>> client.set_project("project1")
            >>> client.ingest("driver", fs_df)
            >>>
            >>> driver_fs = client.get_feature_set(name="driver", project="project1")
            >>> client.ingest(driver_fs, fs_df)
        """

        if isinstance(feature_set, FeatureSet):
            name = feature_set.name
            project = feature_set.project
        elif isinstance(feature_set, str):
            project = self.project if self.project is not None else "default"
            name = feature_set
        else:
            raise Exception("Feature set name must be provided")

        # Read table into temporary parquet file(s)
        dir_path, dest_path = _read_table_from_source(source, chunk_size,
                                                      max_workers)

        # Everything after the temporary files exist runs under try/finally,
        # so a timeout or failed lookup while waiting for the feature set
        # does not leave the temporary directory behind.
        try:
            pq_file = pq.ParquetFile(dest_path)

            row_count = pq_file.metadata.num_rows

            current_time = time.time()

            print("Waiting for feature set to be ready for ingestion...")
            while True:
                if timeout is not None and time.time(
                ) - current_time >= timeout:
                    raise TimeoutError(
                        "Timed out waiting for feature set to be ready")
                fetched_feature_set: Optional[
                    FeatureSet] = self.get_feature_set(name, project)
                if (fetched_feature_set is not None
                        and fetched_feature_set.status
                        == FeatureSetStatus.STATUS_READY):
                    feature_set = fetched_feature_set
                    break
                time.sleep(3)

            # Only the time left after waiting is available for flushing
            if timeout is not None:
                timeout = timeout - int(time.time() - current_time)

            # Kafka configs
            brokers = feature_set.get_kafka_source_brokers()
            topic = feature_set.get_kafka_source_topic()
            producer = get_producer(brokers, row_count, disable_progress_bar)

            # Loop optimization declarations
            produce = producer.produce
            flush = producer.flush
            ingestion_id = _generate_ingestion_id(feature_set)

            # Transform and push data to Kafka
            if feature_set.source.source_type == "Kafka":
                for chunk in get_feature_row_chunks(
                        file=dest_path,
                        row_groups=list(range(pq_file.num_row_groups)),
                        fs=feature_set,
                        ingestion_id=ingestion_id,
                        max_workers=max_workers,
                ):

                    # Push FeatureRow one chunk at a time to kafka
                    for serialized_row in chunk:
                        produce(topic=topic, value=serialized_row)

                    # Force a flush after each chunk
                    flush(timeout=timeout)

                    # Remove chunk from memory
                    del chunk

            else:
                raise Exception(
                    f"Could not determine source type for feature set "
                    f'"{feature_set.name}" with source type '
                    f'"{feature_set.source.source_type}"')

            # Print ingestion statistics
            producer.print_results()
        finally:
            # Remove parquet file(s) that were created earlier
            print("Removing temporary file(s)...")
            shutil.rmtree(dir_path)

        return ingestion_id

    def get_statistics(
        self,
        feature_set_id: str,
        store: str,
        features: Optional[List[str]] = None,
        ingestion_ids: Optional[List[str]] = None,
        start_date: Optional[datetime.datetime] = None,
        end_date: Optional[datetime.datetime] = None,
        force_refresh: bool = False,
        project: Optional[str] = None,
    ) -> statistics_pb2.DatasetFeatureStatisticsList:
        """
        Retrieves the feature statistics computed over the data in the batch
        stores.

        Args:
            feature_set_id: Feature set id to retrieve batch statistics for.
                May already be project-qualified ("project/name"); in that
                case ``project`` is ignored.
            store: Name of the store to retrieve feature statistics over. This
                store must be a historical store.
            features: Optional list of feature names to filter from the
                results. Defaults to no filter.
            ingestion_ids: Optional list of ingestion ids by which to filter
                data before retrieving statistics. Cannot be used with
                start_date and end_date.
                If multiple ingestion ids are provided, unaggregatable
                statistics will be dropped.
            start_date: Optional start date over which to filter statistical
                data. Data from this date will be included.
                Cannot be used with ingestion_ids. If the provided period spans
                multiple days, unaggregatable statistics will be dropped.
            end_date: Optional end date over which to filter statistical data.
                Data from this date will not be included.
                Cannot be used with ingestion_ids. If the provided period spans
                multiple days, unaggregatable statistics will be dropped.
            force_refresh: Setting this flag to true will force a recalculation
                of statistics and overwrite results currently in the cache,
                if any.
            project: Manual override for default project. When omitted (or
                empty) the feature set id is sent unqualified.

        Returns:
            A tensorflow DatasetFeatureStatisticsList containing TFDV
            statistics.

        Raises:
            ValueError: If ingestion_ids is combined with start_date or
                end_date.
        """

        if ingestion_ids is not None and (start_date is not None
                                          or end_date is not None):
            raise ValueError(
                "Only one of ingestion_ids or [start_date, end_date] can be "
                "provided.")

        # Only qualify the id when a project was actually supplied. The
        # previous `project != ""` check let the default `None` through and
        # produced ids such as "None/my_feature_set".
        if project and "/" not in feature_set_id:
            feature_set_id = f"{project}/{feature_set_id}"

        request = GetFeatureStatisticsRequest(
            feature_set_id=feature_set_id,
            features=features or [],
            store=store,
            force_refresh=force_refresh,
        )
        if ingestion_ids is not None:
            request.ingestion_ids.extend(ingestion_ids)
        else:
            # NOTE(review): datetime.timestamp() interprets naive datetimes in
            # the local timezone - confirm callers pass what the server expects.
            if start_date is not None:
                request.start_date.CopyFrom(
                    Timestamp(seconds=int(start_date.timestamp())))
            if end_date is not None:
                request.end_date.CopyFrom(
                    Timestamp(seconds=int(end_date.timestamp())))

        # Attach auth metadata so the call also works when authentication is
        # enabled; _get_grpc_metadata() returns an empty tuple otherwise.
        response = self._core_service.GetFeatureStatistics(
            request,
            metadata=self._get_grpc_metadata(),
        )
        return response.dataset_feature_statistics_list

    def _get_grpc_metadata(self):
        """
        Build the metadata to send along with a gRPC request.

        Signed metadata is produced only when authentication is switched on
        in the client configuration and an auth metadata plugin is present;
        in every other case no metadata is attached.

        Returns: Tuple of metadata to attach to each gRPC call
        """
        auth_enabled = self._config.getboolean(CONFIG_ENABLE_AUTH_KEY)
        if not (auth_enabled and self._auth_metadata):
            # Nothing to sign with: send the request without extra metadata.
            return ()
        return self._auth_metadata.get_signed_meta()
Esempio n. 9
0
class Client:
    """
    JobController Client: used internally to manage Ingestion Jobs
    """

    def __init__(self, options=None, **kwargs):
        """
        JobControllerClient should be initialized with
            jobcontroller_url: Feast JobController address

        :param options: Configuration options to initialize client with
        :param kwargs: options in kwargs style; these take precedence over
            entries with the same key in ``options``
        """
        if options is None:
            options = {}
        self._config = Config(options={**options, **kwargs})

        # The service stub is created lazily on first use
        # (see _jobcontroller_service).
        self._jobcontroller_service_stub: Optional[
            JobControllerServiceStub] = None
        self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None

        # Configure Auth Metadata Plugin if auth is enabled
        if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY):
            self._auth_metadata = feast_auth.get_auth_metadata_plugin(
                self._config)

    @property
    def _jobcontroller_service(self):
        """
        Returns the JobController service stub, creating the gRPC channel and
        stub on first access and reusing them afterwards.
        """
        if not self._jobcontroller_service_stub:
            # The SSL settings are read from the Core SSL configuration keys.
            channel = create_grpc_channel(
                url=self._config.get(CONFIG_JOB_CONTROLLER_SERVER_KEY),
                enable_ssl=self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY),
                enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY),
                ssl_server_cert_path=self._config.get(
                    CONFIG_CORE_SERVER_SSL_CERT_KEY),
                auth_metadata_plugin=self._auth_metadata,
                timeout=self._config.getint(
                    CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
            )
            self._jobcontroller_service_stub = JobControllerServiceStub(
                channel)

        return self._jobcontroller_service_stub

    def list_ingest_jobs(
        self,
        job_id: Optional[str] = None,
        feature_set_ref: Optional[FeatureSetRef] = None,
        store_name: Optional[str] = None,
    ):
        """
        List the ingestion jobs currently registered in Feast, with optional filters.
        Provides detailed metadata about each ingestion job.

        Args:
            job_id: Select specific ingestion job with the given job_id
            feature_set_ref: Filter ingestion jobs by target feature set (via reference)
            store_name: Filter ingestion jobs by target feast store's name

        Returns:
            List of IngestJobs matching the given filters
        """
        # construct list request
        feature_set_ref_proto = None
        if feature_set_ref:
            feature_set_ref_proto = feature_set_ref.to_proto()
        list_filter = ListIngestionJobsRequest.Filter(
            id=job_id,
            feature_set_reference=feature_set_ref_proto,
            store_name=store_name,
        )
        request = ListIngestionJobsRequest(filter=list_filter)
        # make list request & unpack response
        response = self._jobcontroller_service.ListIngestionJobs(
            request,
            metadata=self._get_grpc_metadata(),
        )  # type: ignore
        ingest_jobs = [
            IngestJob(proto,
                      self._jobcontroller_service,
                      auth_metadata_plugin=self._auth_metadata)
            for proto in response.jobs  # type: ignore
        ]

        return ingest_jobs

    def restart_ingest_job(self, job: IngestJob):
        """
        Restart ingestion job currently registered in Feast.
        NOTE: Data might be lost during the restart for some job runners.
        Does not support restarting a job in a transitional (ie pending, suspending, aborting),
        terminal state (ie suspended or aborted) or unknown status

        Args:
            job: IngestJob to restart

        Raises:
            grpc.RpcError: If the request fails; carries the details of the
                original error, which is kept as the exception's cause.
        """
        request = RestartIngestionJobRequest(id=job.id)
        try:
            self._jobcontroller_service.RestartIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            # Chain explicitly so the original error (status code, trailing
            # metadata) stays reachable through __cause__.
            raise grpc.RpcError(e.details()) from e

    def stop_ingest_job(self, job: IngestJob):
        """
        Stop ingestion job currently registered in Feast.
        Does nothing if the target job is already in a terminal state (ie suspended or aborted).
        Does not support stopping a job in a transitional (ie pending, suspending, aborting)
        or in an unknown status

        Args:
            job: IngestJob to stop

        Raises:
            grpc.RpcError: If the request fails; carries the details of the
                original error, which is kept as the exception's cause.
        """
        request = StopIngestionJobRequest(id=job.id)
        try:
            self._jobcontroller_service.StopIngestionJob(
                request,
                metadata=self._get_grpc_metadata(),
            )  # type: ignore
        except grpc.RpcError as e:
            # Chain explicitly so the original error (status code, trailing
            # metadata) stays reachable through __cause__.
            raise grpc.RpcError(e.details()) from e

    def _get_grpc_metadata(self):
        """
        Returns a metadata tuple to attach to gRPC requests. This is primarily
        used when authentication is enabled but SSL/TLS is disabled.

        Returns: Tuple of metadata to attach to each gRPC call
        """
        if self._config.getboolean(
                CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata:
            return self._auth_metadata.get_signed_meta()
        return ()