Esempio n. 1
0
    def retrieve_job(self):

        serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        job_proto = JobProto(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_RUNNING,
        )
        return RetrievalJob(job_proto, serving_service_stub)
Esempio n. 2
0
    def get_batch_features(
        self,
        feature_refs: List[str],
        entity_rows: Union[pd.DataFrame, str],
        default_project: str = None,
    ) -> RetrievalJob:
        """
        Retrieves historical features from a Feast Serving deployment.

        Args:
            feature_refs (List[str]):
                List of feature references that will be returned for each entity.
                Each feature reference should have the following format
                "project/feature:version".

            entity_rows (Union[pd.DataFrame, str]):
                Pandas dataframe containing entities and a 'datetime' column.
                Each entity in a feature set must be present as a column in this
                dataframe. The datetime column must contain timestamps in
                datetime64 format.
            default_project: Default project where feature values will be found.

        Returns:
            feast.job.RetrievalJob:
                Returns a retrival job object that can be used to monitor retrieval
                progress asynchronously, and can be used to materialize the
                results.

        Examples:
            >>> from feast import Client
            >>> from datetime import datetime
            >>>
            >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566")
            >>> feature_refs = ["my_project/bookings_7d:1", "booking_14d"]
            >>> entity_rows = pd.DataFrame(
            >>>         {
            >>>            "datetime": [pd.datetime.now() for _ in range(3)],
            >>>            "customer": [1001, 1002, 1003],
            >>>         }
            >>>     )
            >>> feature_retrieval_job = feast_client.get_batch_features(
            >>>     feature_refs, entity_rows, default_project="my_project")
            >>> df = feature_retrieval_job.to_dataframe()
            >>> print(df)
        """

        self._connect_serving()

        feature_references = _build_feature_references(
            feature_refs=feature_refs, default_project=default_project)

        # Retrieve serving information to determine store type and
        # staging location
        serving_info = self._serving_service_stub.GetFeastServingInfo(
            GetFeastServingInfoRequest(),
            timeout=self._config.getint(
                CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
        )  # type: GetFeastServingInfoResponse

        if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH:
            raise Exception(
                f'You are connected to a store "{self._serving_url}" which '
                f"does not support batch retrieval ")

        if isinstance(entity_rows, pd.DataFrame):
            # Pandas DataFrame detected

            # Remove timezone from datetime column
            if isinstance(entity_rows["datetime"].dtype,
                          pd.core.dtypes.dtypes.DatetimeTZDtype):
                entity_rows["datetime"] = pd.DatetimeIndex(
                    entity_rows["datetime"]).tz_localize(None)
        elif isinstance(entity_rows, str):
            # String based source
            if not entity_rows.endswith((".avro", "*")):
                raise Exception(
                    f"Only .avro and wildcard paths are accepted as entity_rows"
                )
        else:
            raise Exception(f"Only pandas.DataFrame and str types are allowed"
                            f" as entity_rows, but got {type(entity_rows)}.")

        # Export and upload entity row DataFrame to staging location
        # provided by Feast
        staged_files = export_source_to_staging_location(
            entity_rows, serving_info.job_staging_location)  # type: List[str]

        request = GetBatchFeaturesRequest(
            features=feature_references,
            dataset_source=DatasetSource(file_source=DatasetSource.FileSource(
                file_uris=staged_files,
                data_format=DataFormat.DATA_FORMAT_AVRO)),
        )

        # Retrieve Feast Job object to manage life cycle of retrieval
        response = self._serving_service_stub.GetBatchFeatures(request)
        return RetrievalJob(response.job, self._serving_service_stub)