コード例 #1
0
 def teardown_infra(
     self,
     project: str,
     tables: Sequence[FeatureView],
     entities: Sequence[Entity],
 ) -> None:
     """Tear down online-store infrastructure for the given views and entities.

     Delegates entirely to the configured online store; ``project`` is part of
     the provider interface but is not used by this pass-through implementation.
     """
     # Tag telemetry with the concrete provider class handling this call.
     set_usage_attribute("provider", self.__class__.__name__)
     store = self.online_store
     store.teardown(self.repo_config, tables, entities)
コード例 #2
0
 def online_write_batch(
     self,
     config: RepoConfig,
     table: FeatureView,
     data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                      Optional[datetime]]],
     progress: Optional[Callable[[int], Any]],
 ) -> None:
     """Write a batch of feature rows to the online store.

     Each element of ``data`` bundles an entity key, a feature-name -> value
     mapping, an event timestamp, and an optional created timestamp. The
     optional ``progress`` callback receives the number of rows written.
     """
     # Tag telemetry with the concrete provider class handling this call.
     set_usage_attribute("provider", self.__class__.__name__)
     store = self.online_store
     store.online_write_batch(config, table, data, progress)
コード例 #3
0
ファイル: passthrough_provider.py プロジェクト: pyalex/feast
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: List[str] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the latest stored feature values for the given entity keys.

        Returns one (event_timestamp, feature-map) pair per entity key; either
        element may be None when no data is present.
        """
        # Tag telemetry with the concrete provider class handling this call.
        set_usage_attribute("provider", self.__class__.__name__)
        # NOTE(review): ``requested_features`` is accepted but not forwarded
        # to the online store here — confirm whether the store API supports it.
        return self.online_store.online_read(config, table, entity_keys)
コード例 #4
0
    def materialize_single_feature_view(
        self,
        config: RepoConfig,
        feature_view: FeatureView,
        start_date: datetime,
        end_date: datetime,
        registry: Registry,
        project: str,
        tqdm_builder: Callable[[int], tqdm],
    ) -> None:
        """Materialize one feature view from the offline store to the online store.

        Pulls the latest rows between ``start_date`` and ``end_date`` from the
        view's batch source, applies the view's field mapping (if any),
        converts the resulting Arrow table to protos, and writes them to the
        online store in batches, reporting progress through a tqdm bar built
        by ``tqdm_builder``.
        """
        set_usage_attribute("provider", self.__class__.__name__)

        entities = [
            registry.get_entity(entity_name, project)
            for entity_name in feature_view.entities
        ]

        (
            join_key_columns,
            feature_name_columns,
            event_timestamp_column,
            created_timestamp_column,
        ) = _get_column_names(feature_view, entities)

        offline_job = self.offline_store.pull_latest_from_table_or_query(
            config=config,
            data_source=feature_view.batch_source,
            join_key_columns=join_key_columns,
            feature_name_columns=feature_name_columns,
            event_timestamp_column=event_timestamp_column,
            created_timestamp_column=created_timestamp_column,
            start_date=start_date,
            end_date=end_date,
        )

        table = offline_job.to_arrow()

        if feature_view.batch_source.field_mapping is not None:
            table = _run_field_mapping(table,
                                       feature_view.batch_source.field_mapping)

        join_keys = {entity.join_key: entity.value_type for entity in entities}

        with tqdm_builder(table.num_rows) as pbar:
            for batch in table.to_batches(DEFAULT_BATCH_SIZE):
                rows_to_write = _convert_arrow_to_proto(
                    batch, feature_view, join_keys)
                # Forward the caller-supplied ``config`` (not self.repo_config)
                # so one RepoConfig flows consistently through the whole
                # materialization, matching the other delegating methods.
                self.online_write_batch(
                    config,
                    feature_view,
                    rows_to_write,
                    pbar.update,
                )
コード例 #5
0
 def online_read(
     self,
     config: RepoConfig,
     table: FeatureView,
     entity_keys: List[EntityKeyProto],
     requested_features: List[str] = None,
 ) -> List:
     """Read online feature values for the given entity keys.

     Returns an empty list when no online store is configured.
     """
     # Tag telemetry with the concrete provider class handling this call.
     set_usage_attribute("provider", self.__class__.__name__)
     # Guard clause: without an online store there is nothing to read.
     if not self.online_store:
         return []
     return self.online_store.online_read(config, table, entity_keys,
                                          requested_features)
コード例 #6
0
 def update_infra(
     self,
     project: str,
     tables_to_delete: Sequence[FeatureView],
     tables_to_keep: Sequence[FeatureView],
     entities_to_delete: Sequence[Entity],
     entities_to_keep: Sequence[Entity],
     partial: bool,
 ):
     """Reconcile online-store infrastructure with the desired state.

     Passes the delete/keep sets straight through to the online store;
     ``partial`` indicates an incremental (rather than full) update.
     """
     # Tag telemetry with the concrete provider class handling this call.
     set_usage_attribute("provider", self.__class__.__name__)
     store = self.online_store
     store.update(
         config=self.repo_config,
         tables_to_delete=tables_to_delete,
         entities_to_delete=entities_to_delete,
         tables_to_keep=tables_to_keep,
         entities_to_keep=entities_to_keep,
         partial=partial,
     )
コード例 #7
0
    def ingest_df(
        self,
        feature_view: FeatureView,
        entities: List[Entity],
        df: pandas.DataFrame,
    ):
        """Write a pandas DataFrame directly into the online store for a view.

        Converts ``df`` to Arrow, applies the view's field mapping (if any),
        turns rows into protos keyed by the entities' join keys, and performs
        a batch online write without progress reporting.
        """
        # Tag telemetry with the concrete provider class handling this call.
        set_usage_attribute("provider", self.__class__.__name__)

        arrow_table = pa.Table.from_pandas(df)

        field_mapping = feature_view.batch_source.field_mapping
        if field_mapping is not None:
            arrow_table = _run_field_mapping(arrow_table, field_mapping)

        join_keys = {e.join_key: e.value_type for e in entities}
        rows = _convert_arrow_to_proto(arrow_table, feature_view, join_keys)

        self.online_write_batch(
            self.repo_config, feature_view, rows, progress=None
        )
コード例 #8
0
    def get_historical_features(
        self,
        config: RepoConfig,
        feature_views: List[FeatureView],
        feature_refs: List[str],
        entity_df: Union[pandas.DataFrame, str],
        registry: Registry,
        project: str,
        full_feature_names: bool,
    ) -> RetrievalJob:
        """Build a point-in-time-correct historical retrieval job.

        Pure pass-through: every argument is forwarded unchanged to the
        configured offline store, and its RetrievalJob is returned directly.
        """
        # Tag telemetry with the concrete provider class handling this call.
        set_usage_attribute("provider", self.__class__.__name__)
        return self.offline_store.get_historical_features(
            config=config,
            feature_views=feature_views,
            feature_refs=feature_refs,
            entity_df=entity_df,
            registry=registry,
            project=project,
            full_feature_names=full_feature_names,
        )
コード例 #9
0
    def retrieve_saved_dataset(self, config: RepoConfig,
                               dataset: SavedDataset) -> RetrievalJob:
        """Build a retrieval job that reads back a previously saved dataset.

        Column names are derived from the dataset's feature refs
        (``"view:feature"``): either mangled to ``view__feature`` when full
        feature names were saved, or reduced to the bare feature name.
        """
        # Tag telemetry with the concrete provider class handling this call.
        set_usage_attribute("provider", self.__class__.__name__)

        # The full-feature-names flag applies to the whole dataset, so decide
        # once instead of per feature ref.
        if dataset.full_feature_names:
            feature_name_columns = [
                ref.replace(":", "__") for ref in dataset.features
            ]
        else:
            feature_name_columns = [
                ref.split(":")[1] for ref in dataset.features
            ]

        # ToDo: replace hardcoded value
        event_ts_column = "event_timestamp"

        return self.offline_store.pull_all_from_table_or_query(
            config=config,
            data_source=dataset.storage.to_data_source(),
            join_key_columns=dataset.join_keys,
            feature_name_columns=feature_name_columns,
            event_timestamp_column=event_ts_column,
            start_date=make_tzaware(
                dataset.min_event_timestamp),  # type: ignore
            # Pad the end by one second so rows at the max timestamp are kept.
            end_date=make_tzaware(dataset.max_event_timestamp +
                                  timedelta(seconds=1)),  # type: ignore
        )
コード例 #10
0
 def redis_store():
     """Record a sample usage attribute (telemetry stub, returns None)."""
     set_usage_attribute("attr", "val")
コード例 #11
0
 def provider_two():
     """Record a second sample usage attribute (telemetry stub, returns None)."""
     set_usage_attribute("new-attr", "new-val")