Beispiel #1
0
    def materialize_single_feature_view(
        self,
        feature_view: FeatureView,
        start_date: datetime,
        end_date: datetime,
        registry: Registry,
        project: str,
        tqdm_builder: Callable[[int], tqdm],
    ) -> None:
        """Copy one feature view's rows from its offline source to the online store.

        Steps, in order:
          1. Look up every entity named in ``feature_view.entities`` in the
             registry for ``project``.
          2. Derive the join-key, feature, event-timestamp and
             created-timestamp column names via ``_get_column_names``.
          3. Pass both dates through ``utils.make_tzaware``.
          4. Pull a table for the ``[start_date, end_date]`` window from the
             offline store chosen for ``feature_view.input``.
          5. Apply the source's field mapping when it is not ``None``.
          6. Convert the table to rows and hand them to
             ``self.online_write_batch`` with a progress callback.
          7. Append the (converted) date window to
             ``feature_view.materialization_intervals`` and re-apply the
             feature view to the registry.

        Args:
            feature_view: The feature view to materialize; its
                ``materialization_intervals`` list is mutated.
            start_date: Start of the window to pull.
            end_date: End of the window to pull.
            registry: Registry used to resolve entities and to persist the
                updated feature view.
            project: Project name passed to registry and online-store calls.
            tqdm_builder: Called with the number of rows to write; must
                return a context manager whose value exposes ``update``.
        """
        resolved_entities = [
            registry.get_entity(name, project) for name in feature_view.entities
        ]

        column_names = _get_column_names(feature_view, resolved_entities)
        key_cols, feature_cols, event_ts_col, created_ts_col = column_names

        # Rebind the window to the values returned by make_tzaware; these
        # converted values are also what gets recorded as the interval below.
        start_date = utils.make_tzaware(start_date)
        end_date = utils.make_tzaware(end_date)

        source = feature_view.input
        store = get_offline_store_from_sources([source])
        table = store.pull_latest_from_table_or_query(
            data_source=source,
            join_key_columns=key_cols,
            feature_name_columns=feature_cols,
            event_timestamp_column=event_ts_col,
            created_timestamp_column=created_ts_col,
            start_date=start_date,
            end_date=end_date,
        )

        mapping = source.field_mapping
        if mapping is not None:
            table = _run_field_mapping(table, mapping)

        rows = _convert_arrow_to_proto(
            table,
            feature_view,
            [entity.join_key for entity in resolved_entities],
        )

        with tqdm_builder(len(rows)) as pbar:

            def _advance(x):
                # Forward the writer's progress count to the progress bar.
                return pbar.update(x)

            self.online_write_batch(project, feature_view, rows, _advance)

        # Record the window only after the write has returned, then persist
        # the updated feature view.
        feature_view.materialization_intervals.append((start_date, end_date))
        registry.apply_feature_view(feature_view, project)
Beispiel #2
0
 def get_historical_features(
     config: RepoConfig,
     feature_views: List[FeatureView],
     feature_refs: List[str],
     entity_df: Union[pd.DataFrame, str],
     registry: Registry,
     project: str,
 ) -> RetrievalJob:
     """Delegate historical feature retrieval to the matching offline store.

     The offline store is selected by ``get_offline_store_from_sources`` from
     the ``input`` source of every feature view in ``feature_views``. All
     arguments are then forwarded unchanged, by keyword, to that store's
     ``get_historical_features``.

     Args:
         config: Repository configuration, forwarded as-is.
         feature_views: Feature views whose ``input`` sources pick the store.
         feature_refs: Feature references, forwarded as-is.
         entity_df: A DataFrame or a string, forwarded as-is.
         registry: Registry, forwarded as-is.
         project: Project name, forwarded as-is.

     Returns:
         Whatever the selected store's ``get_historical_features`` returns.
     """
     sources = [view.input for view in feature_views]
     store = get_offline_store_from_sources(sources)
     job = store.get_historical_features(
         config=config,
         feature_views=feature_views,
         feature_refs=feature_refs,
         entity_df=entity_df,
         registry=registry,
         project=project,
     )
     return job