def materialize_single_feature_view(
    self,
    feature_view: FeatureView,
    start_date: datetime,
    end_date: datetime,
    registry: Registry,
    project: str,
    tqdm_builder: Callable[[int], tqdm],
) -> None:
    """Materialize one feature view's latest values into the online store.

    Pulls the latest rows per entity key from the view's offline source over
    [start_date, end_date], converts them to protos, writes them online, and
    records the materialized interval back into the registry.

    Args:
        feature_view: The feature view to materialize.
        start_date: Inclusive lower bound of the interval to materialize.
        end_date: Inclusive upper bound of the interval to materialize.
        registry: Registry used to resolve entities and persist the updated view.
        project: Feast project the view and entities belong to.
        tqdm_builder: Factory producing a progress bar sized to the row count.
    """
    entities = [
        registry.get_entity(entity_name, project)
        for entity_name in feature_view.entities
    ]

    (
        join_key_columns,
        feature_name_columns,
        event_timestamp_column,
        created_timestamp_column,
    ) = _get_column_names(feature_view, entities)

    # Normalize both bounds to timezone-aware datetimes before querying.
    start_date = utils.make_tzaware(start_date)
    end_date = utils.make_tzaware(end_date)

    offline_store = get_offline_store_from_sources([feature_view.input])
    table = offline_store.pull_latest_from_table_or_query(
        data_source=feature_view.input,
        join_key_columns=join_key_columns,
        feature_name_columns=feature_name_columns,
        event_timestamp_column=event_timestamp_column,
        created_timestamp_column=created_timestamp_column,
        start_date=start_date,
        end_date=end_date,
    )

    # Rename source columns to feature-view names when a mapping is declared.
    field_mapping = feature_view.input.field_mapping
    if field_mapping is not None:
        table = _run_field_mapping(table, field_mapping)

    join_keys = [entity.join_key for entity in entities]
    rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys)

    with tqdm_builder(len(rows_to_write)) as pbar:
        self.online_write_batch(
            project, feature_view, rows_to_write, lambda rows: pbar.update(rows)
        )

    # Record the completed interval so later runs know what is materialized.
    feature_view.materialization_intervals.append((start_date, end_date))
    registry.apply_feature_view(feature_view, project)
def get_historical_features(
    config: RepoConfig,
    feature_views: List[FeatureView],
    feature_refs: List[str],
    entity_df: Union[pd.DataFrame, str],
    registry: Registry,
    project: str,
) -> RetrievalJob:
    """Build a historical (point-in-time) retrieval job for the given views.

    Resolves a single offline store implementation from every view's input
    source, then delegates the retrieval to it unchanged.

    Args:
        config: Repo configuration forwarded to the offline store.
        feature_views: Feature views participating in the retrieval.
        feature_refs: Fully qualified feature references to fetch.
        entity_df: Entity dataframe (or a query string producing one).
        registry: Registry used by the offline store for metadata lookups.
        project: Feast project name.

    Returns:
        A RetrievalJob produced by the resolved offline store.
    """
    sources = [view.input for view in feature_views]
    offline_store = get_offline_store_from_sources(sources)
    return offline_store.get_historical_features(
        config=config,
        feature_views=feature_views,
        feature_refs=feature_refs,
        entity_df=entity_df,
        registry=registry,
        project=project,
    )