def _save_offline_dataframe(
    self,
    feature_group,
    dataframe,
    operation,
    write_options,
    validation_id=None,
):
    """Append ``dataframe`` to the offline storage of ``feature_group``.

    When ``feature_group.time_travel_format`` is ``"HUDI"`` the write is
    delegated to ``hudi_engine.HudiEngine.save_hudi_fg``. Otherwise the
    dataframe is appended to the feature group's table through the
    dataframe's own writer, using ``self.HIVE_FORMAT``.

    Args:
        feature_group: Feature group metadata object. Read here:
            ``time_travel_format``, ``feature_store_id``,
            ``feature_store_name``, ``partition_key`` and
            ``_get_table_name()``.
        dataframe: Dataframe to persist; on the non-Hudi path it must expose
            a ``write`` attribute (presumably a Spark DataFrame).
        operation: Forwarded unchanged to ``save_hudi_fg``; unused on the
            non-Hudi path.
        write_options: Mapping of writer options. Forwarded to
            ``save_hudi_fg`` on the Hudi path and unpacked into
            ``.options(**write_options)`` otherwise, so it must be a mapping
            there.
        validation_id: Optional value forwarded to ``save_hudi_fg``.

    Returns:
        None.
    """
    if feature_group.time_travel_format == "HUDI":
        # The Spark context is passed before the Spark session, matching the
        # argument order of every other HudiEngine construction in this
        # file. This call previously passed them the other way round.
        hudi_engine_instance = hudi_engine.HudiEngine(
            feature_group.feature_store_id,
            feature_group.feature_store_name,
            feature_group,
            self._spark_context,
            self._spark_session,
        )
        hudi_engine_instance.save_hudi_fg(
            dataframe, self.APPEND, operation, write_options, validation_id
        )
        return

    # A falsy partition_key (None or empty) means "no partitioning".
    (
        dataframe.write.format(self.HIVE_FORMAT)
        .mode(self.APPEND)
        .options(**write_options)
        .partitionBy(feature_group.partition_key or [])
        .saveAsTable(feature_group._get_table_name())
    )
def commit_delete(feature_group, delete_df, write_options):
    """Hand ``delete_df`` to a Hudi engine built for ``feature_group``.

    A ``hudi_engine.HudiEngine`` is constructed from the feature group's
    store id and name, the feature group itself, and the Spark context and
    session held by the current ``engine`` instance. Its ``delete_record``
    method then does the work.

    Args:
        feature_group: Feature group metadata object; ``feature_store_id``
            and ``feature_store_name`` are read from it.
        delete_df: Forwarded unchanged to ``delete_record``.
        write_options: Forwarded unchanged to ``delete_record``.

    Returns:
        Whatever ``HudiEngine.delete_record`` returns.
    """
    return hudi_engine.HudiEngine(
        feature_group.feature_store_id,
        feature_group.feature_store_name,
        feature_group,
        engine.get_instance()._spark_context,
        engine.get_instance()._spark_session,
    ).delete_record(delete_df, write_options)
def register_hudi_temporary_table(
    self, hudi_fg_alias, feature_store_id, feature_store_name, read_options
):
    """Register the feature group behind ``hudi_fg_alias`` as a temporary table.

    A ``hudi_engine.HudiEngine`` is built for ``hudi_fg_alias.feature_group``
    with this object's Spark context and session, and its
    ``register_temporary_table`` method is called with the alias name, the
    alias' start/end timestamps and ``read_options``.

    Args:
        hudi_fg_alias: Object exposing ``feature_group``, ``alias``,
            ``left_feature_group_start_timestamp`` and
            ``left_feature_group_end_timestamp``.
        feature_store_id: Forwarded to the ``HudiEngine`` constructor.
        feature_store_name: Forwarded to the ``HudiEngine`` constructor.
        read_options: Forwarded unchanged to ``register_temporary_table``.

    Returns:
        None; the result of ``register_temporary_table`` is discarded.
    """
    hudi_engine.HudiEngine(
        feature_store_id,
        feature_store_name,
        hudi_fg_alias.feature_group,
        self._spark_context,
        self._spark_session,
    ).register_temporary_table(
        hudi_fg_alias.alias,
        hudi_fg_alias.left_feature_group_start_timestamp,
        hudi_fg_alias.left_feature_group_end_timestamp,
        read_options,
    )
def commit_details(self, feature_group, limit):
    """Return per-commit statistics for ``feature_group`` keyed by commit id.

    Commits are fetched with ``self._feature_group_api.commit_details``.
    Each commit id is additionally rendered through
    ``HudiEngine._timestamp_to_hudiformat`` for the ``"committedOn"`` field.

    Args:
        feature_group: Feature group metadata object; ``feature_store_id``
            and ``feature_store_name`` are read from it.
        limit: Forwarded unchanged to the feature group API call.

    Returns:
        dict mapping each ``commitid`` to a dict with the keys
        ``"committedOn"``, ``"rowsUpdated"``, ``"rowsInserted"`` and
        ``"rowsDeleted"``. If a commit id occurs more than once, the last
        occurrence wins.
    """
    # The engine is built before the API call, as in the original ordering.
    hudi = hudi_engine.HudiEngine(
        feature_group.feature_store_id,
        feature_group.feature_store_name,
        feature_group,
        engine.get_instance()._spark_context,
        engine.get_instance()._spark_session,
    )
    commits = self._feature_group_api.commit_details(feature_group, limit)

    return {
        commit.commitid: {
            "committedOn": hudi._timestamp_to_hudiformat(commit.commitid),
            "rowsUpdated": commit.rows_updated,
            "rowsInserted": commit.rows_inserted,
            "rowsDeleted": commit.rows_deleted,
        }
        for commit in commits
    }