def statistics_config(self, statistics_config):
    """Store the statistics configuration, normalising the accepted inputs.

    NOTE(review): presumably registered as the ``statistics_config``
    property setter -- the decorator is not visible here, but constructors
    in this file assign ``self.statistics_config = ...``.

    Args:
        statistics_config: One of
            * a ``StatisticsConfig`` instance, stored as is;
            * a ``dict``, expanded as keyword arguments to
              ``StatisticsConfig``;
            * a ``bool``, passed as the single positional argument to
              ``StatisticsConfig``;
            * ``None``, which yields a default-constructed
              ``StatisticsConfig()``.

    Raises:
        TypeError: If the argument is none of the types listed above.
    """
    if isinstance(statistics_config, StatisticsConfig):
        self._statistics_config = statistics_config
    elif isinstance(statistics_config, dict):
        self._statistics_config = StatisticsConfig(**statistics_config)
    elif isinstance(statistics_config, bool):
        self._statistics_config = StatisticsConfig(statistics_config)
    elif statistics_config is None:
        self._statistics_config = StatisticsConfig()
    else:
        # The previous message had an unbalanced backtick and a missing "or",
        # which made the list of accepted types hard to read.
        raise TypeError(
            "The argument `statistics_config` has to be `None` or of type "
            "`StatisticsConfig`, `bool` or `dict`, but is of type: `{}`".format(
                type(statistics_config)
            )
        )
def __init__(
    self,
    storage_connector,
    query=None,
    data_format=None,
    path=None,
    options=None,
    name=None,
    version=None,
    description=None,
    featurestore_id=None,
    featurestore_name=None,
    created=None,
    creator=None,
    id=None,
    features=None,
    jobs=None,
    statistics_config=None,
):
    """Initialise the feature group from user arguments or a backend payload.

    A truthy ``id`` selects the backend path: ``features``,
    ``statistics_config`` and ``options`` are deserialised from their
    response representation. Otherwise the values are taken as supplied by
    the user, with ``statistics_config`` routed through the property setter.

    Args:
        storage_connector: A connector object, or a ``dict`` which is
            deserialised with ``sc.StorageConnector.from_response_json``.
        query, data_format, path, name, version, description, created,
            creator, jobs: Stored unchanged on the instance.
        options: User path: a mapping stored as is (a new empty dict when
            omitted or ``None``). Backend path: an iterable of
            ``{"name": ..., "value": ...}`` entries folded into a dict, or
            ``None`` when empty or missing.
        featurestore_id: Passed to the parent constructor and to the engine.
        featurestore_name: Stored as ``_feature_store_name``.
        id: Backend identifier; its truthiness selects the init path.
        features: Feature objects (user path) or their response
            representation (backend path).
        statistics_config: See the ``statistics_config`` setter (user path)
            or ``StatisticsConfig.from_response_json`` (backend path).
    """
    super().__init__(featurestore_id)

    self._feature_store_id = featurestore_id
    self._feature_store_name = featurestore_name
    self._description = description
    self._created = created
    self._creator = creator
    self._version = version
    self._name = name
    self._query = query
    self._data_format = data_format
    self._path = path
    self._id = id
    self._jobs = jobs

    self._feature_group_engine = (
        on_demand_feature_group_engine.OnDemandFeatureGroupEngine(featurestore_id)
    )

    if self._id:
        # Got from Hopsworks, deserialize features and storage connector
        self._features = (
            [feature.Feature.from_response_json(feat) for feat in features]
            if features
            else None
        )
        self._statistics_config = StatisticsConfig.from_response_json(
            statistics_config
        )
        self._options = (
            {option["name"]: option["value"] for option in options}
            if options
            else None
        )
    else:
        self.statistics_config = statistics_config
        self._features = features
        # A `{}` default in the signature would be one dict object shared by
        # every instance created without `options`; build a fresh one instead.
        self._options = {} if options is None else options

    if storage_connector is not None and isinstance(storage_connector, dict):
        self._storage_connector = sc.StorageConnector.from_response_json(
            storage_connector
        )
    else:
        self._storage_connector = storage_connector
def __init__(
    self,
    name,
    version,
    data_format,
    featurestore_id,
    location="",
    event_start_time=None,
    event_end_time=None,
    coalesce=False,
    description=None,
    storage_connector=None,
    splits=None,
    validation_size=None,
    test_size=None,
    train_start=None,
    train_end=None,
    validation_start=None,
    validation_end=None,
    test_start=None,
    test_end=None,
    seed=None,
    created=None,
    creator=None,
    features=None,
    statistics_config=None,
    featurestore_name=None,
    id=None,
    inode_id=None,
    training_dataset_type=None,
    from_query=None,
    querydto=None,
    label=None,
    transformation_functions=None,
    train_split=None,
):
    """Initialise a training dataset from user arguments or a backend payload.

    ``created is None`` selects the user path, where connector, splits and
    statistics configuration go through their property setters. Any other
    value of ``created`` selects the backend path, where those fields are
    deserialised from their response representation.

    Args:
        name, version, description, data_format, coalesce, seed, location,
            from_query, querydto, transformation_functions: Stored unchanged.
        featurestore_id: Stored and passed to the API client and engines.
        event_start_time, event_end_time: Converted with
            ``_convert_event_time_to_timestamp`` before being stored.
        validation_size, test_size: Stored; on the user path, if either is
            truthy, train/validation/test splits are derived from them and
            ``train_split`` is forced to ``TrainingDatasetSplit.TRAIN``.
        train_start, train_end, validation_start, validation_end,
            test_start, test_end: Stored, and on the user path also handed
            to ``_set_time_splits``.
        storage_connector, splits, statistics_config, features: Raw user
            values (user path) or response representations (backend path).
            ``features`` and ``splits`` may be ``None`` on the backend path.
        label: Used as is on the user path; on the backend path it is
            recomputed from the features flagged as labels.
        training_dataset_type: Routed through the property setter when
            truthy, otherwise ``_training_dataset_type`` is set to ``None``.
        train_split: Initial value of ``_train_split``.
        created: Only used to tell the two init paths apart.
        id: Stored as ``_id``.
        creator, featurestore_name, inode_id: Accepted but not stored by
            this constructor.
    """
    self._id = id
    self._name = name
    self._version = version
    self._description = description
    self._data_format = data_format
    self._start_time = self._convert_event_time_to_timestamp(event_start_time)
    self._end_time = self._convert_event_time_to_timestamp(event_end_time)
    self._validation_size = validation_size
    self._test_size = test_size
    self._train_start = train_start
    self._train_end = train_end
    self._validation_start = validation_start
    self._validation_end = validation_end
    self._test_start = test_start
    self._test_end = test_end
    self._coalesce = coalesce
    self._seed = seed
    self._location = location
    self._from_query = from_query
    self._querydto = querydto
    self._feature_store_id = featurestore_id
    self._transformation_functions = transformation_functions
    self._train_split = train_split

    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        featurestore_id
    )
    self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
        featurestore_id
    )
    self._statistics_engine = statistics_engine.StatisticsEngine(
        featurestore_id, self.ENTITY_TYPE
    )
    self._code_engine = code_engine.CodeEngine(featurestore_id, self.ENTITY_TYPE)
    self._transformation_function_engine = (
        transformation_function_engine.TransformationFunctionEngine(featurestore_id)
    )

    # The type must be settled before the connector setter below runs.
    if training_dataset_type:
        self.training_dataset_type = training_dataset_type
    else:
        self._training_dataset_type = None

    # set up depending on user initialized or coming from backend response
    if created is None:
        # no creation timestamp -> user init
        self._features = features
        self.storage_connector = storage_connector
        self.splits = splits
        self.statistics_config = statistics_config
        self._label = label

        if validation_size or test_size:
            # Size-based splitting overrides whatever `splits` was given.
            self._train_split = TrainingDatasetSplit.TRAIN
            self.splits = {
                TrainingDatasetSplit.TRAIN: 1
                - (validation_size or 0)
                - (test_size or 0),
                TrainingDatasetSplit.VALIDATION: validation_size,
                TrainingDatasetSplit.TEST: test_size,
            }
        self._set_time_splits(
            train_start,
            train_end,
            validation_start,
            validation_end,
            test_start,
            test_end,
        )
    else:
        # creation timestamp available -> init from backend response
        # make rest call to get all connector information, description etc.
        self._storage_connector = StorageConnector.from_response_json(
            storage_connector
        )

        if features is None:
            features = []
        self._features = [
            training_dataset_feature.TrainingDatasetFeature.from_response_json(feat)
            for feat in features
        ]

        # Same tolerance as for `features`: a response without splits must
        # not fail with "NoneType is not iterable".
        if splits is None:
            splits = []
        self._splits = [
            TrainingDatasetSplit.from_response_json(split) for split in splits
        ]

        self._statistics_config = StatisticsConfig.from_response_json(
            statistics_config
        )
        self._label = [feat.name.lower() for feat in self._features if feat.label]

    self._vector_server = vector_server.VectorServer(
        featurestore_id, features=self._features
    )
def __init__(
    self,
    name,
    version,
    data_format,
    location,
    featurestore_id,
    coalesce=False,
    description=None,
    storage_connector=None,
    splits=None,
    seed=None,
    created=None,
    creator=None,
    features=None,
    statistics_config=None,
    featurestore_name=None,
    id=None,
    inode_id=None,
    training_dataset_type=None,
    from_query=None,
    querydto=None,
    label=None,
):
    """Initialise a training dataset from user arguments or a backend payload.

    ``training_dataset_type is None`` selects the user path, where connector,
    splits and statistics configuration go through their property setters.
    Otherwise the backend path deserialises connector, features and
    statistics configuration from their response representation.

    Args:
        name, version, description, data_format, coalesce, seed, location,
            from_query, querydto: Stored unchanged on the instance.
        featurestore_id: Stored and passed to the API client and engines.
        storage_connector, statistics_config, features: Raw user values
            (user path) or response representations (backend path).
            ``features`` may be ``None`` on the backend path.
        splits: Routed through the setter (user path) or stored as is
            (backend path).
        label: Used as is on the user path; on the backend path it is
            recomputed from the features flagged as labels.
        training_dataset_type: Selects the init path; stored on the backend
            path only.
        id: Stored as ``_id``.
        created, creator, featurestore_name, inode_id: Accepted but not
            stored by this constructor.
    """
    self._id = id
    self._name = name
    self._version = version
    self._description = description
    self._data_format = data_format
    self._coalesce = coalesce
    self._seed = seed
    self._location = location
    self._from_query = from_query
    self._querydto = querydto
    self._feature_store_id = featurestore_id

    self._prepared_statement_connection = None
    self._prepared_statements = None
    self._serving_keys = None

    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        featurestore_id
    )
    self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
        featurestore_id
    )
    self._statistics_engine = statistics_engine.StatisticsEngine(
        featurestore_id, self.ENTITY_TYPE
    )

    # set up depending on user initialized or coming from backend response
    if training_dataset_type is None:
        # no type -> user init
        self._features = features
        self.storage_connector = storage_connector
        self.splits = splits
        self.statistics_config = statistics_config
        self._label = label
    else:
        # type available -> init from backend response
        # make rest call to get all connector information, description etc.
        self._storage_connector = StorageConnector.from_response_json(
            storage_connector
        )

        # A response without features must not fail with
        # "NoneType is not iterable".
        if features is None:
            features = []
        self._features = [
            training_dataset_feature.TrainingDatasetFeature.from_response_json(feat)
            for feat in features
        ]

        self._splits = splits
        self._training_dataset_type = training_dataset_type
        self._statistics_config = StatisticsConfig.from_response_json(
            statistics_config
        )
        self._label = [feat.name.lower() for feat in self._features if feat.label]
def __init__(
    self,
    name,
    version,
    featurestore_id,
    description="",
    partition_key=None,
    primary_key=None,
    hudi_precombine_key=None,
    featurestore_name=None,
    created=None,
    creator=None,
    id=None,
    features=None,
    location=None,
    jobs=None,
    online_enabled=False,
    time_travel_format=None,
    statistics_config=None,
):
    """Initialise a feature group from user arguments or a backend payload.

    ``id is not None`` selects the backend path: primary key, partition key
    and hudi precombine key are derived from flags on the features, and the
    statistics configuration is deserialised from the response. Otherwise
    those values are taken from the corresponding arguments.

    Args:
        name, version, description, created, creator, location, jobs,
            online_enabled: Stored unchanged on the instance.
        featurestore_id: Passed to the parent constructor and the engine.
        featurestore_name: Stored as ``_feature_store_name``.
        id: Backend identifier; ``None`` means user initialised.
        features: Iterable of feature objects and/or dicts; dicts are
            deserialised with ``feature.Feature.from_response_json``.
            ``None`` is treated as no features.
        primary_key, partition_key: Used on the user path only.
        hudi_precombine_key: Used on the user path, and only when
            ``time_travel_format`` is "HUDI" (case-insensitive).
        time_travel_format: Stored upper-cased, or ``None``.
        statistics_config: See the ``statistics_config`` setter (user path)
            or ``StatisticsConfig.from_response_json`` (backend path).
    """
    super().__init__(featurestore_id)

    self._feature_store_id = featurestore_id
    self._feature_store_name = featurestore_name
    self._description = description
    self._created = created
    self._creator = creator
    self._version = version
    self._name = name
    self._id = id

    # `features` defaults to None; iterating it directly made the default
    # call raise TypeError.
    feature_items = [] if features is None else features
    self._features = [
        feature.Feature.from_response_json(feat) if isinstance(feat, dict) else feat
        for feat in feature_items
    ]

    self._location = location
    self._jobs = jobs
    self._online_enabled = online_enabled
    self._time_travel_format = (
        time_travel_format.upper() if time_travel_format is not None else None
    )
    # `_time_travel_format` is already None-safe and upper-cased.
    is_hudi = self._time_travel_format == "HUDI"

    if id is not None:
        # initialized by backend
        self._primary_key = [
            feat.name for feat in self._features if feat.primary is True
        ]
        self._partition_key = [
            feat.name for feat in self._features if feat.partition is True
        ]
        if is_hudi:
            # hudi precombine key is always a single feature; fall back to
            # None instead of raising IndexError when no feature is flagged.
            self._hudi_precombine_key = next(
                (
                    feat.name
                    for feat in self._features
                    if feat.hudi_precombine_key is True
                ),
                None,
            )
        else:
            self._hudi_precombine_key = None

        self._statistics_config = StatisticsConfig.from_response_json(
            statistics_config
        )
    else:
        # initialized by user
        self._primary_key = primary_key
        self._partition_key = partition_key
        self._hudi_precombine_key = hudi_precombine_key if is_hudi else None
        self.statistics_config = statistics_config

    self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
        featurestore_id
    )
def __init__(
    self,
    name,
    version,
    featurestore_id,
    description="",
    partition_key=None,
    primary_key=None,
    featurestore_name=None,
    created=None,
    creator=None,
    id=None,
    features=None,
    location=None,
    jobs=None,
    desc_stats_enabled=None,
    feat_corr_enabled=None,
    feat_hist_enabled=None,
    statistic_columns=None,
    online_enabled=False,
    time_travel_format=None,
    hudi_enabled=False,
    statistics_config=None,
):
    """Initialise a feature group from user arguments or a backend payload.

    ``id is not None`` selects the backend path: the statistics
    configuration is built from the four flat statistics arguments, and
    primary/partition keys are derived from flags on the features.
    Otherwise ``statistics_config``, ``primary_key`` and ``partition_key``
    are taken from the arguments.

    Args:
        name, version, description, created, creator, location, jobs,
            online_enabled, hudi_enabled: Stored unchanged on the instance.
        featurestore_id: Passed to the parent constructor and the engines.
        featurestore_name: Stored as ``_feature_store_name``.
        id: Backend identifier; ``None`` means user initialised.
        features: Iterable of feature objects and/or dicts; dicts are
            deserialised with ``feature.Feature.from_response_json``.
            ``None`` is treated as no features.
        desc_stats_enabled, feat_corr_enabled, feat_hist_enabled,
            statistic_columns: Passed positionally, in this order, to
            ``StatisticsConfig`` on the backend path; ignored otherwise.
        time_travel_format: Stored upper-cased, or ``None``.
        primary_key, partition_key, statistics_config: Used on the user
            path only.
    """
    super().__init__(featurestore_id)

    self._feature_store_id = featurestore_id
    self._feature_store_name = featurestore_name
    self._description = description
    self._created = created
    self._creator = creator
    self._version = version
    self._name = name
    self._id = id

    # `features` defaults to None; iterating it directly made the default
    # call raise TypeError.
    feature_items = [] if features is None else features
    self._features = [
        feature.Feature.from_response_json(feat) if isinstance(feat, dict) else feat
        for feat in feature_items
    ]

    self._location = location
    self._jobs = jobs
    self._online_enabled = online_enabled
    self._time_travel_format = (
        time_travel_format.upper() if time_travel_format is not None else None
    )
    self._hudi_enabled = hudi_enabled

    if id is not None:
        # initialized by backend
        self.statistics_config = StatisticsConfig(
            desc_stats_enabled,
            feat_corr_enabled,
            feat_hist_enabled,
            statistic_columns,
        )
        self._primary_key = [
            feat.name for feat in self._features if feat.primary is True
        ]
        self._partition_key = [
            feat.name for feat in self._features if feat.partition is True
        ]
    else:
        # initialized by user
        self.statistics_config = statistics_config
        self._primary_key = primary_key
        self._partition_key = partition_key

    self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
        featurestore_id
    )
    self._statistics_engine = statistics_engine.StatisticsEngine(
        featurestore_id, self.ENTITY_TYPE
    )
def __init__(
    self,
    name,
    version,
    description,
    featurestore_id,
    partition_key=None,
    primary_key=None,
    featurestore_name=None,
    created=None,
    creator=None,
    id=None,
    features=None,
    location=None,
    jobs=None,
    desc_stats_enabled=None,
    feat_corr_enabled=None,
    feat_hist_enabled=None,
    statistic_columns=None,
    online_enabled=False,
    hudi_enabled=False,
    default_storage="offline",
    statistics_config=None,
):
    """Initialise a feature group from user arguments or a backend payload.

    ``id is not None`` selects the backend path: primary/partition keys are
    derived from flags on the features and the statistics configuration is
    built from the four flat statistics arguments. Otherwise
    ``primary_key``, ``partition_key`` and ``statistics_config`` are taken
    from the arguments.

    Args:
        name, version, description, created, creator, location, jobs,
            online_enabled, hudi_enabled, default_storage: Stored unchanged.
        featurestore_id: Stored and passed to the engines.
        featurestore_name: Stored as ``_feature_store_name``.
        id: Backend identifier; ``None`` means user initialised.
        features: Iterable of feature representations, each deserialised
            with ``feature.Feature.from_response_json``. ``None`` is
            treated as no features.
        desc_stats_enabled, feat_corr_enabled, feat_hist_enabled,
            statistic_columns: Passed positionally, in this order, to
            ``StatisticsConfig`` on the backend path; ignored otherwise.
        primary_key, partition_key, statistics_config: Used on the user
            path only.
    """
    self._feature_store_id = featurestore_id
    self._feature_store_name = featurestore_name
    self._description = description
    self._created = created
    self._creator = creator
    self._version = version
    self._name = name
    self._id = id

    # `features` defaults to None; iterating it directly made the default
    # call raise TypeError.
    feature_items = [] if features is None else features
    self._features = [
        feature.Feature.from_response_json(feat) for feat in feature_items
    ]

    self._location = location
    self._jobs = jobs
    self._online_enabled = online_enabled
    self._default_storage = default_storage
    self._hudi_enabled = hudi_enabled

    # The keys used to be computed twice, with the first result always
    # overwritten; only the effective (`is True`) variant is kept. They are
    # still assigned before the statistics_config setter runs.
    if id is not None:
        # initialized by backend
        self._primary_key = [
            feat.name for feat in self._features if feat.primary is True
        ]
        self._partition_key = [
            feat.name for feat in self._features if feat.partition is True
        ]
        self.statistics_config = StatisticsConfig(
            desc_stats_enabled,
            feat_corr_enabled,
            feat_hist_enabled,
            statistic_columns,
        )
    else:
        # initialized by user
        self._primary_key = primary_key
        self._partition_key = partition_key
        self.statistics_config = statistics_config

    self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
        featurestore_id
    )
    self._statistics_engine = statistics_engine.StatisticsEngine(
        featurestore_id, self.ENTITY_TYPE
    )