def statistics_config(self, statistics_config):
    """Normalise and store the statistics configuration on the instance.

    The value is converted to a `StatisticsConfig` and stored in
    `self._statistics_config`:

    * `StatisticsConfig` instance: stored as is.
    * `dict`: expanded as keyword arguments to `StatisticsConfig`.
    * `bool`: passed as the single positional argument of
      `StatisticsConfig`.
    * `None`: a default-constructed `StatisticsConfig` is used.

    Raises:
        TypeError: if the value is none of the accepted kinds.
    """
    if isinstance(statistics_config, StatisticsConfig):
        self._statistics_config = statistics_config
    elif isinstance(statistics_config, dict):
        self._statistics_config = StatisticsConfig(**statistics_config)
    elif isinstance(statistics_config, bool):
        self._statistics_config = StatisticsConfig(statistics_config)
    elif statistics_config is None:
        self._statistics_config = StatisticsConfig()
    else:
        # Message previously had a missing "or" and an unbalanced backtick.
        raise TypeError(
            "The argument `statistics_config` has to be `None` or of type "
            "`StatisticsConfig`, `bool` or `dict`, but is of type: `{}`"
            .format(type(statistics_config)))
Exemple #2
0
    def __init__(
        self,
        storage_connector,
        query=None,
        data_format=None,
        path=None,
        options=None,
        name=None,
        version=None,
        description=None,
        featurestore_id=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        jobs=None,
        statistics_config=None,
    ):
        """Set up the object from user arguments or from a backend response.

        A truthy `id` is treated as "deserializing a backend response":
        `features` entries are parsed with
        `feature.Feature.from_response_json`, `statistics_config` with
        `StatisticsConfig.from_response_json`, and `options` (an iterable
        of mappings with `"name"` and `"value"` keys) is folded into a
        plain dict. Otherwise the values are stored as passed, with
        `statistics_config` going through the property setter.

        Args:
            storage_connector: connector object, or a dict that is parsed
                with `sc.StorageConnector.from_response_json`.
            options: user path: mapping stored on the instance (a new
                empty dict when omitted or `None`); backend path: iterable
                of name/value entries, stored as a dict (`None` if empty).
            statistics_config: see the two paths described above.

        The remaining arguments are stored unchanged on same-named private
        attributes.
        """
        super().__init__(featurestore_id)

        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._query = query
        self._data_format = data_format
        self._path = path
        self._id = id
        self._jobs = jobs

        self._feature_group_engine = (
            on_demand_feature_group_engine.OnDemandFeatureGroupEngine(
                featurestore_id))

        if self._id:
            # Got from Hopsworks, deserialize features and storage connector
            self._features = ([
                feature.Feature.from_response_json(feat) for feat in features
            ] if features else None)
            self._statistics_config = StatisticsConfig.from_response_json(
                statistics_config)

            self._options = (
                {option["name"]: option["value"]
                 for option in options} if options else None)
        else:
            self.statistics_config = statistics_config
            self._features = features
            # A fresh dict per instance: the former `options={}` default was
            # a single dict shared by every instance created without options.
            self._options = {} if options is None else options

        # isinstance() is already False for None, no separate check needed.
        if isinstance(storage_connector, dict):
            self._storage_connector = sc.StorageConnector.from_response_json(
                storage_connector)
        else:
            self._storage_connector = storage_connector
    def __init__(
        self,
        name,
        version,
        data_format,
        featurestore_id,
        location="",
        event_start_time=None,
        event_end_time=None,
        coalesce=False,
        description=None,
        storage_connector=None,
        splits=None,
        validation_size=None,
        test_size=None,
        train_start=None,
        train_end=None,
        validation_start=None,
        validation_end=None,
        test_start=None,
        test_end=None,
        seed=None,
        created=None,
        creator=None,
        features=None,
        statistics_config=None,
        featurestore_name=None,
        id=None,
        inode_id=None,
        training_dataset_type=None,
        from_query=None,
        querydto=None,
        label=None,
        transformation_functions=None,
        train_split=None,
    ):
        """Set up a training dataset from user arguments or a backend response.

        `created is None` selects the user path: `storage_connector`,
        `splits` and `statistics_config` go through their property setters,
        and when `validation_size` or `test_size` is truthy the splits are
        replaced by a train/validation/test mapping derived from those
        sizes (and `_train_split` is forced to `TrainingDatasetSplit.TRAIN`).
        Otherwise the arguments are treated as a backend response and
        parsed with the corresponding `from_response_json` helpers; a
        missing `features` or `splits` list is treated as empty.

        `creator`, `featurestore_name` and `inode_id` are accepted but not
        used by this constructor.

        NOTE(review): the property setters used in the user path are not
        visible here; statement order is kept as-is in case they depend on
        attributes assigned earlier.
        """
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._start_time = self._convert_event_time_to_timestamp(
            event_start_time)
        self._end_time = self._convert_event_time_to_timestamp(event_end_time)
        self._validation_size = validation_size
        self._test_size = test_size
        self._train_start = train_start
        self._train_end = train_end
        self._validation_start = validation_start
        self._validation_end = validation_end
        self._test_start = test_start
        self._test_end = test_end
        self._coalesce = coalesce
        self._seed = seed
        self._location = location
        self._from_query = from_query
        self._querydto = querydto
        self._feature_store_id = featurestore_id
        self._transformation_functions = transformation_functions
        self._train_split = train_split

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)

        self._code_engine = code_engine.CodeEngine(featurestore_id,
                                                   self.ENTITY_TYPE)

        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                featurestore_id))
        if training_dataset_type:
            # goes through the property setter
            self.training_dataset_type = training_dataset_type
        else:
            self._training_dataset_type = None
        # set up depending on user initialized or coming from backend response
        if created is None:
            # no creation timestamp -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
            self.statistics_config = statistics_config
            self._label = label
            if validation_size or test_size:
                # size-based splitting overrides any `splits` passed in;
                # the train share is whatever the other two leave over
                self._train_split = TrainingDatasetSplit.TRAIN
                self.splits = {
                    TrainingDatasetSplit.TRAIN:
                    1 - (validation_size or 0) - (test_size or 0),
                    TrainingDatasetSplit.VALIDATION:
                    validation_size,
                    TrainingDatasetSplit.TEST:
                    test_size,
                }
            self._set_time_splits(
                train_start,
                train_end,
                validation_start,
                validation_end,
                test_start,
                test_end,
            )
        else:
            # creation timestamp available -> init from backend response
            # make rest call to get all connector information, description etc.
            self._storage_connector = StorageConnector.from_response_json(
                storage_connector)

            if features is None:
                features = []
            self._features = [
                training_dataset_feature.TrainingDatasetFeature.
                from_response_json(feat) for feat in features
            ]
            # Guard against a response without splits, mirroring the
            # handling of `features` above (iterating None raises TypeError).
            self._splits = [
                TrainingDatasetSplit.from_response_json(split)
                for split in (splits or [])
            ]
            self._statistics_config = StatisticsConfig.from_response_json(
                statistics_config)
            self._label = [
                feat.name.lower() for feat in self._features if feat.label
            ]

        self._vector_server = vector_server.VectorServer(
            featurestore_id, features=self._features)
Exemple #4
0
    def __init__(
        self,
        name,
        version,
        data_format,
        location,
        featurestore_id,
        coalesce=False,
        description=None,
        storage_connector=None,
        splits=None,
        seed=None,
        created=None,
        creator=None,
        features=None,
        statistics_config=None,
        featurestore_name=None,
        id=None,
        inode_id=None,
        training_dataset_type=None,
        from_query=None,
        querydto=None,
        label=None,
    ):
        """Set up a training dataset from user arguments or a backend response.

        `training_dataset_type is None` selects the user path, where
        `storage_connector`, `splits` and `statistics_config` go through
        their property setters and `features`/`label` are stored as given.
        Otherwise the arguments are treated as a backend response:
        `storage_connector`, `features` and `statistics_config` are parsed
        with their `from_response_json` helpers (a missing `features` list
        is treated as empty) and the label list is derived from the
        features flagged as label.

        `created`, `creator`, `featurestore_name` and `inode_id` are
        accepted but not used by this constructor.

        NOTE(review): `_training_dataset_type` is only assigned here in the
        backend path; presumably one of the setters used in the user path
        assigns it -- confirm.
        """
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._coalesce = coalesce
        self._seed = seed
        self._location = location
        self._from_query = from_query
        self._querydto = querydto
        self._feature_store_id = featurestore_id
        self._prepared_statement_connection = None
        self._prepared_statements = None
        self._serving_keys = None

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)

        # set up depending on user initialized or coming from backend response
        if training_dataset_type is None:
            # no type -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
            self.statistics_config = statistics_config
            self._label = label
        else:
            # type available -> init from backend response
            # make rest call to get all connector information, description etc.
            self._storage_connector = StorageConnector.from_response_json(
                storage_connector)

            # `features` defaults to None; iterate an empty list in that
            # case instead of raising TypeError.
            self._features = [
                training_dataset_feature.TrainingDatasetFeature.
                from_response_json(feat) for feat in (features or [])
            ]
            self._splits = splits
            self._training_dataset_type = training_dataset_type
            self._statistics_config = StatisticsConfig.from_response_json(
                statistics_config)
            self._label = [
                feat.name.lower() for feat in self._features if feat.label
            ]
Exemple #5
0
    def __init__(
        self,
        name,
        version,
        featurestore_id,
        description="",
        partition_key=None,
        primary_key=None,
        hudi_precombine_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        location=None,
        jobs=None,
        online_enabled=False,
        time_travel_format=None,
        statistics_config=None,
    ):
        """Set up a feature group from user arguments or a backend response.

        `features` entries that are dicts are parsed with
        `feature.Feature.from_response_json`; other entries are kept as
        they are. A missing (`None`) `features` argument is treated as an
        empty list.

        When `id is not None` the object is considered backend-initialised:
        primary, partition and (for the "HUDI" time travel format) the
        precombine key are derived from the feature flags, and
        `statistics_config` is parsed with
        `StatisticsConfig.from_response_json`. Otherwise the key arguments
        are stored as given (`hudi_precombine_key` only for the "HUDI"
        format) and `statistics_config` goes through the property setter.
        """
        super().__init__(featurestore_id)

        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._id = id
        # `features` defaults to None; fall back to an empty list rather
        # than failing with "NoneType is not iterable".
        self._features = [
            feature.Feature.from_response_json(feat)
            if isinstance(feat, dict) else feat
            for feat in (features or [])
        ]

        self._location = location
        self._jobs = jobs
        self._online_enabled = online_enabled
        self._time_travel_format = (time_travel_format.upper() if
                                    time_travel_format is not None else None)
        # Already upper-cased (or None) above, so a plain comparison works.
        uses_hudi = self._time_travel_format == "HUDI"

        if id is not None:
            # initialized by backend
            self._primary_key = [
                feat.name for feat in self._features if feat.primary is True
            ]
            self._partition_key = [
                feat.name for feat in self._features if feat.partition is True
            ]
            if uses_hudi:
                # hudi precombine key is always a single feature; take the
                # first flagged one, or None if the response flags none
                # (indexing [0] on an empty list raised IndexError).
                self._hudi_precombine_key = next(
                    (feat.name for feat in self._features
                     if feat.hudi_precombine_key is True),
                    None,
                )
            else:
                self._hudi_precombine_key = None
            self._statistics_config = StatisticsConfig.from_response_json(
                statistics_config)

        else:
            # initialized by user
            self._primary_key = primary_key
            self._partition_key = partition_key
            self._hudi_precombine_key = (
                hudi_precombine_key if uses_hudi else None)
            self.statistics_config = statistics_config

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id)
Exemple #6
0
    def __init__(
        self,
        name,
        version,
        featurestore_id,
        description="",
        partition_key=None,
        primary_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        location=None,
        jobs=None,
        desc_stats_enabled=None,
        feat_corr_enabled=None,
        feat_hist_enabled=None,
        statistic_columns=None,
        online_enabled=False,
        time_travel_format=None,
        hudi_enabled=False,
        statistics_config=None,
    ):
        """Set up a feature group from user arguments or a backend response.

        `features` entries that are dicts are parsed with
        `feature.Feature.from_response_json`; other entries are kept as
        they are. A missing (`None`) `features` argument is treated as an
        empty list.

        When `id is not None` the object is considered backend-initialised:
        the statistics configuration is built from the four flat
        arguments (`desc_stats_enabled`, `feat_corr_enabled`,
        `feat_hist_enabled`, `statistic_columns`) and the primary and
        partition keys are derived from the feature flags. Otherwise
        `statistics_config`, `primary_key` and `partition_key` are used as
        passed. In both cases the configuration goes through the
        `statistics_config` property setter.
        """
        super().__init__(featurestore_id)

        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._id = id
        # `features` defaults to None; fall back to an empty list rather
        # than failing with "NoneType is not iterable".
        self._features = [
            feature.Feature.from_response_json(feat) if isinstance(feat, dict) else feat
            for feat in (features or [])
        ]
        self._location = location
        self._jobs = jobs
        self._online_enabled = online_enabled
        self._time_travel_format = (
            time_travel_format.upper() if time_travel_format is not None else None
        )
        self._hudi_enabled = hudi_enabled

        if id is not None:
            # initialized by backend
            self.statistics_config = StatisticsConfig(
                desc_stats_enabled,
                feat_corr_enabled,
                feat_hist_enabled,
                statistic_columns,
            )
            self._primary_key = [
                feat.name for feat in self._features if feat.primary is True
            ]
            self._partition_key = [
                feat.name for feat in self._features if feat.partition is True
            ]
        else:
            # initialized by user
            self.statistics_config = statistics_config
            self._primary_key = primary_key
            self._partition_key = partition_key

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id
        )

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE
        )
Exemple #7
0
    def __init__(
        self,
        name,
        version,
        description,
        featurestore_id,
        partition_key=None,
        primary_key=None,
        featurestore_name=None,
        created=None,
        creator=None,
        id=None,
        features=None,
        location=None,
        jobs=None,
        desc_stats_enabled=None,
        feat_corr_enabled=None,
        feat_hist_enabled=None,
        statistic_columns=None,
        online_enabled=False,
        hudi_enabled=False,
        default_storage="offline",
        statistics_config=None,
    ):
        """Set up a feature group from user arguments or a backend response.

        Every entry of `features` is parsed with
        `feature.Feature.from_response_json`; a missing (`None`) `features`
        argument is treated as an empty list.

        When `id is not None` the object is considered backend-initialised:
        the statistics configuration is built from the four flat
        arguments (`desc_stats_enabled`, `feat_corr_enabled`,
        `feat_hist_enabled`, `statistic_columns`) and the primary and
        partition keys are the names of the features whose `primary` /
        `partition` flag is exactly `True`. Otherwise `statistics_config`,
        `primary_key` and `partition_key` are used as passed. In both
        cases the configuration goes through the `statistics_config`
        property setter.
        """
        self._feature_store_id = featurestore_id
        self._feature_store_name = featurestore_name
        self._description = description
        self._created = created
        self._creator = creator
        self._version = version
        self._name = name
        self._id = id
        # `features` defaults to None; fall back to an empty list rather
        # than failing with "NoneType is not iterable".
        self._features = [
            feature.Feature.from_response_json(feat)
            for feat in (features or [])
        ]
        self._location = location
        self._jobs = jobs
        self._online_enabled = online_enabled
        self._default_storage = default_storage
        self._hudi_enabled = hudi_enabled

        # The keys used to be computed twice in a row; the first result was
        # always overwritten, so only the effective computation is kept.
        if id is not None:
            # initialized by backend
            self.statistics_config = StatisticsConfig(
                desc_stats_enabled,
                feat_corr_enabled,
                feat_hist_enabled,
                statistic_columns,
            )
            self._primary_key = [
                feat.name for feat in self._features if feat.primary is True
            ]
            self._partition_key = [
                feat.name for feat in self._features if feat.partition is True
            ]
        else:
            # initialized by user
            self.statistics_config = statistics_config
            self._primary_key = primary_key
            self._partition_key = partition_key

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)