Example #1
    def __init__(
        self,
        feature_store_id,
        feature_store_name,
        feature_group,
        spark_context,
        spark_session,
    ):
        self._feature_group = feature_group
        self._spark_context = spark_context
        self._spark_session = spark_session
        self._feature_store_id = feature_store_id
        self._feature_store_name = feature_store_name
        self._base_path = self._feature_group.location
        self._table_name = util.feature_group_name(feature_group)

        self._primary_key = ",".join(feature_group.primary_key)
        self._partition_key = (",".join(feature_group.partition_key) if
                               len(feature_group.partition_key) >= 1 else "")
        self._partition_path = (":SIMPLE,".join(feature_group.partition_key) +
                                ":SIMPLE" if
                                len(feature_group.partition_key) >= 1 else "")
        self._pre_combine_key = (feature_group.hudi_precombine_key
                                 if feature_group.hudi_precombine_key else
                                 feature_group.primary_key[0])

        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._feature_store_id)
        self._connstr = self._storage_connector_api.get(
            self._feature_store_name).connection_string
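The comma-joined record key, the ":SIMPLE"-suffixed partition path and the precombine key built above follow the format Apache Hudi's writer options expect. As context, a minimal sketch of how such attributes typically feed a Hudi write; the option keys are standard Hudi settings, while `engine` (an instance of the class above) and the Spark DataFrame `df` are hypothetical:

    # Illustrative sketch only: maps the attributes built above onto standard Hudi writer options.
    hudi_options = {
        "hoodie.table.name": engine._table_name,
        # comma-separated record key field(s), e.g. "id" or "id,sub_id"
        "hoodie.datasource.write.recordkey.field": engine._primary_key,
        # partition fields in "field:SIMPLE" form, as Hudi's CustomKeyGenerator expects
        "hoodie.datasource.write.partitionpath.field": engine._partition_path,
        # field used to pick the latest record among duplicates on upsert
        "hoodie.datasource.write.precombine.field": engine._pre_combine_key,
    }
    df.write.format("hudi").options(**hudi_options).mode("append").save(engine._base_path)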
Example #2
    def __init__(self, feature_store_id):
        self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
        self._kafka_api = kafka_api.KafkaApi()
Example #3
    def __init__(
        self,
        name,
        version,
        description,
        data_format,
        location,
        featurestore_id,
        storage_connector=None,
        splits=None,
        seed=None,
        cluster_analysis=None,
        created=None,
        creator=None,
        descriptive_statistics=None,
        feature_correlation_matrix=None,
        features=None,
        features_histogram=None,
        featurestore_name=None,
        id=None,
        jobs=None,
        inode_id=None,
        storage_connector_name=None,
        storage_connector_id=None,
        storage_connector_type=None,
        training_dataset_type=None,
    ):
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._seed = seed
        self._location = location

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            featurestore_id)

        # set up depending on user initialized or coming from backend response
        if training_dataset_type is None:
            # no type -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
        else:
            # type available -> init from backend response
            # make rest call to get all connector information, description etc.
            self._storage_connector = self._storage_connector_api.get_by_id(
                storage_connector_id, storage_connector_type)
            self._features = [
                feature.Feature.from_response_json(feat) for feat in features
            ]
            self._splits = splits
            self._training_dataset_type = training_dataset_type
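The branch at the end separates a user-created object (no `training_dataset_type` yet) from one rebuilt from a backend response, which triggers the extra REST call for connector details. A hypothetical user-side instantiation, assuming the class is called `TrainingDataset` and using illustrative argument values, only exercises the first branch:

    # Hypothetical values, for illustration only: with training_dataset_type left as None,
    # the constructor keeps the passed-in features/storage_connector/splits and makes no
    # extra call to resolve a storage connector.
    td = TrainingDataset(
        name="demo_training_data",
        version=1,
        description="illustrative example",
        data_format="csv",
        location="",
        featurestore_id=99,
    )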
Example #4
    def __init__(
        self,
        feature_store_id,
        feature_store_name,
        feature_group,
        spark_context,
        spark_session,
    ):
        self._feature_group = feature_group
        self._spark_context = spark_context
        self._spark_session = spark_session
        self._feature_store_id = feature_store_id
        self._feature_store_name = feature_store_name
        self._base_path = self._feature_group.location
        self._table_name = feature_group.name + "_" + str(
            feature_group.version)

        self._primary_key = ",".join(feature_group.primary_key)
        self._partition_key = ",".join(feature_group.partition_key)
        self._partition_path = ":SIMPLE,".join(
            feature_group.partition_key) + ":SIMPLE"
        self._pre_combine_key = feature_group.primary_key[0]

        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._feature_store_id)
        self._connstr = self._storage_connector_api.get(
            self._feature_store_name,
            storage_connector.StorageConnector.JDBC,
        ).connection_string
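Unlike Example #1, this variant requests the connector explicitly as a JDBC connector; a connection string of that kind is what Hudi's Hive-sync options typically consume. A hedged sketch, where the option keys are standard Hudi settings and `engine` is a hypothetical instance of the class above:

    # Illustrative only: the JDBC connection string fetched above is the sort of value
    # Hudi's Hive sync expects when registering the table in the metastore.
    hive_sync_options = {
        "hoodie.datasource.hive_sync.enable": "true",
        "hoodie.datasource.hive_sync.table": engine._table_name,
        "hoodie.datasource.hive_sync.jdbcurl": engine._connstr,
        "hoodie.datasource.hive_sync.partition_fields": engine._partition_key,
    }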
Example #5
    def __init__(self, feature_store_id, features=None, training_dataset_version=None):
        self._training_dataset_version = training_dataset_version
        # use None as the default to avoid sharing one mutable list across instances
        self._features = features if features is not None else []
        self._prepared_statement_engine = None
        self._prepared_statements = None
        self._serving_keys = None
        self._pkname_by_serving_index = None
        self._prefix_by_serving_index = None
        self._external = True
        self._feature_store_id = feature_store_id
        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            feature_store_id
        )
        self._feature_view_api = feature_view_api.FeatureViewApi(feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id
        )
        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                feature_store_id
            )
        )
        self._feature_view_engine = feature_view_engine.FeatureViewEngine(
            feature_store_id
        )
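This engine caches prepared statements and serving-key metadata so single rows of features can be looked up at serving time. Assuming an hsfs-style feature view sits on top of it (the method names below are assumptions about that surrounding API, not shown in this snippet), usage is roughly:

    # Rough usage sketch; `feature_view` and the key name are illustrative.
    feature_view.init_serving()                                  # builds the prepared statements
    row = feature_view.get_feature_vector(entry={"customer_id": 42})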
Example #6
    def __init__(self, id, name, description, featurestore_id):
        self._id = id
        self._name = name
        self._description = description
        self._featurestore_id = featurestore_id

        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._featurestore_id)
Example #7
    def __init__(self, feature_store_id):
        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            feature_store_id
        )
        self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id
        )
Example #8
    def __init__(self, feature_store_id):
        self._feature_store_id = feature_store_id

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            feature_store_id)
        self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                feature_store_id))
Example #9
    def __init__(
        self, feature_store_name, feature_store_id, left_feature_group, left_features
    ):
        self._feature_store_name = feature_store_name
        self._feature_store_id = feature_store_id
        self._left_feature_group = left_feature_group
        self._left_features = util.parse_features(left_features)
        self._joins = []
        self._query_constructor_api = query_constructor_api.QueryConstructorApi()
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id
        )
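This constructor backs a query-style object: it wraps a root ("left") feature group with its selected features, and later joins accumulate in `self._joins`. A heavily hedged sketch of how such a query is usually composed in hsfs-like APIs; the `select_all`/`select`/`join`/`read` calls are assumptions about that surrounding API, and the feature group and column names are made up:

    # Illustrative only: compose a query from two hypothetical feature groups,
    # then materialize it (e.g. as a Spark DataFrame).
    query = customers_fg.select_all().join(
        orders_fg.select(["order_total", "order_ts"]),
        on=["customer_id"],
    )
    df = query.read()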
Example #10
    def __init__(
        self,
        featurestore_id,
        featurestore_name,
        created,
        hdfs_store_path,
        project_name,
        project_id,
        featurestore_description,
        inode_id,
        offline_featurestore_name,
        hive_endpoint,
        online_enabled,
        num_feature_groups=None,
        num_training_datasets=None,
        num_storage_connectors=None,
        online_featurestore_name=None,
        mysql_server_endpoint=None,
        online_featurestore_size=None,
    ):
        self._id = featurestore_id
        self._name = featurestore_name
        self._created = created
        self._hdfs_store_path = hdfs_store_path
        self._project_name = project_name
        self._project_id = project_id
        self._description = featurestore_description
        self._inode_id = inode_id
        self._online_feature_store_name = online_featurestore_name
        self._online_feature_store_size = online_featurestore_size
        self._offline_feature_store_name = offline_featurestore_name
        self._hive_endpoint = hive_endpoint
        self._mysql_server_endpoint = mysql_server_endpoint
        self._online_enabled = online_enabled
        self._num_feature_groups = num_feature_groups
        self._num_training_datasets = num_training_datasets
        self._num_storage_connectors = num_storage_connectors

        self._feature_group_api = feature_group_api.FeatureGroupApi(self._id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._id)
        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            self._id)
        self._expectations_api = expectations_api.ExpectationsApi(self._id)

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            self._id)

        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                self._id))
        self._feature_view_engine = feature_view_engine.FeatureViewEngine(
            self._id)
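This metadata object wires every per-entity API and engine to a single feature store id, so the higher-level getters can delegate to them. Typical downstream usage in hsfs-style code, shown only as an assumption about the surrounding library (the connection and getter calls are not part of this snippet):

    # Illustrative only: obtain a feature store handle and fetch a feature group from it.
    import hsfs

    connection = hsfs.connection()        # host/credentials resolved from the environment
    fs = connection.get_feature_store()   # returns an object like the class above
    fg = fs.get_feature_group("transactions", version=1)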
Example #11
    def __init__(self, feature_store_id):
        self._feature_store_id = feature_store_id

        self._feature_view_api = feature_view_api.FeatureViewApi(
            feature_store_id)
        self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                feature_store_id))
        self._td_code_engine = code_engine.CodeEngine(
            feature_store_id, self._TRAINING_DATA_API_PATH)
        self._statistics_engine = statistics_engine.StatisticsEngine(
            feature_store_id, self._TRAINING_DATA_API_PATH)
        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            feature_store_id)
        self._query_constructor_api = query_constructor_api.QueryConstructorApi()
Example #12
    def __init__(
        self,
        feature_store_id,
        feature_store_name,
        feature_group,
        spark_context,
        spark_session,
    ):
        self._feature_group = feature_group
        self._spark_context = spark_context
        self._spark_session = spark_session
        self._feature_store_id = feature_store_id
        self._feature_store_name = feature_store_name
        self._base_path = self._feature_group.location
        self._table_name = feature_group._get_online_table_name()

        self._primary_key = ",".join(feature_group.primary_key)

        # add event time to primary key for upserts
        if feature_group.event_time is not None:
            self._primary_key = self._primary_key + "," + feature_group.event_time

        self._partition_key = (",".join(feature_group.partition_key) if
                               len(feature_group.partition_key) >= 1 else "")
        self._partition_path = (":SIMPLE,".join(feature_group.partition_key) +
                                ":SIMPLE" if
                                len(feature_group.partition_key) >= 1 else "")
        self._pre_combine_key = (feature_group.hudi_precombine_key
                                 if feature_group.hudi_precombine_key else
                                 feature_group.primary_key[0])

        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._feature_store_id)

        if self._feature_store_name:
            # For read operations we don't actually need the connector
            # Only to sync write operations
            self._connstr = self._storage_connector_api.get(
                self._feature_store_name).connection_string
        else:
            self._connstr = None
Example #13
    def __init__(
        self,
        featurestore_id,
        featurestore_name,
        created,
        hdfs_store_path,
        project_name,
        project_id,
        featurestore_description,
        inode_id,
        offline_featurestore_name,
        hive_endpoint,
        online_enabled,
        online_featurestore_name=None,
        mysql_server_endpoint=None,
        online_featurestore_size=None,
    ):
        self._id = featurestore_id
        self._name = featurestore_name
        self._created = created
        self._hdfs_store_path = hdfs_store_path
        self._project_name = project_name
        self._project_id = project_id
        self._description = featurestore_description
        self._inode_id = inode_id
        self._online_feature_store_name = online_featurestore_name
        self._online_feature_store_size = online_featurestore_size
        self._offline_feature_store_name = offline_featurestore_name
        self._hive_endpoint = hive_endpoint
        self._mysql_server_endpoint = mysql_server_endpoint
        self._online_enabled = online_enabled

        self._feature_group_api = feature_group_api.FeatureGroupApi(self._id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._id)
        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            self._id)

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            self._id)
Example #14
    def __init__(
        self,
        left_feature_group,
        left_features,
        feature_store_name=None,
        feature_store_id=None,
        left_feature_group_start_time=None,
        left_feature_group_end_time=None,
        joins=None,
        filter=None,
    ):
        self._feature_store_name = feature_store_name
        self._feature_store_id = feature_store_id
        self._left_feature_group = left_feature_group
        self._left_features = util.parse_features(left_features)
        self._left_feature_group_start_time = left_feature_group_start_time
        self._left_feature_group_end_time = left_feature_group_end_time
        self._joins = joins or []
        self._filter = filter
        self._query_constructor_api = query_constructor_api.QueryConstructorApi()
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
Example #15
    def __init__(
        self,
        left_feature_group,
        left_features,
        feature_store_name=None,
        feature_store_id=None,
        left_feature_group_start_time=None,
        left_feature_group_end_time=None,
        joins=None,
        filter=None,
    ):
        self._feature_store_name = feature_store_name
        self._feature_store_id = feature_store_id
        self._left_feature_group = left_feature_group
        self._left_features = util.parse_features(left_features)
        self._left_feature_group_start_time = left_feature_group_start_time
        self._left_feature_group_end_time = left_feature_group_end_time
        self._joins = joins or []
        self._filter = Logic.from_response_json(filter)
        self._python_engine = engine.get_type() == "python"
        self._query_constructor_api = query_constructor_api.QueryConstructorApi()
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id
        )
Example #16
    def __init__(self, feature_store_id):
        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
        self._tags_api = tags_api.TagsApi(feature_store_id, "featuregroups")
Example #17
    def __init__(
        self,
        name,
        version,
        data_format,
        location,
        featurestore_id,
        description=None,
        storage_connector=None,
        splits=None,
        seed=None,
        created=None,
        creator=None,
        features=None,
        statistics_config=None,
        featurestore_name=None,
        id=None,
        jobs=None,
        inode_id=None,
        storage_connector_name=None,
        storage_connector_id=None,
        storage_connector_type=None,
        training_dataset_type=None,
        from_query=None,
        querydto=None,
    ):
        self._id = id
        self._name = name
        self._version = version
        self._description = description
        self._data_format = data_format
        self._seed = seed
        self._location = location
        self._from_query = from_query
        self._querydto = querydto

        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            featurestore_id)

        self._training_dataset_engine = training_dataset_engine.TrainingDatasetEngine(
            featurestore_id)

        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            featurestore_id)

        self._statistics_engine = statistics_engine.StatisticsEngine(
            featurestore_id, self.ENTITY_TYPE)

        # set up depending on user initialized or coming from backend response
        if training_dataset_type is None:
            # no type -> user init
            self._features = features
            self.storage_connector = storage_connector
            self.splits = splits
            self.statistics_config = statistics_config
        else:
            # type available -> init from backend response
            # make rest call to get all connector information, description etc.
            self._storage_connector = self._storage_connector_api.get_by_id(
                storage_connector_id, storage_connector_type)
            self._features = [
                training_dataset_feature.TrainingDatasetFeature.from_response_json(feat)
                for feat in features
            ]
            self._splits = splits
            self._training_dataset_type = training_dataset_type
            self.statistics_config = None