def __init__(
    self,
    feature_store_id,
    feature_store_name,
    feature_group,
    spark_context,
    spark_session,
):
    """Prepare Hudi write/read state for one feature group on Spark.

    Derives the Hudi table name, record key, partition key/path and
    precombine key from the feature group metadata, and resolves the
    JDBC connection string via the storage-connector API.
    """
    self._feature_group = feature_group
    self._spark_context = spark_context
    self._spark_session = spark_session
    self._feature_store_id = feature_store_id
    self._feature_store_name = feature_store_name

    self._base_path = self._feature_group.location
    self._table_name = util.feature_group_name(feature_group)
    self._primary_key = ",".join(feature_group.primary_key)

    # Only build partition settings when partition columns exist;
    # otherwise both stay empty strings.
    partition_cols = feature_group.partition_key
    if partition_cols:
        self._partition_key = ",".join(partition_cols)
        self._partition_path = ":SIMPLE,".join(partition_cols) + ":SIMPLE"
    else:
        self._partition_key = ""
        self._partition_path = ""

    # Fall back to the first primary-key column when no explicit Hudi
    # precombine key is configured on the feature group.
    if feature_group.hudi_precombine_key:
        self._pre_combine_key = feature_group.hudi_precombine_key
    else:
        self._pre_combine_key = feature_group.primary_key[0]

    self._feature_group_api = feature_group_api.FeatureGroupApi(
        feature_store_id
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        self._feature_store_id
    )
    self._connstr = self._storage_connector_api.get(
        self._feature_store_name
    ).connection_string
def __init__(self, feature_store_id):
    """Build the backend API clients this engine talks to.

    Tags, feature-group and storage-connector clients are scoped to the
    given feature store; the Kafka client is store-independent.
    """
    self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
    self._feature_group_api = feature_group_api.FeatureGroupApi(
        feature_store_id
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
    self._kafka_api = kafka_api.KafkaApi()
def __init__(
    self,
    name,
    version,
    description,
    data_format,
    location,
    featurestore_id,
    storage_connector=None,
    splits=None,
    seed=None,
    cluster_analysis=None,
    created=None,
    creator=None,
    descriptive_statistics=None,
    feature_correlation_matrix=None,
    features=None,
    features_histogram=None,
    featurestore_name=None,
    id=None,
    jobs=None,
    inode_id=None,
    storage_connector_name=None,
    storage_connector_id=None,
    storage_connector_type=None,
    training_dataset_type=None,
):
    """Training-dataset metadata object.

    Supports two construction paths: a user-created instance (no
    ``training_dataset_type``) takes the raw arguments as given, while a
    backend REST response (``training_dataset_type`` present) resolves
    the storage connector by id and parses the feature list.
    """
    self._id = id
    self._name = name
    self._version = version
    self._description = description
    self._data_format = data_format
    self._seed = seed
    self._location = location

    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        featurestore_id
    )
    self._training_dataset_engine = (
        training_dataset_engine.TrainingDatasetEngine(featurestore_id)
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        featurestore_id
    )

    if training_dataset_type is None:
        # User-initialized: keep the values exactly as passed in.
        self._features = features
        self.storage_connector = storage_connector
        self.splits = splits
    else:
        # Backend response: fetch the full connector metadata and
        # deserialize each feature dict into a Feature object.
        self._storage_connector = self._storage_connector_api.get_by_id(
            storage_connector_id, storage_connector_type
        )
        self._features = [
            feature.Feature.from_response_json(feat) for feat in features
        ]
        self._splits = splits
        self._training_dataset_type = training_dataset_type
def __init__(
    self,
    feature_store_id,
    feature_store_name,
    feature_group,
    spark_context,
    spark_session,
):
    """Prepare Hudi write/read state for one feature group on Spark.

    Derives the Hudi table name, record key, partition key/path and
    precombine key from the feature group metadata, and resolves the
    JDBC connection string via the storage-connector API.
    """
    self._feature_group = feature_group
    self._spark_context = spark_context
    self._spark_session = spark_session
    self._feature_store_id = feature_store_id
    self._feature_store_name = feature_store_name
    self._base_path = self._feature_group.location
    self._table_name = feature_group.name + "_" + str(feature_group.version)
    self._primary_key = ",".join(feature_group.primary_key)
    # BUGFIX: guard against feature groups with no partition keys.
    # ":SIMPLE,".join([]) + ":SIMPLE" would otherwise produce the bogus
    # partition path ":SIMPLE" (and an empty-string partition key is the
    # convention used by the other Hudi engine constructors in this file).
    if len(feature_group.partition_key) >= 1:
        self._partition_key = ",".join(feature_group.partition_key)
        self._partition_path = (
            ":SIMPLE,".join(feature_group.partition_key) + ":SIMPLE"
        )
    else:
        self._partition_key = ""
        self._partition_path = ""
    # Precombine key defaults to the first primary-key column.
    self._pre_combine_key = feature_group.primary_key[0]
    self._feature_group_api = feature_group_api.FeatureGroupApi(
        feature_store_id
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        self._feature_store_id
    )
    self._connstr = self._storage_connector_api.get(
        self._feature_store_name,
        storage_connector.StorageConnector.JDBC,
    ).connection_string
def __init__(
    self, feature_store_id, features=None, training_dataset_version=None
):
    """Set up online-serving state and the API clients/engines it uses.

    Parameters
    ----------
    feature_store_id : id of the feature store the served entity lives in.
    features : optional list of features to serve; defaults to an empty
        list when omitted.
    training_dataset_version : optional training-dataset version to pin.
    """
    self._training_dataset_version = training_dataset_version
    # BUGFIX: the original signature used a mutable default (features=[]),
    # which is shared across every instance; use None as sentinel instead.
    self._features = features if features is not None else []

    # Lazily-populated serving state.
    self._prepared_statement_engine = None
    self._prepared_statements = None
    self._serving_keys = None
    self._pkname_by_serving_index = None
    self._prefix_by_serving_index = None
    self._external = True

    self._feature_store_id = feature_store_id
    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        feature_store_id
    )
    self._feature_view_api = feature_view_api.FeatureViewApi(feature_store_id)
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
    self._transformation_function_engine = (
        transformation_function_engine.TransformationFunctionEngine(
            feature_store_id
        )
    )
    self._feature_view_engine = feature_view_engine.FeatureViewEngine(
        feature_store_id
    )
def __init__(self, id, name, description, featurestore_id):
    """Store basic metadata and create a storage-connector API client
    scoped to the owning feature store."""
    self._id = id
    self._name = name
    self._description = description
    self._featurestore_id = featurestore_id
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        self._featurestore_id
    )
def __init__(self, feature_store_id):
    """Create the API clients used for training-dataset operations."""
    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        feature_store_id
    )
    self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
def __init__(self, feature_store_id):
    """Remember the feature store id and build the backend clients and
    the transformation-function engine scoped to it."""
    self._feature_store_id = feature_store_id
    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        feature_store_id
    )
    self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
    self._transformation_function_engine = (
        transformation_function_engine.TransformationFunctionEngine(
            feature_store_id
        )
    )
def __init__(
    self,
    feature_store_name,
    feature_store_id,
    left_feature_group,
    left_features,
):
    """Initialize a query rooted at ``left_feature_group``.

    ``left_features`` is normalized through ``util.parse_features``; the
    join list starts empty and is extended later.
    """
    self._feature_store_name = feature_store_name
    self._feature_store_id = feature_store_id
    self._left_feature_group = left_feature_group
    self._left_features = util.parse_features(left_features)
    self._joins = []
    self._query_constructor_api = query_constructor_api.QueryConstructorApi()
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
def __init__(
    self,
    featurestore_id,
    featurestore_name,
    created,
    hdfs_store_path,
    project_name,
    project_id,
    featurestore_description,
    inode_id,
    offline_featurestore_name,
    hive_endpoint,
    online_enabled,
    num_feature_groups=None,
    num_training_datasets=None,
    num_storage_connectors=None,
    online_featurestore_name=None,
    mysql_server_endpoint=None,
    online_featurestore_size=None,
):
    """Feature-store metadata object plus the API clients and engines
    scoped to this store's id."""
    # Core identity and metadata.
    self._id = featurestore_id
    self._name = featurestore_name
    self._created = created
    self._hdfs_store_path = hdfs_store_path
    self._project_name = project_name
    self._project_id = project_id
    self._description = featurestore_description
    self._inode_id = inode_id

    # Online / offline store details.
    self._online_feature_store_name = online_featurestore_name
    self._online_feature_store_size = online_featurestore_size
    self._offline_feature_store_name = offline_featurestore_name
    self._hive_endpoint = hive_endpoint
    self._mysql_server_endpoint = mysql_server_endpoint
    self._online_enabled = online_enabled

    # Optional entity counts (may be None when not reported).
    self._num_feature_groups = num_feature_groups
    self._num_training_datasets = num_training_datasets
    self._num_storage_connectors = num_storage_connectors

    # API clients and engines, all scoped to this store's id.
    self._feature_group_api = feature_group_api.FeatureGroupApi(self._id)
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        self._id
    )
    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        self._id
    )
    self._expectations_api = expectations_api.ExpectationsApi(self._id)
    self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
        self._id
    )
    self._transformation_function_engine = (
        transformation_function_engine.TransformationFunctionEngine(self._id)
    )
    self._feature_view_engine = feature_view_engine.FeatureViewEngine(
        self._id
    )
def __init__(self, feature_store_id):
    """Build the API clients and sub-engines used for feature-view
    operations, all scoped to the given feature store."""
    self._feature_store_id = feature_store_id
    self._feature_view_api = feature_view_api.FeatureViewApi(feature_store_id)
    self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
    self._transformation_function_engine = (
        transformation_function_engine.TransformationFunctionEngine(
            feature_store_id
        )
    )
    # Code and statistics engines share the training-data API path.
    self._td_code_engine = code_engine.CodeEngine(
        feature_store_id, self._TRAINING_DATA_API_PATH
    )
    self._statistics_engine = statistics_engine.StatisticsEngine(
        feature_store_id, self._TRAINING_DATA_API_PATH
    )
    self._training_dataset_engine = (
        training_dataset_engine.TrainingDatasetEngine(feature_store_id)
    )
    self._query_constructor_api = query_constructor_api.QueryConstructorApi()
def __init__(
    self,
    feature_store_id,
    feature_store_name,
    feature_group,
    spark_context,
    spark_session,
):
    """Prepare Hudi write/read state for one feature group on Spark.

    Derives the Hudi table name, record key (including the event-time
    column when present), partition key/path and precombine key from the
    feature group metadata.
    """
    self._feature_group = feature_group
    self._spark_context = spark_context
    self._spark_session = spark_session
    self._feature_store_id = feature_store_id
    self._feature_store_name = feature_store_name

    self._base_path = self._feature_group.location
    self._table_name = feature_group._get_online_table_name()

    self._primary_key = ",".join(feature_group.primary_key)
    # The event-time column is part of the record key so upserts
    # distinguish rows by time as well.
    if feature_group.event_time is not None:
        self._primary_key += "," + feature_group.event_time

    # Only build partition settings when partition columns exist;
    # otherwise both stay empty strings.
    partition_cols = feature_group.partition_key
    if partition_cols:
        self._partition_key = ",".join(partition_cols)
        self._partition_path = ":SIMPLE,".join(partition_cols) + ":SIMPLE"
    else:
        self._partition_key = ""
        self._partition_path = ""

    # Fall back to the first primary-key column when no explicit Hudi
    # precombine key is configured.
    if feature_group.hudi_precombine_key:
        self._pre_combine_key = feature_group.hudi_precombine_key
    else:
        self._pre_combine_key = feature_group.primary_key[0]

    self._feature_group_api = feature_group_api.FeatureGroupApi(
        feature_store_id
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        self._feature_store_id
    )
    # The connector is only required to sync write operations; read-only
    # usage (no feature store name) skips resolving it.
    if self._feature_store_name:
        self._connstr = self._storage_connector_api.get(
            self._feature_store_name
        ).connection_string
    else:
        self._connstr = None
def __init__(
    self,
    featurestore_id,
    featurestore_name,
    created,
    hdfs_store_path,
    project_name,
    project_id,
    featurestore_description,
    inode_id,
    offline_featurestore_name,
    hive_endpoint,
    online_enabled,
    online_featurestore_name=None,
    mysql_server_endpoint=None,
    online_featurestore_size=None,
):
    """Feature-store metadata object plus the API clients and engine
    scoped to this store's id."""
    # Core identity and metadata.
    self._id = featurestore_id
    self._name = featurestore_name
    self._created = created
    self._hdfs_store_path = hdfs_store_path
    self._project_name = project_name
    self._project_id = project_id
    self._description = featurestore_description
    self._inode_id = inode_id

    # Online / offline store details.
    self._online_feature_store_name = online_featurestore_name
    self._online_feature_store_size = online_featurestore_size
    self._offline_feature_store_name = offline_featurestore_name
    self._hive_endpoint = hive_endpoint
    self._mysql_server_endpoint = mysql_server_endpoint
    self._online_enabled = online_enabled

    # API clients and engine, all scoped to this store's id.
    self._feature_group_api = feature_group_api.FeatureGroupApi(self._id)
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        self._id
    )
    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        self._id
    )
    self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
        self._id
    )
def __init__(
    self,
    left_feature_group,
    left_features,
    feature_store_name=None,
    feature_store_id=None,
    left_feature_group_start_time=None,
    left_feature_group_end_time=None,
    joins=None,
    filter=None,
):
    """Initialize a query rooted at ``left_feature_group`` with optional
    time bounds, joins and filter.

    ``left_features`` is normalized through ``util.parse_features``.
    NOTE: the parameter name ``filter`` shadows the builtin but is part
    of the public keyword interface and must stay.
    """
    self._feature_store_name = feature_store_name
    self._feature_store_id = feature_store_id
    self._left_feature_group = left_feature_group
    self._left_features = util.parse_features(left_features)
    self._left_feature_group_start_time = left_feature_group_start_time
    self._left_feature_group_end_time = left_feature_group_end_time
    # Any falsy value (None, empty list) becomes a fresh empty list.
    self._joins = joins or []
    self._filter = filter
    self._query_constructor_api = query_constructor_api.QueryConstructorApi()
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
def __init__(
    self,
    left_feature_group,
    left_features,
    feature_store_name=None,
    feature_store_id=None,
    left_feature_group_start_time=None,
    left_feature_group_end_time=None,
    joins=None,
    filter=None,
):
    """Initialize a query rooted at ``left_feature_group`` with optional
    time bounds, joins and filter.

    ``left_features`` is normalized through ``util.parse_features`` and
    ``filter`` is deserialized via ``Logic.from_response_json``.
    NOTE: the parameter name ``filter`` shadows the builtin but is part
    of the public keyword interface and must stay.
    """
    self._feature_store_name = feature_store_name
    self._feature_store_id = feature_store_id
    self._left_feature_group = left_feature_group
    self._left_features = util.parse_features(left_features)
    self._left_feature_group_start_time = left_feature_group_start_time
    self._left_feature_group_end_time = left_feature_group_end_time
    # Any falsy value (None, empty list) becomes a fresh empty list.
    self._joins = joins or []
    self._filter = Logic.from_response_json(filter)
    # IDIOM FIX: the comparison is already a bool; the original
    # "True if ... else False" ternary was redundant.
    self._python_engine = engine.get_type() == "python"
    self._query_constructor_api = query_constructor_api.QueryConstructorApi()
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
def __init__(self, feature_store_id):
    """Create the API clients used for feature-group operations; the
    tags client is bound to the "featuregroups" entity type."""
    self._feature_group_api = feature_group_api.FeatureGroupApi(
        feature_store_id
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        feature_store_id
    )
    self._tags_api = tags_api.TagsApi(feature_store_id, "featuregroups")
def __init__(
    self,
    name,
    version,
    data_format,
    location,
    featurestore_id,
    description=None,
    storage_connector=None,
    splits=None,
    seed=None,
    created=None,
    creator=None,
    features=None,
    statistics_config=None,
    featurestore_name=None,
    id=None,
    jobs=None,
    inode_id=None,
    storage_connector_name=None,
    storage_connector_id=None,
    storage_connector_type=None,
    training_dataset_type=None,
    from_query=None,
    querydto=None,
):
    """Training-dataset metadata object.

    Supports two construction paths: a user-created instance (no
    ``training_dataset_type``) takes the raw arguments as given, while a
    backend REST response (``training_dataset_type`` present) resolves
    the storage connector by id and parses the feature list.
    """
    self._id = id
    self._name = name
    self._version = version
    self._description = description
    self._data_format = data_format
    self._seed = seed
    self._location = location
    self._from_query = from_query
    self._querydto = querydto

    self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
        featurestore_id
    )
    self._training_dataset_engine = (
        training_dataset_engine.TrainingDatasetEngine(featurestore_id)
    )
    self._storage_connector_api = storage_connector_api.StorageConnectorApi(
        featurestore_id
    )
    self._statistics_engine = statistics_engine.StatisticsEngine(
        featurestore_id, self.ENTITY_TYPE
    )

    if training_dataset_type is None:
        # User-initialized: keep the values exactly as passed in.
        self._features = features
        self.storage_connector = storage_connector
        self.splits = splits
        self.statistics_config = statistics_config
    else:
        # Backend response: fetch the full connector metadata and
        # deserialize each feature dict; statistics config is reset.
        self._storage_connector = self._storage_connector_api.get_by_id(
            storage_connector_id, storage_connector_type
        )
        self._features = [
            training_dataset_feature.TrainingDatasetFeature.from_response_json(
                feat
            )
            for feat in features
        ]
        self._splits = splits
        self._training_dataset_type = training_dataset_type
        self.statistics_config = None