    def __init__(self, feature_store_id):
        self._tags_api = tags_api.TagsApi(feature_store_id, self.ENTITY_TYPE)
        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
        self._kafka_api = kafka_api.KafkaApi()
Example 2
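Constructor of a Hudi engine: it stores the Spark handles and the feature group metadata, derives the Hudi table name, builds the comma-separated record key, partition key, and precombine key settings (falling back to the first primary key column when no precombine key is set), and resolves the connection string through the storage connector API. Note the empty-string guards for feature groups without partition columns.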
    def __init__(
        self,
        feature_store_id,
        feature_store_name,
        feature_group,
        spark_context,
        spark_session,
    ):
        self._feature_group = feature_group
        self._spark_context = spark_context
        self._spark_session = spark_session
        self._feature_store_id = feature_store_id
        self._feature_store_name = feature_store_name
        self._base_path = self._feature_group.location
        self._table_name = util.feature_group_name(feature_group)

        self._primary_key = ",".join(feature_group.primary_key)
        self._partition_key = (",".join(feature_group.partition_key) if
                               len(feature_group.partition_key) >= 1 else "")
        self._partition_path = (":SIMPLE,".join(feature_group.partition_key) +
                                ":SIMPLE" if
                                len(feature_group.partition_key) >= 1 else "")
        self._pre_combine_key = (feature_group.hudi_precombine_key
                                 if feature_group.hudi_precombine_key else
                                 feature_group.primary_key[0])

        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._feature_store_id)
        self._connstr = self._storage_connector_api.get(
            self._feature_store_name).connection_string
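The :SIMPLE suffix in the partition path follows the field:type format used by Hudi's CustomKeyGenerator (SIMPLE versus TIMESTAMP partition fields). A minimal sketch, assuming a hypothetical helper _hudi_write_options that is not part of the snippet, of how the derived fields would typically feed Hudi's standard Spark datasource write options:

def _hudi_write_options(engine):
    # Hypothetical helper: maps the fields built in __init__ above onto
    # standard Hudi datasource option keys; the mapping itself is an
    # assumption, the option keys are standard Hudi options.
    return {
        "hoodie.table.name": engine._table_name,
        "hoodie.datasource.write.recordkey.field": engine._primary_key,
        "hoodie.datasource.write.partitionpath.field": engine._partition_path,
        "hoodie.datasource.write.precombine.field": engine._pre_combine_key,
    }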
Example 3
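An earlier revision of the same constructor: the table name is built as <name>_<version>, the precombine key is always the first primary key column, and there is no guard for feature groups without partition columns (which the revision above adds). The storage connector is also requested explicitly as a JDBC connector here.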
    def __init__(
        self,
        feature_store_id,
        feature_store_name,
        feature_group,
        spark_context,
        spark_session,
    ):
        self._feature_group = feature_group
        self._spark_context = spark_context
        self._spark_session = spark_session
        self._feature_store_id = feature_store_id
        self._feature_store_name = feature_store_name
        self._base_path = self._feature_group.location
        self._table_name = feature_group.name + "_" + str(
            feature_group.version)

        self._primary_key = ",".join(feature_group.primary_key)
        self._partition_key = ",".join(feature_group.partition_key)
        self._partition_path = ":SIMPLE,".join(
            feature_group.partition_key) + ":SIMPLE"
        self._pre_combine_key = feature_group.primary_key[0]

        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._feature_store_id)
        self._connstr = self._storage_connector_api.get(
            self._feature_store_name,
            storage_connector.StorageConnector.JDBC,
        ).connection_string
Example 4
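save_dataframe as implemented by the Python engine: rather than writing directly, it configures a Hopsworks ingestion job for the feature group, uploads the pandas DataFrame to the job's data path through the dataset API, and then launches the job on the cluster.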
    def save_dataframe(
        self,
        feature_group,
        dataframe,
        operation,
        online_enabled,
        storage,
        offline_write_options,
        online_write_options,
        validation_id=None,
    ):
        # App configuration
        app_options = self._get_app_options(offline_write_options)

        # Setup job for ingestion
        # Configure Hopsworks ingestion job
        print("Configuring ingestion job...")
        fg_api = feature_group_api.FeatureGroupApi(
            feature_group.feature_store_id)
        ingestion_job = fg_api.ingestion(feature_group, app_options)

        # Upload dataframe into Hopsworks
        print("Uploading Pandas dataframe...")
        self._dataset_api.upload(feature_group, ingestion_job.data_path,
                                 dataframe)

        # Launch job
        print("Launching ingestion job...")
        self._job_api.launch(ingestion_job.job.name)
        print(
            "Ingestion job started successfully; you can follow its progress at {}"
            .format(self._get_job_url(ingestion_job.job.href)))
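A hypothetical caller sketch; engine, fg, and the chosen argument values are assumptions for illustration, not taken from the snippet:

import pandas as pd

# engine is assumed to be an instance of the engine class defining
# save_dataframe above; fg is assumed to be an existing feature group.
df = pd.DataFrame({"id": [1, 2], "value": [0.1, 0.2]})
engine.save_dataframe(
    feature_group=fg,
    dataframe=df,
    operation="upsert",
    online_enabled=True,
    storage=None,
    offline_write_options={},
    online_write_options={},
)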
Example 5
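Constructor of the feature store metadata entity: it maps the fields of the REST payload onto private attributes and wires up the API clients together with the feature group, transformation function, and feature view engines.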
    def __init__(
        self,
        featurestore_id,
        featurestore_name,
        created,
        hdfs_store_path,
        project_name,
        project_id,
        featurestore_description,
        inode_id,
        offline_featurestore_name,
        hive_endpoint,
        online_enabled,
        num_feature_groups=None,
        num_training_datasets=None,
        num_storage_connectors=None,
        online_featurestore_name=None,
        mysql_server_endpoint=None,
        online_featurestore_size=None,
    ):
        self._id = featurestore_id
        self._name = featurestore_name
        self._created = created
        self._hdfs_store_path = hdfs_store_path
        self._project_name = project_name
        self._project_id = project_id
        self._description = featurestore_description
        self._inode_id = inode_id
        self._online_feature_store_name = online_featurestore_name
        self._online_feature_store_size = online_featurestore_size
        self._offline_feature_store_name = offline_featurestore_name
        self._hive_endpoint = hive_endpoint
        self._mysql_server_endpoint = mysql_server_endpoint
        self._online_enabled = online_enabled
        self._num_feature_groups = num_feature_groups
        self._num_training_datasets = num_training_datasets
        self._num_storage_connectors = num_storage_connectors

        self._feature_group_api = feature_group_api.FeatureGroupApi(self._id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._id)
        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            self._id)
        self._expectations_api = expectations_api.ExpectationsApi(self._id)

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            self._id)

        self._transformation_function_engine = (
            transformation_function_engine.TransformationFunctionEngine(
                self._id))
        self._feature_view_engine = feature_view_engine.FeatureViewEngine(
            self._id)
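The clients wired above are what the entity's public accessors delegate to. A minimal sketch of such an accessor, assuming a hypothetical method name and signature:

    def get_feature_group(self, name, version):
        # Hypothetical accessor, not taken from the snippet: delegates to
        # the API client created in __init__; the name and signature of
        # the underlying get call are assumptions.
        return self._feature_group_api.get(name, version)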
Example 6
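A Hudi engine variant used when syncing to the online feature store: the table name comes from the feature group's online table name, the event time column is appended to the record key so that upserts are time-aware, and the storage connector (and hence the connection string) is resolved only when a feature store name is supplied.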
    def __init__(
        self,
        feature_store_id,
        feature_store_name,
        feature_group,
        spark_context,
        spark_session,
    ):
        self._feature_group = feature_group
        self._spark_context = spark_context
        self._spark_session = spark_session
        self._feature_store_id = feature_store_id
        self._feature_store_name = feature_store_name
        self._base_path = self._feature_group.location
        self._table_name = feature_group._get_online_table_name()

        self._primary_key = ",".join(feature_group.primary_key)

        # add event time to primary key for upserts
        if feature_group.event_time is not None:
            self._primary_key = self._primary_key + "," + feature_group.event_time

        self._partition_key = (",".join(feature_group.partition_key) if
                               len(feature_group.partition_key) >= 1 else "")
        self._partition_path = (":SIMPLE,".join(feature_group.partition_key) +
                                ":SIMPLE" if
                                len(feature_group.partition_key) >= 1 else "")
        self._pre_combine_key = (feature_group.hudi_precombine_key
                                 if feature_group.hudi_precombine_key else
                                 feature_group.primary_key[0])

        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._feature_store_id)

        if self._feature_store_name:
            # The connector is not needed for read operations; it is only
            # required to sync write operations.
            self._connstr = self._storage_connector_api.get(
                self._feature_store_name).connection_string
        else:
            self._connstr = None
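For illustration, the record key composition above can be reproduced standalone; the column names id and ts are assumptions:

# Standalone illustration mirroring the logic above: the event time
# column becomes part of the record key used for upserts.
primary_key_cols = ["id"]
event_time = "ts"

record_key = ",".join(primary_key_cols)
if event_time is not None:
    record_key = record_key + "," + event_time
assert record_key == "id,ts"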
Example 7
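An earlier revision of the feature store entity: it carries no feature group, training dataset, or storage connector counts, and wires only the feature group engine (no expectations API, transformation function engine, or feature view engine).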
    def __init__(
        self,
        featurestore_id,
        featurestore_name,
        created,
        hdfs_store_path,
        project_name,
        project_id,
        featurestore_description,
        inode_id,
        offline_featurestore_name,
        hive_endpoint,
        online_enabled,
        online_featurestore_name=None,
        mysql_server_endpoint=None,
        online_featurestore_size=None,
    ):
        self._id = featurestore_id
        self._name = featurestore_name
        self._created = created
        self._hdfs_store_path = hdfs_store_path
        self._project_name = project_name
        self._project_id = project_id
        self._description = featurestore_description
        self._inode_id = inode_id
        self._online_feature_store_name = online_featurestore_name
        self._online_feature_store_size = online_featurestore_size
        self._offline_feature_store_name = offline_featurestore_name
        self._hive_endpoint = hive_endpoint
        self._mysql_server_endpoint = mysql_server_endpoint
        self._online_enabled = online_enabled

        self._feature_group_api = feature_group_api.FeatureGroupApi(self._id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            self._id)
        self._training_dataset_api = training_dataset_api.TrainingDatasetApi(
            self._id)

        self._feature_group_engine = feature_group_engine.FeatureGroupEngine(
            self._id)
Example 8
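The same wiring as the first snippet above, except that the tags API entity type is passed as the literal string "featuregroups" instead of a class constant.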
    def __init__(self, feature_store_id):
        self._feature_group_api = feature_group_api.FeatureGroupApi(
            feature_store_id)
        self._storage_connector_api = storage_connector_api.StorageConnectorApi(
            feature_store_id)
        self._tags_api = tags_api.TagsApi(feature_store_id, "featuregroups")