예제 #1
0
def _do_get_cached_featuregroup(featuregroup_name, featurestore=None, featuregroup_version=1, online=False):
    """
    Reads a cached featuregroup by generating a SQL query for it and running that query.

    Args:
        :featuregroup_name: name of the featuregroup to read
        :featurestore: the featurestore that holds the featuregroup; when None, the project's
                       featurestore is looked up and used instead
        :featuregroup_version: (Optional) version of the featuregroup, defaults to 1
        :online: boolean flag forwarded to the SQL runner, selecting the online feature data instead of
                 the offline data (only meaningful for cached feature groups with online serving enabled)

    Returns:
        a pandas dataframe with the contents of the feature group

    """
    # Resolve the target featurestore once; it is needed both to build and to run the query.
    store = fs_utils._do_get_project_featurestore() if featurestore is None else featurestore

    # Build the logical plan for the featuregroup and render it to SQL.
    plan = LogicalQueryPlan(FeaturegroupQuery(featuregroup_name, store, featuregroup_version))
    plan.create_logical_plan()
    plan.construct_sql()

    return _run_and_log_sql(plan.sql_str, featurestore=store, online=online)
예제 #2
0
def _do_get_features(features, featurestore_metadata, featurestore=None, featuregroups_version_dict=None, join_key=None,
                     online=False):
    """
    Gets a list of features (columns) from the featurestore. If no featuregroup is specified it will query hopsworks
    metastore to find where the features are stored.

    Args:
        :features: a list of features to get from the featurestore
        :featurestore_metadata: the metadata of the featurestore
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroups_version_dict: (Optional) a dict with (fg --> version) for all the featuregroups where the
                                     features resides, defaults to an empty dict
        :join_key: (Optional) column name to join on
        :online: a boolean flag whether to fetch the online feature or the offline one (assuming that the
                 feature group that the feature is stored in has online serving enabled)
                 (for cached feature groups only)

    Returns:
        A pandas dataframe with all the features

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    # A fresh dict per call: a `{}` default in the signature would be a single object shared by
    # every call, so any mutation downstream would leak into later calls.
    if featuregroups_version_dict is None:
        featuregroups_version_dict = {}

    features_query = FeaturesQuery(
        features, featurestore_metadata, featurestore, featuregroups_version_dict, join_key)
    logical_query_plan = LogicalQueryPlan(features_query)
    logical_query_plan.create_logical_plan()
    logical_query_plan.construct_sql()

    return _run_and_log_sql(logical_query_plan.sql_str, featurestore, online)
예제 #3
0
def _do_get_feature(feature, featurestore_metadata, featurestore=None, featuregroup=None, featuregroup_version=1,
                    online=False):
    """
    Fetches a single feature (column) from a featurestore by generating a SQL query and running it.
    When no featuregroup is given, the hopsworks metastore is queried to find a featuregroup in the
    featurestore that contains the feature. Specifying the featuregroup, when known, improves the
    performance of the query.

    Args:
        :feature: name of the feature to fetch
        :featurestore_metadata: the metadata of the featurestore to query
        :featurestore: the featurestore that holds the featuregroup; when None, the project's
                       featurestore is looked up and used instead
        :featuregroup: (Optional) the featuregroup where the feature resides
        :featuregroup_version: (Optional) version of the featuregroup, defaults to 1
        :online: boolean flag forwarded to the SQL runner, selecting the online feature data instead of
                 the offline data (only meaningful for cached feature groups with online serving enabled)

    Returns:
        A pandas dataframe with the feature

    """
    # Resolve the target featurestore once; it is needed both to build and to run the query.
    store = fs_utils._do_get_project_featurestore() if featurestore is None else featurestore

    # Build the logical plan for the single-feature query and render it to SQL.
    plan = LogicalQueryPlan(
        FeatureQuery(feature, featurestore_metadata, store, featuregroup, featuregroup_version))
    plan.create_logical_plan()
    plan.construct_sql()

    return _run_and_log_sql(plan.sql_str, store, online)
예제 #4
0
def _do_get_feature(feature,
                    featurestore_metadata,
                    featurestore=None,
                    featuregroup=None,
                    featuregroup_version=1):
    """
    Gets a particular feature (column) from a featurestore, if no featuregroup is specified it queries
    hopsworks metastore to see if the feature exists in any of the featuregroups in the featurestore.
    If the user knows which featuregroup contain the feature, it should be specified as it will improve performance
    of the query.

    Args:
        :feature: the feature name to get
        :featurestore_metadata: the metadata of the featurestore to query
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup: (Optional) the featuregroup where the feature resides
        :featuregroup_version: (Optional) the version of the featuregroup

    Returns:
        A pandas dataframe with the feature

    """
    # Honor the documented default: without this, None would be handed to both the hive
    # connection and the query builder instead of the project's featurestore.
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    hive_conn = util._create_hive_connection(featurestore)

    feature_query = FeatureQuery(feature, featurestore_metadata, featurestore,
                                 featuregroup, featuregroup_version)
    logical_query_plan = LogicalQueryPlan(feature_query)
    logical_query_plan.create_logical_plan()
    logical_query_plan.construct_sql()

    return _run_and_log_sql(hive_conn, logical_query_plan.sql_str)
예제 #5
0
def _do_get_features(features, featurestore_metadata, featurestore=None, featuregroups_version_dict=None, join_key=None):
    """
    Gets a list of features (columns) from the featurestore. If no featuregroup is specified it will query hopsworks
    metastore to find where the features are stored.

    Args:
        :features: a list of features to get from the featurestore
        :featurestore_metadata: the metadata of the featurestore
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroups_version_dict: (Optional) a dict with (fg --> version) for all the featuregroups where the
                                     features resides, defaults to an empty dict
        :join_key: (Optional) column name to join on

    Returns:
        A pandas dataframe with all the features

    """
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    # A fresh dict per call: a `{}` default in the signature would be a single object shared by
    # every call, so any mutation downstream would leak into later calls.
    if featuregroups_version_dict is None:
        featuregroups_version_dict = {}

    hive_conn = util._create_hive_connection(featurestore)

    features_query = FeaturesQuery(features, featurestore_metadata, featurestore, featuregroups_version_dict, join_key)
    logical_query_plan = LogicalQueryPlan(features_query)
    logical_query_plan.create_logical_plan()
    logical_query_plan.construct_sql()

    return _run_and_log_sql(hive_conn, logical_query_plan.sql_str)
예제 #6
0
def _do_get_cached_featuregroup(featuregroup_name,
                                featurestore=None,
                                featuregroup_version=1):
    """
    Gets a cached featuregroup from a featurestore as a pandas dataframe

    Args:
        :featuregroup_name: name of the featuregroup to get
        :featurestore: the featurestore where the featuregroup resides, defaults to the project's featurestore
        :featuregroup_version: (Optional) the version of the featuregroup

    Returns:
        a pandas dataframe with the contents of the feature group

    """
    # Honor the documented default: without this, None would be handed to both the hive
    # connection and the query builder instead of the project's featurestore.
    if featurestore is None:
        featurestore = fs_utils._do_get_project_featurestore()

    hive_conn = util._create_hive_connection(featurestore)

    featuregroup_query = FeaturegroupQuery(featuregroup_name, featurestore,
                                           featuregroup_version)
    logical_query_plan = LogicalQueryPlan(featuregroup_query)
    logical_query_plan.create_logical_plan()
    logical_query_plan.construct_sql()

    return _run_and_log_sql(hive_conn, logical_query_plan.sql_str)