def test_metadata(es, tmpdir):
    """describe_feature should honor per-feature descriptions, primitive
    templates, and a JSON metadata file that combines both."""
    descriptions = {
        'sessions: device_name': 'the name of the device used for each session',
        'customers: id': "the customer's id",
    }
    agg_feat = AggregationFeature(
        IdentityFeature(es['sessions'].ww['device_name']), 'customers', NumUnique
    )
    expected_agg = (
        'The number of unique elements in the name of the device used for each '
        'session of all instances of "sessions" for each customer\'s id.'
    )
    assert describe_feature(agg_feat, feature_descriptions=descriptions) == expected_agg

    transform_feat = GroupByTransformFeature(
        IdentityFeature(es['log'].ww['value']),
        CumMean,
        IdentityFeature(es['log'].ww['session_id']),
    )
    expected_transform = 'The running average of the "value" for each "session_id".'
    templates = {"cum_mean": "the running average of {}"}
    assert describe_feature(transform_feat, primitive_templates=templates) == expected_transform

    custom_agg = AggregationFeature(
        IdentityFeature(es['log'].ww['zipcode']), 'sessions', Mode
    )
    expected_auto = (
        'The most frequently occurring value of the "zipcode" of all instances '
        'of "log" for each "id" in "sessions".'
    )
    custom_text = "the most frequently used zipcode"
    # Custom descriptions are capitalized and terminated with a period.
    expected_custom = custom_text[0].upper() + custom_text[1:] + '.'
    custom_descriptions = {'sessions: MODE(log.zipcode)': custom_text}
    assert describe_feature(custom_agg) == expected_auto
    assert describe_feature(custom_agg, feature_descriptions=custom_descriptions) == expected_custom

    # The same information supplied through a metadata file should behave
    # identically to passing the dicts directly.
    metadata = {
        'feature_descriptions': {**descriptions, **custom_descriptions},
        'primitive_templates': templates,
    }
    metadata_path = os.path.join(tmpdir, 'description_metadata.json')
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f)
    assert describe_feature(agg_feat, metadata_file=metadata_path) == expected_agg
    assert describe_feature(transform_feat, metadata_file=metadata_path) == expected_transform
    assert describe_feature(custom_agg, metadata_file=metadata_path) == expected_custom
def test_stacked(es, trans_feat):
    """Graph of an aggregation stacked on a transform feature: every expected
    node, edge, and step label must appear in the graphviz DOT source."""
    stacked = AggregationFeature(trans_feat, es['cohorts'], Mode)
    graph = graph_feature(stacked).source

    feat_name = stacked.get_name()
    intermediate_name = trans_feat.get_name()
    # Primitive nodes are named with a numeric prefix plus the feature name.
    agg_primitive = '0_{}_mode'.format(feat_name)
    trans_primitive = '1_{}_year'.format(intermediate_name)
    groupby_node = '{}_groupby_customers--cohort'.format(feat_name)

    # Expected DOT edges, in dataflow order: transform -> intermediate column
    # -> groupby -> aggregation -> final feature.
    trans_prim_edge = 'customers:cancel_date -> "{}"'.format(trans_primitive)
    intermediate_edge = '"{}" -> customers:"{}"'.format(trans_primitive, intermediate_name)
    groupby_edge = 'customers:cohort -> "{}"'.format(groupby_node)
    groupby_input = 'customers:"{}" -> "{}"'.format(intermediate_name, groupby_node)
    agg_input = '"{}" -> "{}"'.format(groupby_node, agg_primitive)
    feat_edge = '"{}" -> cohorts:"{}"'.format(agg_primitive, feat_name)

    graph_components = [feat_name, intermediate_name, agg_primitive, trans_primitive,
                        groupby_node, trans_prim_edge, intermediate_edge, groupby_edge,
                        groupby_input, agg_input, feat_edge]
    for component in graph_components:
        assert component in graph

    # Escape parentheses so the generated feature name can be used as a regex.
    agg_primitive = agg_primitive.replace('(', '\\(').replace(')', '\\)')
    agg_node = re.findall('"{}" \\[label.*'.format(agg_primitive), graph)
    assert len(agg_node) == 1
    # The aggregation node is labeled as the second computation step.
    assert 'Step 2' in agg_node[0]

    trans_primitive = trans_primitive.replace('(', '\\(').replace(')', '\\)')
    trans_node = re.findall('"{}" \\[label.*'.format(trans_primitive), graph)
    assert len(trans_node) == 1
    # The transform node is labeled as the first computation step.
    assert 'Step 1' in trans_node[0]
# Beispiel #3 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_direct_description(es):
    """DirectFeature descriptions: one hop, nested hops, and an aggregation
    stacked on a direct feature."""
    direct = DirectFeature(
        IdentityFeature(es["customers"].ww["loves_ice_cream"]), "sessions"
    )
    expected = (
        'The "loves_ice_cream" for the instance of "customers" associated '
        'with this instance of "sessions".'
    )
    assert describe_feature(direct) == expected

    # Stacking a second direct hop extends the "associated with" chain.
    nested = DirectFeature(direct, "log")
    expected_nested = (
        'The "loves_ice_cream" for the instance of "customers" '
        'associated with the instance of "sessions" associated with '
        'this instance of "log".'
    )
    assert describe_feature(nested) == expected_nested

    # Aggregation on top of a direct feature on top of an aggregation.
    agg = AggregationFeature(
        IdentityFeature(es["log"].ww["purchased"]), "sessions", PercentTrue
    )
    agg_on_direct = AggregationFeature(DirectFeature(agg, "log"), "products", Mean)

    expected_complicated = (
        "The average of the percentage of true values in "
        'the "purchased" of all instances of "log" for each "id" in "sessions" for '
        'the instance of "sessions" associated with this instance of "log" of all '
        'instances of "log" for each "id" in "products".'
    )
    assert describe_feature(agg_on_direct) == expected_complicated
def test_aggregation_description(es):
    """A plain aggregation description, and a stacked aggregation that embeds
    the inner description into the outer sentence."""
    mean_feat = AggregationFeature(IdentityFeature(es['log'].ww['value']), 'sessions', Mean)
    expected = 'The average of the "value" of all instances of "log" for each "id" in "sessions".'
    assert describe_feature(mean_feat) == expected

    # The stacked description nests the inner sentence (lowercased, with the
    # trailing period stripped) inside the outer one.
    sum_feat = AggregationFeature(mean_feat, 'customers', Sum)
    inner = expected[1:-1]
    expected_stacked = (
        'The sum of t{} of all instances of "sessions" for each "id" '
        'in "customers".'
    ).format(inner)
    assert describe_feature(sum_feat) == expected_stacked
# Beispiel #5 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_multioutput(es):
    """Graph of a single output slice of a multi-output aggregation: the DOT
    source must contain the slice's nodes/edges and exactly one HTML table
    per dataframe, with one row per expected label."""
    multioutput = AggregationFeature(
        IdentityFeature(es["log"].ww["zipcode"]), "sessions", NMostCommon
    )
    feat = FeatureOutputSlice(multioutput, 0)
    graph = graph_feature(feat).source

    feat_name = feat.get_name()
    # Node names derive from the full multioutput feature, not the slice.
    prim_node = "0_{}_n_most_common".format(multioutput.get_name())
    groupby_node = "{}_groupby_log--session_id".format(multioutput.get_name())

    sessions_table = "\u2605 sessions (target)"  # starred header marks the target dataframe
    log_table = "log"
    groupby_edge = 'log:session_id -> "{}"'.format(groupby_node)
    groupby_input = 'log:zipcode -> "{}"'.format(groupby_node)
    prim_input = '"{}" -> "{}"'.format(groupby_node, prim_node)
    feat_edge = '"{}" -> sessions:"{}"'.format(prim_node, feat_name)

    graph_components = [
        feat_name,
        prim_node,
        groupby_node,
        sessions_table,
        log_table,
        groupby_edge,
        groupby_input,
        prim_input,
        feat_edge,
    ]

    for component in graph_components:
        assert component in graph

    # Map each dataframe to the labels expected in its table rows.
    dataframes = {
        "log": [log_table, "zipcode", "session_id"],
        "sessions": [sessions_table, feat_name],
    }
    for dataframe in dataframes:
        # Exactly one <TABLE> element per dataframe in the DOT source.
        regex = r"{} \[label=<\n<TABLE.*?</TABLE>>".format(dataframe)
        matches = re.findall(regex, graph, re.DOTALL)
        assert len(matches) == 1

        rows = re.findall(r"<TR.*?</TR>", matches[0], re.DOTALL)
        assert len(rows) == len(dataframes[dataframe])
        for row in rows:
            # Every table row must match one expected label; matched labels
            # are consumed so each can be used at most once.
            matched = False
            for i in dataframes[dataframe]:
                if i in row:
                    matched = True
                    dataframes[dataframe].remove(i)
                    break
            assert matched
# Beispiel #6 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_aggregation_description_use_previous(es):
    """A use_previous window is described instead of "all instances"."""
    feat = AggregationFeature(
        IdentityFeature(es["log"].ww["value"]),
        "sessions",
        Mean,
        use_previous="5d",
    )
    expected = (
        'The average of the "value" of the previous 5 days of "log" '
        'for each "id" in "sessions".'
    )
    assert describe_feature(feat) == expected
def test_generic_description(es):
    """Primitives with no description template fall back to a generic
    "The result of applying ..." sentence."""

    class NoName(TransformPrimitive):
        # No ``name`` attribute: the description falls back to the class name.
        input_types = [ColumnSchema(semantic_tags={'category'})]
        output_type = ColumnSchema(semantic_tags={'category'})

        def generate_name(self, base_feature_names):
            joined = u", ".join(base_feature_names)
            return u"%s(%s%s)" % ('NO_NAME', joined, self.get_args_string())

    class CustomAgg(AggregationPrimitive):
        name = 'custom_aggregation'
        input_types = [ColumnSchema(semantic_tags={'category'})]
        output_type = ColumnSchema(semantic_tags={'category'})

    class CustomTrans(TransformPrimitive):
        name = 'custom_transform'
        input_types = [ColumnSchema(semantic_tags={'category'})]
        output_type = ColumnSchema(semantic_tags={'category'})

    no_name_feat = TransformFeature(IdentityFeature(es['log'].ww['zipcode']), NoName)
    assert describe_feature(no_name_feat) == 'The result of applying NoName to the "zipcode".'

    # Named primitives are uppercased in the generic description.
    agg_feat = AggregationFeature(IdentityFeature(es['log'].ww['zipcode']), 'customers', CustomAgg)
    assert describe_feature(agg_feat) == (
        'The result of applying CUSTOM_AGGREGATION to the "zipcode" '
        'of all instances of "log" for each "id" in "customers".'
    )

    trans_feat = TransformFeature(IdentityFeature(es['log'].ww['zipcode']), CustomTrans)
    assert describe_feature(trans_feat) == 'The result of applying CUSTOM_TRANSFORM to the "zipcode".'
# Beispiel #8 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_aggregation_description_use_previous(es):
    """use_previous window description — appears to use the older
    Entity-based featuretools API (``es['sessions']`` objects) — TODO confirm.

    NOTE(review): duplicates an earlier test of the same name; if this file
    is imported as a single module, this later definition shadows it.
    """
    feature = AggregationFeature(es['log']['value'],
                                 es['sessions'],
                                 Mean,
                                 use_previous='5d')
    description = 'The average of the "value" of the previous 5 days of "log" for each "id" in "sessions".'

    assert describe_feature(feature) == description
def test_aggregation_description_where(es):
    """A where clause is described between the base feature and the groupby."""
    us_only = TransformFeature(
        IdentityFeature(es['log'].ww['countrycode']), EqualScalar('US')
    )
    feat = AggregationFeature(
        IdentityFeature(es['log'].ww['value']), 'sessions', Mean, where=us_only
    )
    expected = (
        'The average of the "value" of all instances of "log" where the '
        '"countrycode" is US for each "id" in "sessions".'
    )
    assert describe_feature(feat) == expected
def test_multioutput_description(es):
    """Descriptions for multi-output features and their output slices:
    default wording, list-based templates (shared and per-slice), and the
    IndexError raised for a slice beyond the template."""
    n_most_common = NMostCommon(2)
    n_most_common_feature = AggregationFeature(IdentityFeature(es['log'].ww['zipcode']), 'sessions', n_most_common)
    first_most_common_slice = n_most_common_feature[0]
    second_most_common_slice = n_most_common_feature[1]

    n_most_common_base = 'The 2 most common values of the "zipcode" of all instances of "log" for each "id" in "sessions".'
    n_most_common_first = 'The most common value of the "zipcode" of all instances of "log" ' \
                          'for each "id" in "sessions".'
    n_most_common_second = 'The 2nd most common value of the "zipcode" of all instances of ' \
                           '"log" for each "id" in "sessions".'

    assert describe_feature(n_most_common_feature) == n_most_common_base
    assert describe_feature(first_most_common_slice) == n_most_common_first
    assert describe_feature(second_most_common_slice) == n_most_common_second

    class CustomMultiOutput(TransformPrimitive):
        # Minimal 4-output transform primitive used to exercise generic and
        # templated slice descriptions below.
        name = "custom_multioutput"
        input_types = [ColumnSchema(semantic_tags={'category'})]
        return_type = ColumnSchema(semantic_tags={'category'})

        number_output_features = 4

    custom_feat = TransformFeature(IdentityFeature(es['log'].ww['zipcode']), CustomMultiOutput)

    # With no template, slices get generic "Nth output from applying" wording.
    generic_base = 'The result of applying CUSTOM_MULTIOUTPUT to the "zipcode".'
    generic_first = 'The 1st output from applying CUSTOM_MULTIOUTPUT to the "zipcode".'
    generic_second = 'The 2nd output from applying CUSTOM_MULTIOUTPUT to the "zipcode".'

    assert describe_feature(custom_feat) == generic_base
    assert describe_feature(custom_feat[0]) == generic_first
    assert describe_feature(custom_feat[1]) == generic_second

    # Two-entry template: first entry describes the whole feature, second is
    # reused for every slice with {nth_slice} substituted.
    CustomMultiOutput.description_template = ['the multioutput of {}',
                                              'the {nth_slice} multioutput part of {}']
    template_base = 'The multioutput of the "zipcode".'
    template_first_slice = 'The 1st multioutput part of the "zipcode".'
    template_second_slice = 'The 2nd multioutput part of the "zipcode".'
    template_third_slice = 'The 3rd multioutput part of the "zipcode".'
    template_fourth_slice = 'The 4th multioutput part of the "zipcode".'
    assert describe_feature(custom_feat) == template_base
    assert describe_feature(custom_feat[0]) == template_first_slice
    assert describe_feature(custom_feat[1]) == template_second_slice
    assert describe_feature(custom_feat[2]) == template_third_slice
    assert describe_feature(custom_feat[3]) == template_fourth_slice

    # Per-slice template entries: asking for a slice beyond the template
    # raises IndexError.
    CustomMultiOutput.description_template = ['the multioutput of {}',
                                              'the primary multioutput part of {}',
                                              'the secondary multioutput part of {}']
    custom_base = 'The multioutput of the "zipcode".'
    custom_first_slice = 'The primary multioutput part of the "zipcode".'
    custom_second_slice = 'The secondary multioutput part of the "zipcode".'
    bad_slice_error = 'Slice out of range of template'
    assert describe_feature(custom_feat) == custom_base
    assert describe_feature(custom_feat[0]) == custom_first_slice
    assert describe_feature(custom_feat[1]) == custom_second_slice
    with pytest.raises(IndexError, match=bad_slice_error):
        describe_feature(custom_feat[2])
# Beispiel #11 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_multioutput(es):
    """Graph of an output slice of a multi-output aggregation — appears to
    use the older Entity-based featuretools API (``es['sessions']``) —
    TODO confirm.

    NOTE(review): duplicates the earlier test_multioutput; if this file is
    imported as a single module, this later definition shadows it.
    """
    multioutput = AggregationFeature(es['log']['zipcode'], es['sessions'],
                                     NMostCommon)
    feat = FeatureOutputSlice(multioutput, 0)
    graph = graph_feature(feat).source

    feat_name = feat.get_name()
    # Node names derive from the full multioutput feature, not the slice.
    prim_node = '0_{}_n_most_common'.format(multioutput.get_name())
    groupby_node = '{}_groupby_log--session_id'.format(multioutput.get_name())

    sessions_table = '\u2605 sessions (target)'  # starred header marks the target entity
    log_table = 'log'
    groupby_edge = 'log:session_id -> "{}"'.format(groupby_node)
    groupby_input = 'log:zipcode -> "{}"'.format(groupby_node)
    prim_input = '"{}" -> "{}"'.format(groupby_node, prim_node)
    feat_edge = '"{}" -> sessions:"{}"'.format(prim_node, feat_name)

    graph_components = [
        feat_name, prim_node, groupby_node, sessions_table, log_table,
        groupby_edge, groupby_input, prim_input, feat_edge
    ]

    for component in graph_components:
        assert component in graph

    # Map each entity to the labels expected in its table rows.
    entities = {
        'log': [log_table, 'zipcode', 'session_id'],
        'sessions': [sessions_table, feat_name]
    }
    for entity in entities:
        # Exactly one <TABLE> element per entity in the DOT source.
        regex = r"{} \[label=<\n<TABLE.*?</TABLE>>".format(entity)
        matches = re.findall(regex, graph, re.DOTALL)
        assert len(matches) == 1

        rows = re.findall(r"<TR.*?</TR>", matches[0], re.DOTALL)
        assert len(rows) == len(entities[entity])
        for row in rows:
            # Every table row must match one expected label; matched labels
            # are consumed so each can be used at most once.
            matched = False
            for i in entities[entity]:
                if i in row:
                    matched = True
                    entities[entity].remove(i)
                    break
            assert matched
def test_aggregation(es):
    """Graph of a simple Count aggregation: the DOT source must contain all
    expected nodes/edges and exactly one table per dataframe, with one row
    per expected label."""
    feat = AggregationFeature(IdentityFeature(es['log'].ww['id']), 'sessions', Count)
    graph = graph_feature(feat).source

    feat_name = feat.get_name()
    prim_node = '0_{}_count'.format(feat_name)
    groupby_node = '{}_groupby_log--session_id'.format(feat_name)
    sessions_table = '\u2605 sessions (target)'
    log_table = 'log'

    expected_components = [
        feat_name,
        prim_node,
        groupby_node,
        sessions_table,
        log_table,
        'log:session_id -> "{}"'.format(groupby_node),         # groupby edge
        'log:id -> "{}"'.format(groupby_node),                 # groupby input
        '"{}" -> "{}"'.format(groupby_node, prim_node),        # primitive input
        '"{}" -> sessions:"{}"'.format(prim_node, feat_name),  # feature edge
    ]
    assert all(component in graph for component in expected_components)

    # Each dataframe maps to the labels expected in its HTML-like table rows.
    dataframes = {
        'log': [log_table, 'id', 'session_id'],
        'sessions': [sessions_table, feat_name],
    }
    for df_name, expected_labels in dataframes.items():
        table_pattern = r"{} \[label=<\n<TABLE.*?</TABLE>>".format(df_name)
        tables = re.findall(table_pattern, graph, re.DOTALL)
        assert len(tables) == 1

        rows = re.findall(r"<TR.*?</TR>", tables[0], re.DOTALL)
        assert len(rows) == len(expected_labels)
        for row in rows:
            # Each row must match one expected label; consume labels as they
            # match so every label is used at most once.
            hit = next((label for label in expected_labels if label in row), None)
            assert hit is not None
            expected_labels.remove(hit)
def test_direct_description(es):
    """DirectFeature descriptions: one hop, nested hops, and an aggregation
    stacked on a direct feature.

    NOTE(review): duplicates the earlier test_direct_description; if this
    file is imported as a single module, this later definition shadows it.
    """
    feature = DirectFeature(IdentityFeature(es['customers'].ww['loves_ice_cream']), 'sessions')
    description = 'The "loves_ice_cream" for the instance of "customers" associated ' \
                  'with this instance of "sessions".'
    assert describe_feature(feature) == description

    # A second direct hop extends the "associated with" chain.
    deep_direct = DirectFeature(feature, 'log')
    deep_description = 'The "loves_ice_cream" for the instance of "customers" ' \
                       'associated with the instance of "sessions" associated with ' \
                       'this instance of "log".'
    assert describe_feature(deep_direct) == deep_description

    # Aggregation on top of a direct feature on top of an aggregation.
    agg = AggregationFeature(IdentityFeature(es['log'].ww['purchased']), 'sessions', PercentTrue)
    complicated_direct = DirectFeature(agg, 'log')
    agg_on_direct = AggregationFeature(complicated_direct, 'products', Mean)

    complicated_description = 'The average of the percentage of true values in ' \
        'the "purchased" of all instances of "log" for each "id" in "sessions" for ' \
        'the instance of "sessions" associated with this instance of "log" of all ' \
        'instances of "log" for each "id" in "products".'
    assert describe_feature(agg_on_direct) == complicated_description
# Beispiel #14 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_aggregation_description_where(es):
    """A where clause is described between the base feature and the groupby.

    NOTE(review): black-formatted duplicate of the earlier
    test_aggregation_description_where; if this file is imported as a
    single module, this later definition shadows it.
    """
    where_feature = TransformFeature(
        IdentityFeature(es["log"].ww["countrycode"]), EqualScalar("US")
    )
    feature = AggregationFeature(
        IdentityFeature(es["log"].ww["value"]), "sessions", Mean, where=where_feature
    )
    description = (
        'The average of the "value" of all instances of "log" where the '
        '"countrycode" is US for each "id" in "sessions".'
    )

    assert describe_feature(feature) == description
    def _build_agg_features(self,
                            all_features,
                            parent_entity,
                            child_entity,
                            max_depth=0):
        """Build aggregation features of ``child_entity`` grouped onto
        ``parent_entity``.

        For each configured aggregation primitive this finds type-matching
        input features on the child entity, registers the resulting
        AggregationFeatures, and additionally registers where-clause variants
        subject to ``allow_where``, the where-stacking limit, and the
        configured ``where_primitives``.

        Args:
            all_features: accumulator of discovered features; new ones are
                registered via ``_handle_new_feature``.
            parent_entity: entity the aggregation groups onto.
            child_entity: entity whose features are aggregated.
            max_depth: remaining depth budget; ``None`` means unlimited.
        """
        # Negative depth budget means the limit has been exceeded: stop.
        if max_depth is not None and max_depth < 0:
            return

        # Inputs found on the child consume one unit of depth.
        new_max_depth = None
        if max_depth is not None:
            new_max_depth = max_depth - 1

        for agg_prim in self.agg_primitives:
            # if multiple input_types, only use first one for DFS
            input_types = agg_prim.input_types
            if type(input_types[0]) == list:
                input_types = input_types[0]

            features = self._features_by_type(all_features=all_features,
                                              entity=child_entity,
                                              variable_type=set(input_types),
                                              max_depth=new_max_depth)

            # remove features in relationship path
            relationship_path = self.es.find_backward_path(
                parent_entity.id, child_entity.id)

            features = [
                f for f in features if
                not self._feature_in_relationship_path(relationship_path, f)
            ]
            matching_inputs = match(input_types,
                                    features,
                                    commutative=agg_prim.commutative)
            # Candidate where clauses previously registered for this entity.
            wheres = list(self.where_clauses[child_entity.id])

            for matching_input in matching_inputs:
                if not check_stacking(agg_prim, matching_input):
                    continue
                new_f = AggregationFeature(matching_input,
                                           parent_entity=parent_entity,
                                           primitive=agg_prim)
                self._handle_new_feature(new_f, all_features)

                # Obey allow where
                if not agg_prim.allow_where:
                    continue

                # limit the stacking of where features
                # count up the the number of where features
                # in this feature and its dependencies
                feat_wheres = []
                for f in matching_input:
                    if isinstance(f,
                                  AggregationFeature) and f.where is not None:
                        feat_wheres.append(f)
                    for feat in f.get_dependencies(deep=True):
                        if (isinstance(feat, AggregationFeature)
                                and feat.where is not None):
                            feat_wheres.append(feat)

                if len(feat_wheres) >= self.where_stacking_limit:
                    continue

                # limits the aggregation feature by the given allowed feature types.
                if not any([
                        issubclass(type(agg_prim), type(primitive))
                        for primitive in self.where_primitives
                ]):
                    continue

                for where in wheres:
                    # limits the where feats so they are different than base feats
                    base_hashes = [f.hash() for f in new_f.base_features]
                    if any([
                            base_feat.hash() in base_hashes
                            for base_feat in where.base_features
                    ]):
                        continue

                    new_f = AggregationFeature(matching_input,
                                               parent_entity=parent_entity,
                                               where=where,
                                               primitive=agg_prim)

                    self._handle_new_feature(new_f, all_features)
    def _build_agg_features(self, all_features, parent_entity, child_entity,
                            max_depth, relationship_path):
        """Build aggregation features of ``child_entity`` grouped onto
        ``parent_entity`` along ``relationship_path``.

        For each aggregation primitive (subject to its entry in
        ``self.primitive_options``) this finds matching input features on the
        child entity, registers the resulting AggregationFeatures, and
        registers where-clause variants subject to the where-stacking limit
        and the configured ``where_primitives``.

        NOTE(review): a second, same-named method appears earlier in this
        file — presumably an older variant from a different class; confirm
        they are not meant to coexist in one class.
        """
        # Inputs found on the child consume one unit of depth.
        new_max_depth = None
        if max_depth is not None:
            new_max_depth = max_depth - 1
        for agg_prim in self.agg_primitives:
            current_options = self.primitive_options[agg_prim.name]

            # Skip entities excluded for this primitive by its options.
            if ignore_entity_for_primitive(current_options, child_entity):
                continue
            # if multiple input_types, only use first one for DFS
            input_types = agg_prim.input_types
            if type(input_types[0]) == list:
                input_types = input_types[0]

            def feature_filter(f):
                # Remove direct features of parent entity and features in relationship path.
                return (not _direct_of_entity(f, parent_entity)) \
                    and not self._feature_in_relationship_path(relationship_path, f)

            matching_inputs = self._get_matching_inputs(
                all_features,
                child_entity,
                new_max_depth,
                input_types,
                agg_prim,
                current_options,
                feature_filter=feature_filter)
            matching_inputs = filter_matches_by_options(
                matching_inputs, current_options)
            # Candidate where clauses previously registered for this entity.
            wheres = list(self.where_clauses[child_entity.id])

            for matching_input in matching_inputs:
                if not check_stacking(agg_prim, matching_input):
                    continue
                new_f = AggregationFeature(matching_input,
                                           parent_entity=parent_entity,
                                           relationship_path=relationship_path,
                                           primitive=agg_prim)
                self._handle_new_feature(new_f, all_features)

                # limit the stacking of where features
                # count up the the number of where features
                # in this feature and its dependencies
                feat_wheres = []
                for f in matching_input:
                    if isinstance(f,
                                  AggregationFeature) and f.where is not None:
                        feat_wheres.append(f)
                    for feat in f.get_dependencies(deep=True):
                        if (isinstance(feat, AggregationFeature)
                                and feat.where is not None):
                            feat_wheres.append(feat)

                if len(feat_wheres) >= self.where_stacking_limit:
                    continue

                # limits the aggregation feature by the given allowed feature types.
                if not any([
                        issubclass(type(agg_prim), type(primitive))
                        for primitive in self.where_primitives
                ]):
                    continue

                for where in wheres:
                    # limits the where feats so they are different than base feats
                    base_names = [f.unique_name() for f in new_f.base_features]
                    if any([
                            base_feat.unique_name() in base_names
                            for base_feat in where.base_features
                    ]):
                        continue

                    new_f = AggregationFeature(
                        matching_input,
                        parent_entity=parent_entity,
                        relationship_path=relationship_path,
                        where=where,
                        primitive=agg_prim)

                    self._handle_new_feature(new_f, all_features)
# Beispiel #17 — scraper artifact (example separator and vote count); commented out so the file parses as Python
def test_metadata(es, tmpdir):
    """describe_feature should honor per-feature descriptions, primitive
    templates, and a JSON metadata file that combines both.

    NOTE(review): black-formatted duplicate of the earlier test_metadata; if
    this file is imported as a single module, this later definition
    shadows it.
    """
    identity_feature_descriptions = {
        "sessions: device_name": "the name of the device used for each session",
        "customers: id": "the customer's id",
    }
    agg_feat = AggregationFeature(
        IdentityFeature(es["sessions"].ww["device_name"]), "customers", NumUnique
    )
    agg_description = (
        "The number of unique elements in the name of the device used for each "
        'session of all instances of "sessions" for each customer\'s id.'
    )
    assert (
        describe_feature(agg_feat, feature_descriptions=identity_feature_descriptions)
        == agg_description
    )

    transform_feat = GroupByTransformFeature(
        IdentityFeature(es["log"].ww["value"]),
        CumMean,
        IdentityFeature(es["log"].ww["session_id"]),
    )
    transform_description = 'The running average of the "value" for each "session_id".'
    primitive_templates = {"cum_mean": "the running average of {}"}
    assert (
        describe_feature(transform_feat, primitive_templates=primitive_templates)
        == transform_description
    )

    custom_agg = AggregationFeature(
        IdentityFeature(es["log"].ww["zipcode"]), "sessions", Mode
    )
    auto_description = 'The most frequently occurring value of the "zipcode" of all instances of "log" for each "id" in "sessions".'
    custom_agg_description = "the most frequently used zipcode"
    # Custom descriptions are capitalized and terminated with a period.
    custom_feature_description = (
        custom_agg_description[0].upper() + custom_agg_description[1:] + "."
    )
    feature_description_dict = {"sessions: MODE(log.zipcode)": custom_agg_description}
    assert describe_feature(custom_agg) == auto_description
    assert (
        describe_feature(custom_agg, feature_descriptions=feature_description_dict)
        == custom_feature_description
    )

    # The same information supplied through a metadata file should behave
    # identically to passing the dicts directly.
    metadata = {
        "feature_descriptions": {
            **identity_feature_descriptions,
            **feature_description_dict,
        },
        "primitive_templates": primitive_templates,
    }
    metadata_path = os.path.join(tmpdir, "description_metadata.json")
    with open(metadata_path, "w") as f:
        json.dump(metadata, f)
    assert describe_feature(agg_feat, metadata_file=metadata_path) == agg_description
    assert (
        describe_feature(transform_feat, metadata_file=metadata_path)
        == transform_description
    )
    assert (
        describe_feature(custom_agg, metadata_file=metadata_path)
        == custom_feature_description
    )