Python filter_matches_by_options Examples, featuretools.primitives.options_utils.filter_matches_by_options Python Examples

Example #1

0

Show file

File: deep_feature_synthesis.py Project: vikibytes/featuretools

    def _get_matching_inputs(self,
                             all_features,
                             entity,
                             max_depth,
                             input_types,
                             primitive,
                             primitive_options,
                             require_direct_input=False,
                             feature_filter=None):
        """Collect the input combinations on ``entity`` for ``primitive``.

        Candidate features are fetched with ``self._features_by_type``,
        optionally narrowed by ``feature_filter``, combined by ``match`` and
        finally passed through ``filter_matches_by_options``.

        Args:
            all_features: Forwarded to ``self._features_by_type``.
            entity: Forwarded to ``self._features_by_type``.
            max_depth: Forwarded to ``self._features_by_type``.
            input_types: Input types handed to ``match``; their set is used
                as ``variable_type`` for the candidate lookup.
            primitive: Primitive whose ``commutative`` attribute is forwarded
                to ``match``.
            primitive_options: Options given to ``filter_matches_by_options``.
            require_direct_input: Forwarded to ``match``. When truthy,
                combinations for which ``_all_direct_and_same_path`` is
                truthy are dropped as well.
            feature_filter: Optional predicate; only candidates for which it
                returns a truthy value are kept.

        Returns:
            The result of ``filter_matches_by_options`` for the surviving
            combinations.
        """
        candidates = self._features_by_type(all_features=all_features,
                                            entity=entity,
                                            max_depth=max_depth,
                                            variable_type=set(input_types))
        if feature_filter:
            candidates = list(filter(feature_filter, candidates))

        combos = match(input_types,
                       candidates,
                       commutative=primitive.commutative,
                       require_direct_input=require_direct_input)

        if require_direct_input:
            # Don't create trans features of inputs which are all direct
            # features with the same relationship_path.
            kept = set()
            for combo in combos:
                if _all_direct_and_same_path(combo):
                    continue
                kept.add(combo)
            combos = kept

        return filter_matches_by_options(combos, primitive_options)

Example #2

0

Show file

File: deep_feature_synthesis.py Project: alteryx/featuretools

    def _get_matching_inputs(
        self,
        all_features,
        dataframe,
        max_depth,
        input_types,
        primitive,
        primitive_options,
        require_direct_input=False,
        feature_filter=None,
    ):
        """Gather the input combinations on ``dataframe`` for ``primitive``.

        ``input_types`` is treated as a list of signatures when its first
        element is a list; otherwise it is wrapped into a one-element list.
        For each signature, candidate features come from
        ``self._features_by_type``, are optionally narrowed by
        ``feature_filter`` and are combined by ``match``.

        Args:
            all_features: Forwarded to ``self._features_by_type``.
            dataframe: Forwarded to ``self._features_by_type``.
            max_depth: Forwarded to ``self._features_by_type``.
            input_types: One signature, or a list of signatures.
            primitive: Primitive whose ``commutative`` attribute is forwarded
                to ``match`` and ``filter_matches_by_options``.
            primitive_options: Options given to ``filter_matches_by_options``.
            require_direct_input: Forwarded to ``match``. When truthy,
                combinations for which ``_all_direct_and_same_path`` is
                truthy are dropped as well.
            feature_filter: Optional predicate; only candidates for which it
                returns a truthy value are kept.

        Returns:
            list: The combinations returned by ``filter_matches_by_options``
            for which ``_match_contains_numeric_foreign_key`` is falsy.
        """
        signatures = input_types
        if not isinstance(signatures[0], list):
            signatures = [signatures]

        candidates = []
        for signature in signatures:
            typed_features = self._features_by_type(
                all_features=all_features,
                dataframe=dataframe,
                max_depth=max_depth,
                column_schemas=list(signature),
            )
            if feature_filter:
                typed_features = list(filter(feature_filter, typed_features))

            candidates.extend(
                match(
                    signature,
                    typed_features,
                    commutative=primitive.commutative,
                    require_direct_input=require_direct_input,
                )
            )

        if require_direct_input:
            # Don't create trans features of inputs which are all direct
            # features with the same relationship_path.
            kept = set()
            for combo in candidates:
                if _all_direct_and_same_path(combo):
                    continue
                kept.add(combo)
            candidates = kept

        candidates = filter_matches_by_options(
            candidates,
            primitive_options,
            commutative=primitive.commutative,
        )

        # Don't build features on numeric foreign key columns
        return [
            combo for combo in candidates
            if not _match_contains_numeric_foreign_key(combo)
        ]

Example #3

0

Show file

File: deep_feature_synthesis.py Project: vikibytes/featuretools

    def _build_agg_features(self, all_features, parent_entity, child_entity,
                            max_depth, relationship_path):
        """Build aggregation features from ``child_entity`` for ``parent_entity``.

        For every primitive in ``self.agg_primitives`` the matching inputs on
        ``child_entity`` are looked up and one ``AggregationFeature`` is
        created per input combination that passes ``check_stacking``.
        Where-clause variants are then created for the entries of
        ``self.where_clauses[child_entity.id]``, subject to
        ``self.where_stacking_limit`` and ``self.where_primitives``.

        Args:
            all_features: Passed to ``self._get_matching_inputs`` and
                ``self._handle_new_feature``.
            parent_entity: Passed as ``parent_entity`` to every
                ``AggregationFeature``; direct features of it are excluded
                from the inputs.
            child_entity: Entity whose features are searched for inputs.
            max_depth: Depth limit or ``None``. Inputs are looked up with
                ``max_depth - 1`` when it is not ``None``.
            relationship_path: Passed as ``relationship_path`` to every
                ``AggregationFeature``; features for which
                ``self._feature_in_relationship_path`` is truthy are excluded
                from the inputs.

        Returns:
            None. Every new feature is handed to ``self._handle_new_feature``.
        """
        new_max_depth = None
        if max_depth is not None:
            new_max_depth = max_depth - 1

        # The filter does not depend on the primitive, so define it once.
        def feature_filter(f):
            # Remove direct features of parent entity and features in relationship path.
            return (not _direct_of_entity(f, parent_entity)
                    and not self._feature_in_relationship_path(
                        relationship_path, f))

        for agg_prim in self.agg_primitives:
            current_options = self.primitive_options[agg_prim.name]

            if ignore_entity_for_primitive(current_options, child_entity):
                continue
            # if multiple input_types, only use first one for DFS
            input_types = agg_prim.input_types
            if isinstance(input_types[0], list):
                input_types = input_types[0]

            matching_inputs = self._get_matching_inputs(
                all_features,
                child_entity,
                new_max_depth,
                input_types,
                agg_prim,
                current_options,
                feature_filter=feature_filter)
            # NOTE(review): _get_matching_inputs is already given
            # current_options; this second pass looks redundant but is kept
            # because filter_matches_by_options is not visible here - confirm.
            matching_inputs = filter_matches_by_options(
                matching_inputs, current_options)
            wheres = list(self.where_clauses[child_entity.id])

            for matching_input in matching_inputs:
                if not check_stacking(agg_prim, matching_input):
                    continue
                new_f = AggregationFeature(matching_input,
                                           parent_entity=parent_entity,
                                           relationship_path=relationship_path,
                                           primitive=agg_prim)
                self._handle_new_feature(new_f, all_features)

                # limit the stacking of where features
                # count up the number of where features
                # in this feature and its dependencies
                feat_wheres = []
                for f in matching_input:
                    if isinstance(f,
                                  AggregationFeature) and f.where is not None:
                        feat_wheres.append(f)
                    for feat in f.get_dependencies(deep=True):
                        if (isinstance(feat, AggregationFeature)
                                and feat.where is not None):
                            feat_wheres.append(feat)

                if len(feat_wheres) >= self.where_stacking_limit:
                    continue

                # limits the aggregation feature by the given allowed feature types.
                if not any(
                        issubclass(type(agg_prim), type(primitive))
                        for primitive in self.where_primitives):
                    continue

                for where in wheres:
                    # limits the where feats so they are different than base feats
                    # NOTE: new_f is rebound below, so from the second
                    # iteration on these names come from the previous
                    # where-feature (statement order kept as in the original).
                    base_names = [f.unique_name() for f in new_f.base_features]
                    if any(
                            base_feat.unique_name() in base_names
                            for base_feat in where.base_features):
                        continue

                    new_f = AggregationFeature(
                        matching_input,
                        parent_entity=parent_entity,
                        relationship_path=relationship_path,
                        where=where,
                        primitive=agg_prim)

                    self._handle_new_feature(new_f, all_features)