def _get_matching_inputs(self, all_features, entity, max_depth, input_types,
                         primitive, primitive_options,
                         require_direct_input=False, feature_filter=None):
    """Find the input combinations ``primitive`` can be applied to.

    Candidate features are looked up through ``self._features_by_type``,
    optionally narrowed by ``feature_filter``, combined by ``match`` and
    finally passed through ``filter_matches_by_options``.

    Args:
        all_features: Forwarded to ``self._features_by_type``.
        entity: Forwarded to ``self._features_by_type``.
        max_depth: Forwarded to ``self._features_by_type``.
        input_types: Iterable of input types. Its set form drives the
            candidate lookup; the iterable itself is handed to ``match``.
        primitive: Object whose ``commutative`` attribute is passed to
            ``match``.
        primitive_options: Passed to ``filter_matches_by_options``.
        require_direct_input: Passed to ``match``. When truthy, every
            combination for which ``_all_direct_and_same_path`` is true
            is dropped as well.
        feature_filter: Optional predicate; when truthy, only candidates
            it accepts are kept.

    Returns:
        Whatever ``filter_matches_by_options`` returns for the surviving
        combinations.
    """
    candidates = self._features_by_type(
        all_features=all_features,
        entity=entity,
        max_depth=max_depth,
        variable_type=set(input_types),
    )
    if feature_filter:
        candidates = list(filter(feature_filter, candidates))

    combos = match(
        input_types,
        candidates,
        commutative=primitive.commutative,
        require_direct_input=require_direct_input,
    )

    if require_direct_input:
        # Don't create trans features of inputs which are all direct
        # features with the same relationship_path.
        combos = {
            combo for combo in combos
            if not _all_direct_and_same_path(combo)
        }

    return filter_matches_by_options(combos, primitive_options)
def _get_matching_inputs(
    self,
    all_features,
    dataframe,
    max_depth,
    input_types,
    primitive,
    primitive_options,
    require_direct_input=False,
    feature_filter=None,
):
    """Collect the input combinations ``primitive`` can be applied to.

    ``input_types`` may be a single signature or a list of signatures (a
    list whose first element is itself a list). Each signature is matched
    separately and the matches are pooled before the final filtering.

    Args:
        all_features: Forwarded to ``self._features_by_type``.
        dataframe: Forwarded to ``self._features_by_type``.
        max_depth: Forwarded to ``self._features_by_type``.
        input_types: One signature, or a list of signatures.
        primitive: Object whose ``commutative`` attribute is passed to
            ``match`` and to ``filter_matches_by_options``.
        primitive_options: Passed to ``filter_matches_by_options``.
        require_direct_input: Passed to ``match``. When truthy, every
            combination for which ``_all_direct_and_same_path`` is true
            is dropped as well (the pooled matches become a set).
        feature_filter: Optional predicate; when truthy, only candidates
            it accepts are kept.

    Returns:
        list: The filtered combinations for which
        ``_match_contains_numeric_foreign_key`` is false.
    """
    # Wrap a lone signature so both shapes go through the same loop.
    if isinstance(input_types[0], list):
        signatures = input_types
    else:
        signatures = [input_types]

    pooled = []
    for signature in signatures:
        candidates = self._features_by_type(
            all_features=all_features,
            dataframe=dataframe,
            max_depth=max_depth,
            column_schemas=list(signature),
        )
        if feature_filter:
            candidates = list(filter(feature_filter, candidates))
        pooled += match(
            signature,
            candidates,
            commutative=primitive.commutative,
            require_direct_input=require_direct_input,
        )

    if require_direct_input:
        # Don't create trans features of inputs which are all direct
        # features with the same relationship_path.
        pooled = {
            combo for combo in pooled
            if not _all_direct_and_same_path(combo)
        }

    allowed = filter_matches_by_options(
        pooled,
        primitive_options,
        commutative=primitive.commutative,
    )

    # Don't build features on numeric foreign key columns
    return [
        combo for combo in allowed
        if not _match_contains_numeric_foreign_key(combo)
    ]
def _build_agg_features(self, all_features, parent_entity, child_entity,
                        max_depth, relationship_path):
    """Create aggregation features on ``parent_entity`` from ``child_entity``.

    For every primitive in ``self.agg_primitives`` that is not ignored for
    ``child_entity``, each input combination accepted by
    ``check_stacking`` yields a plain ``AggregationFeature``. When the
    primitive is eligible for where clauses and the combination is below
    ``self.where_stacking_limit``, one additional feature is created per
    where clause registered for the child entity. Every new feature is
    handed to ``self._handle_new_feature``.

    Args:
        all_features: Passed to ``self._get_matching_inputs`` and to
            ``self._handle_new_feature``.
        parent_entity: Entity the new aggregation features are created on.
        child_entity: Entity whose features are aggregated; its ``id``
            keys ``self.where_clauses``.
        max_depth: Depth budget; reduced by one (``None`` stays ``None``)
            before being passed to ``self._get_matching_inputs``.
        relationship_path: Passed to each ``AggregationFeature`` and used
            to filter out features found in the path.

    Returns:
        None. Results are reported through ``self._handle_new_feature``.
    """
    new_max_depth = None if max_depth is None else max_depth - 1

    # Depends only on the arguments, so it is defined once rather than
    # once per primitive.
    def feature_filter(f):
        # Remove direct features of parent entity and features in
        # relationship path.
        return not (
            _direct_of_entity(f, parent_entity)
            or self._feature_in_relationship_path(relationship_path, f)
        )

    for agg_prim in self.agg_primitives:
        current_options = self.primitive_options[agg_prim.name]
        if ignore_entity_for_primitive(current_options, child_entity):
            continue

        # if multiple input_types, only use first one for DFS
        input_types = agg_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]

        matching_inputs = self._get_matching_inputs(
            all_features,
            child_entity,
            new_max_depth,
            input_types,
            agg_prim,
            current_options,
            feature_filter=feature_filter,
        )
        # NOTE(review): _get_matching_inputs appears to end with the same
        # filter_matches_by_options call; this second pass is kept as-is.
        # Confirm whether it is still needed.
        matching_inputs = filter_matches_by_options(
            matching_inputs, current_options)
        wheres = list(self.where_clauses[child_entity.id])

        # limits the aggregation feature by the given allowed feature
        # types. The answer depends only on the primitive, so it is
        # computed once instead of once per input combination.
        allows_where = any(
            issubclass(type(agg_prim), type(primitive))
            for primitive in self.where_primitives
        )

        for matching_input in matching_inputs:
            if not check_stacking(agg_prim, matching_input):
                continue

            new_f = AggregationFeature(
                matching_input,
                parent_entity=parent_entity,
                relationship_path=relationship_path,
                primitive=agg_prim,
            )
            self._handle_new_feature(new_f, all_features)

            if not allows_where:
                continue

            # limit the stacking of where features
            # count up the number of where features
            # in this feature and its dependencies
            stacked_wheres = 0
            for base in matching_input:
                related = [base] + list(base.get_dependencies(deep=True))
                for feat in related:
                    if (isinstance(feat, AggregationFeature)
                            and feat.where is not None):
                        stacked_wheres += 1
            if stacked_wheres >= self.where_stacking_limit:
                continue

            # limits the where feats so they are different than base feats.
            # Built once from the plain (no-where) aggregation so the
            # comparison never depends on which where-variant was created
            # last.
            base_names = {f.unique_name() for f in new_f.base_features}

            for where in wheres:
                if any(base_feat.unique_name() in base_names
                       for base_feat in where.base_features):
                    continue

                where_f = AggregationFeature(
                    matching_input,
                    parent_entity=parent_entity,
                    relationship_path=relationship_path,
                    where=where,
                    primitive=agg_prim,
                )
                self._handle_new_feature(where_f, all_features)