def _build_agg_features(self, all_features, parent_entity, child_entity,
                        max_depth, relationship_path):
    """Creates aggregation features on a parent entity from a child entity

    For every primitive in ``self.agg_primitives`` that is not ignored for
    ``child_entity``, an ``AggregationFeature`` is built for each matching
    set of input features and handed to ``self._handle_new_feature``. When
    the primitive is compatible with one of ``self.where_primitives`` and
    the inputs contain fewer than ``self.where_stacking_limit`` where
    features, one extra feature is built for each entry of
    ``self.where_clauses[child_entity.id]`` that does not share a base
    feature with the new feature.

    Args:
        all_features: Feature store forwarded to
            ``self._get_matching_inputs`` and ``self._handle_new_feature``.

        parent_entity: Entity the new aggregation features are defined on.

        child_entity: Entity whose features are used as inputs.

        max_depth (int or None): Inputs are looked up with
            ``max_depth - 1``, or with ``None`` when ``max_depth`` is None.

        relationship_path: Passed to every new feature as its
            ``relationship_path``; features found in it are excluded from
            the inputs.

    Returns:
        None. New features are reported through
        ``self._handle_new_feature``.
    """
    new_max_depth = None if max_depth is None else max_depth - 1

    def feature_filter(f):
        # Remove direct features of parent entity and features in
        # relationship path. Only depends on the method arguments, so it is
        # defined once instead of once per primitive.
        return not (
            _direct_of_entity(f, parent_entity)
            or self._feature_in_relationship_path(relationship_path, f)
        )

    def is_where_feature(feat):
        return isinstance(feat, AggregationFeature) and feat.where is not None

    for agg_prim in self.agg_primitives:
        current_options = self.primitive_options[agg_prim.name]

        if ignore_entity_for_primitive(current_options, child_entity):
            continue

        # if multiple input_types, only use first one for DFS
        input_types = agg_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]

        matching_inputs = self._get_matching_inputs(
            all_features,
            child_entity,
            new_max_depth,
            input_types,
            agg_prim,
            current_options,
            feature_filter=feature_filter)
        matching_inputs = filter_matches_by_options(matching_inputs,
                                                    current_options)
        wheres = list(self.where_clauses[child_entity.id])

        # limits the aggregation feature by the given allowed feature types.
        # This only depends on the primitive, so evaluate it once here
        # rather than once per matching input.
        where_allowed = any(
            issubclass(type(agg_prim), type(primitive))
            for primitive in self.where_primitives
        )

        for matching_input in matching_inputs:
            if not check_stacking(agg_prim, matching_input):
                continue

            new_f = AggregationFeature(matching_input,
                                       parent_entity=parent_entity,
                                       relationship_path=relationship_path,
                                       primitive=agg_prim)
            self._handle_new_feature(new_f, all_features)

            # No where-variant can be produced for this input, so skip the
            # dependency walk below.
            if not where_allowed or not wheres:
                continue

            # limit the stacking of where features
            # count up the number of where features
            # in this feature and its dependencies
            where_count = 0
            for f in matching_input:
                where_count += is_where_feature(f)
                where_count += sum(
                    is_where_feature(dep)
                    for dep in f.get_dependencies(deep=True)
                )

            if where_count >= self.where_stacking_limit:
                continue

            for where in wheres:
                # limits the where feats so they are different than base feats
                # NOTE(review): base_names is taken from the most recently
                # created feature, exactly as before. Presumably a
                # where-variant has the same base features as the plain
                # feature, in which case this could be computed once per
                # matching input -- confirm before hoisting.
                base_names = [f.unique_name() for f in new_f.base_features]
                if any(base_feat.unique_name() in base_names
                       for base_feat in where.base_features):
                    continue

                new_f = AggregationFeature(matching_input,
                                           parent_entity=parent_entity,
                                           relationship_path=relationship_path,
                                           where=where,
                                           primitive=agg_prim)
                self._handle_new_feature(new_f, all_features)
def _build_transform_features(self, all_features, entity, max_depth=0,
                              require_direct_input=False):
    """Creates trans_features for all the variables in an entity

    Builds one ``TransformFeature`` per matching input for every primitive
    in ``self.trans_primitives``, then one ``GroupByTransformFeature`` per
    (matching input, groupby) pair for every primitive in
    ``self.groupby_trans_primitives``. Inputs containing a feature whose
    ``number_output_features`` is not 1 are skipped. Each new feature is
    handed to ``self._handle_new_feature``.

    Args:
        all_features (dict[:class:`.Entity`.id:dict->[str->:class:`BaseFeature`]]):
            Dict containing a dict for each entity. Each nested dict
            has features as values with their ids as keys

        entity (Entity): Entity to calculate features for.

        max_depth (int or None): Inputs and groupbys are looked up with
            ``max_depth - 1``, or with ``None`` when ``max_depth`` is None.

        require_direct_input (bool): For transform primitives this is
            forwarded to ``self._get_matching_inputs``. For groupby
            primitives, a combination is skipped when its inputs and
            groupby are all direct features with the same relationship
            path, or when none of them is a ``DirectFeature``.
    """
    new_max_depth = None if max_depth is None else max_depth - 1

    for trans_prim in self.trans_primitives:
        current_options = self.primitive_options[trans_prim.name]
        if ignore_entity_for_primitive(current_options, entity):
            continue

        # if multiple input_types, only use first one for DFS
        input_types = trans_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]

        matching_inputs = self._get_matching_inputs(
            all_features,
            entity,
            new_max_depth,
            input_types,
            trans_prim,
            current_options,
            require_direct_input=require_direct_input)

        for matching_input in matching_inputs:
            # only single-output features are used as inputs
            if any(bf.number_output_features != 1 for bf in matching_input):
                continue
            new_f = TransformFeature(matching_input, primitive=trans_prim)
            self._handle_new_feature(all_features=all_features,
                                     new_feature=new_f)

    for groupby_prim in self.groupby_trans_primitives:
        current_options = self.primitive_options[groupby_prim.name]
        if ignore_entity_for_primitive(current_options, entity, groupby=True):
            continue

        input_types = groupby_prim.input_types[:]
        # if multiple input_types, only use first one for DFS
        if isinstance(input_types[0], list):
            input_types = input_types[0]

        matching_inputs = self._get_matching_inputs(all_features,
                                                    entity,
                                                    new_max_depth,
                                                    input_types,
                                                    groupby_prim,
                                                    current_options)

        # get columns to use as groupbys, use IDs as default unless other
        # groupbys specified
        groupbys_specified = any(
            'include_groupby_variables' in option
            and entity.id in option['include_groupby_variables']
            for option in current_options
        )
        if groupbys_specified:
            default_type = variable_types.PandasTypes._all
        else:
            default_type = {Id}

        groupby_matches = self._features_by_type(all_features=all_features,
                                                 entity=entity,
                                                 max_depth=new_max_depth,
                                                 variable_type=default_type)
        groupby_matches = filter_groupby_matches_by_options(groupby_matches,
                                                            current_options)

        # If require_direct_input, require a DirectFeature in input or as a
        # groupby, and don't create features of inputs/groupbys which are
        # all direct features with the same relationship path
        for matching_input in matching_inputs:
            if any(bf.number_output_features != 1 for bf in matching_input):
                continue

            for groupby in groupby_matches:
                if require_direct_input:
                    # NOTE(review): `groupby` is indexed with [0] when the
                    # feature is built below, but is appended here as a
                    # single element -- confirm this is intended.
                    candidates = matching_input + (groupby, )
                    if (_all_direct_and_same_path(candidates) or
                            not any(isinstance(feature, DirectFeature)
                                    for feature in candidates)):
                        continue

                new_f = GroupByTransformFeature(list(matching_input),
                                                groupby=groupby[0],
                                                primitive=groupby_prim)
                self._handle_new_feature(all_features=all_features,
                                         new_feature=new_f)