def check_trans_primitive(primitive):
    """Resolve and validate a single transform primitive.

    Args:
        primitive: Either a primitive name, looked up case-insensitively in
            ``ftypes.get_transform_primitives()``, or a primitive object,
            which is handed to ``handle_primitive`` unchanged.

    Returns:
        The value returned by ``handle_primitive``; it is always a
        ``TransformPrimitive`` instance because anything else is rejected.

    Raises:
        ValueError: If ``primitive`` is a name that is not a registered
            transform primitive, or if the resolved object is not a
            ``TransformPrimitive`` instance.
    """
    trans_prim_dict = ftypes.get_transform_primitives()

    if is_string(primitive):
        key = primitive.lower()
        if key not in trans_prim_dict:
            # The fragments are concatenated into ONE message. Separating
            # them with commas would give ValueError three positional
            # arguments and render the error as a tuple.
            raise ValueError(
                "Unknown transform primitive {}. "
                "Call ft.primitives.list_primitives() to get"
                " a list of available primitives".format(primitive))
        primitive = trans_prim_dict[key]

    primitive = handle_primitive(primitive)
    if not isinstance(primitive, TransformPrimitive):
        raise ValueError("Primitive {} in trans_primitives or "
                         "groupby_trans_primitives is not a transform "
                         "primitive".format(type(primitive)))
    return primitive
def __init__(self,
             target_entity_id,
             entityset,
             agg_primitives=None,
             trans_primitives=None,
             where_primitives=None,
             max_depth=2,
             max_hlevel=2,
             max_features=-1,
             allowed_paths=None,
             ignore_entities=None,
             ignore_variables=None,
             seed_features=None,
             drop_contains=None,
             drop_exact=None,
             where_stacking_limit=1):
    """Store the synthesis configuration and resolve primitive names.

    Args:
        target_entity_id: Id of the target entity; stored as given.
        entityset: Stored as ``self.es``.
        agg_primitives: Aggregation primitives, as names or objects.
            ``None`` selects the built-in default list below.
        trans_primitives: Transform primitives, as names or objects.
            ``None`` selects the built-in default list below.
        where_primitives: Primitives looked up in the aggregation
            registry. ``None`` selects ``[ftypes.Count]``.
        max_depth: ``-1`` is stored as ``None``; other values as given.
        max_hlevel: ``-1`` is stored as ``None``; other values as given.
        max_features: Stored as given.
        allowed_paths: A non-empty collection is stored as a set of
            tuples; a falsy value is stored unchanged.
        ignore_entities: ``None`` or a list of entity ids; stored as a set.
        ignore_variables: Optional mapping of entity id to variables;
            stored as a ``defaultdict(set)``.
        seed_features: Stored as given, or ``[]`` when falsy.
        drop_contains: Stored as given, or ``[]`` when falsy.
        drop_exact: Stored as given, or ``[]`` when falsy.
        where_stacking_limit: Stored as given.

    Raises:
        TypeError: If ``ignore_entities`` is neither ``None`` nor a list.
        AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
        ValueError: If a primitive name is not found in its registry.
    """
    def _resolve(primitive, registry, kind):
        # Map a primitive name to its registry entry, then let
        # handle_primitive post-process names and objects alike.
        if is_string(primitive):
            key = primitive.lower()
            if key not in registry:
                # One concatenated message; comma-separated fragments
                # would turn the exception text into a tuple.
                raise ValueError(
                    "Unknown {} primitive {}. "
                    "Call ft.primitives.list_primitives() to get"
                    " a list of available primitives".format(kind,
                                                             primitive))
            primitive = registry[key]
        return handle_primitive(primitive)

    # -1 is translated to None because DFS terminates when the limit is < 0.
    self.max_depth = None if max_depth == -1 else max_depth
    self.max_hlevel = None if max_hlevel == -1 else max_hlevel
    self.max_features = max_features

    if allowed_paths:
        self.allowed_paths = set(tuple(path) for path in allowed_paths)
    else:
        self.allowed_paths = allowed_paths

    if ignore_entities is None:
        self.ignore_entities = set()
    else:
        if not isinstance(ignore_entities, list):
            raise TypeError('ignore_entities must be a list')
        # Raised explicitly (not via `assert`) so the check is kept when
        # Python runs with -O; exception type and message are unchanged.
        if target_entity_id in ignore_entities:
            raise AssertionError("Can't ignore target_entity!")
        self.ignore_entities = set(ignore_entities)

    self.ignore_variables = defaultdict(set)
    if ignore_variables is not None:
        for entity_id, variables in ignore_variables.items():
            self.ignore_variables[entity_id] = set(variables)

    self.target_entity_id = target_entity_id
    self.es = entityset

    if agg_primitives is None:
        agg_primitives = [
            ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew, ftypes.Min,
            ftypes.Mean, ftypes.Count, ftypes.PercentTrue, ftypes.NUnique,
            ftypes.Mode,
        ]
    agg_prim_dict = ftypes.get_aggregation_primitives()
    self.agg_primitives = [_resolve(prim, agg_prim_dict, "aggregation")
                           for prim in agg_primitives]

    if trans_primitives is None:
        # ftypes.TimeSince is not part of the default list.
        trans_primitives = [
            ftypes.Day, ftypes.Year, ftypes.Month, ftypes.Weekday,
            ftypes.Haversine, ftypes.NumWords, ftypes.NumCharacters,
        ]
    trans_prim_dict = ftypes.get_transform_primitives()
    self.trans_primitives = [_resolve(prim, trans_prim_dict, "transform")
                             for prim in trans_primitives]

    if where_primitives is None:
        where_primitives = [ftypes.Count]
    # Where primitives are resolved against the aggregation registry.
    self.where_primitives = [_resolve(prim, agg_prim_dict, "where")
                             for prim in where_primitives]

    self.seed_features = seed_features or []
    self.drop_exact = drop_exact or []
    self.drop_contains = drop_contains or []
    self.where_stacking_limit = where_stacking_limit
def __init__(self,
             target_entity_id,
             entityset,
             agg_primitives=None,
             trans_primitives=None,
             where_primitives=None,
             max_depth=2,
             max_hlevel=2,
             max_features=-1,
             allowed_paths=None,
             ignore_entities=None,
             ignore_variables=None,
             seed_features=None,
             drop_contains=None,
             drop_exact=None,
             where_stacking_limit=1):
    """Store the synthesis configuration and resolve primitive names.

    Args:
        target_entity_id: Id of the target entity; stored as given.
        entityset: Stored as ``self.es``.
        agg_primitives: Aggregation primitives, as names or objects.
            ``None`` selects the built-in default list below.
        trans_primitives: Transform primitives, as names or objects.
            ``None`` selects the built-in default list below.
        where_primitives: Primitives looked up in the aggregation
            registry. ``None`` selects ``[ftypes.Count]``.
        max_depth: ``-1`` is stored as ``None``; other values as given.
        max_hlevel: ``-1`` is stored as ``None``; other values as given.
        max_features: Stored as given.
        allowed_paths: A non-empty collection is stored as a set of
            tuples; a falsy value is stored unchanged.
        ignore_entities: ``None`` or a list of entity ids; stored as a set.
        ignore_variables: Optional mapping of entity id to variables;
            stored as a ``defaultdict(set)``.
        seed_features: Stored as given, or ``[]`` when falsy.
        drop_contains: Stored as given, or ``[]`` when falsy.
        drop_exact: Stored as given, or ``[]`` when falsy.
        where_stacking_limit: Stored as given.

    Raises:
        TypeError: If ``ignore_entities`` is neither ``None`` nor a list.
        AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
        ValueError: If a primitive name is not found in its registry.
    """
    def _lookup(candidate, registry, kind):
        # Names are resolved case-insensitively; non-string primitives are
        # returned exactly as they were passed in.
        if not is_string(candidate):
            return candidate
        key = candidate.lower()
        if key not in registry:
            # One concatenated message; comma-separated fragments would
            # turn the exception text into a tuple.
            raise ValueError(
                "Unknown {} primitive {}. "
                "Call ft.primitives.list_primitives() to get"
                " a list of available primitives".format(kind, candidate))
        return registry[key]

    # -1 is translated to None because DFS terminates when the limit is < 0.
    if max_depth == -1:
        max_depth = None
    if max_hlevel == -1:
        max_hlevel = None
    self.max_depth = max_depth
    self.max_hlevel = max_hlevel
    self.max_features = max_features

    self.allowed_paths = allowed_paths
    if allowed_paths:
        self.allowed_paths = set(tuple(path) for path in allowed_paths)

    if ignore_entities is None:
        self.ignore_entities = set()
    else:
        if not isinstance(ignore_entities, list):
            raise TypeError('ignore_entities must be a list')
        # Raised explicitly (not via `assert`) so the check is kept when
        # Python runs with -O; exception type and message are unchanged.
        if target_entity_id in ignore_entities:
            raise AssertionError("Can't ignore target_entity!")
        self.ignore_entities = set(ignore_entities)

    self.ignore_variables = defaultdict(set)
    if ignore_variables is not None:
        for entity_id, variables in ignore_variables.items():
            self.ignore_variables[entity_id] = set(variables)

    self.target_entity_id = target_entity_id
    self.es = entityset

    if agg_primitives is None:
        agg_primitives = [ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew,
                          ftypes.Min, ftypes.Mean, ftypes.Count,
                          ftypes.PercentTrue, ftypes.NUnique, ftypes.Mode]
    agg_prim_dict = ftypes.get_aggregation_primitives()
    self.agg_primitives = [_lookup(prim, agg_prim_dict, "aggregation")
                           for prim in agg_primitives]

    if trans_primitives is None:
        # ftypes.TimeSince is not part of the default list.
        trans_primitives = [ftypes.Day, ftypes.Year, ftypes.Month,
                            ftypes.Weekday, ftypes.Haversine,
                            ftypes.NumWords, ftypes.NumCharacters]
    trans_prim_dict = ftypes.get_transform_primitives()
    self.trans_primitives = [_lookup(prim, trans_prim_dict, "transform")
                             for prim in trans_primitives]

    if where_primitives is None:
        where_primitives = [ftypes.Count]
    # Where primitives are resolved against the aggregation registry.
    self.where_primitives = [_lookup(prim, agg_prim_dict, "where")
                             for prim in where_primitives]

    self.seed_features = seed_features or []
    self.drop_exact = drop_exact or []
    self.drop_contains = drop_contains or []
    self.where_stacking_limit = where_stacking_limit
def __init__(self,
             target_entity_id,
             entityset,
             filters=None,
             agg_primitives=None,
             trans_primitives=None,
             where_primitives=None,
             max_depth=None,
             max_hlevel=None,
             max_features=None,
             allowed_paths=None,
             ignore_entities=None,
             ignore_variables=None,
             seed_features=None,
             drop_contains=None,
             drop_exact=None,
             where_stacking_limit=1):
    """Store the synthesis configuration, filters and primitives.

    Args:
        target_entity_id: Id of the target entity; stored as given.
        entityset: Stored as ``self.es``.
        filters: Objects exposing a ``filter_type`` attribute of
            ``'post_instance'`` or ``'traversal'``. ``None`` selects
            ``[TraverseUp(), LimitModeUniques()]``.
        agg_primitives: Aggregation primitives, as names or objects.
            ``None`` selects the built-in default list below.
        trans_primitives: Transform primitives, as names or objects.
            ``None`` selects the built-in default list below.
        where_primitives: Primitives looked up in the aggregation
            registry. ``None`` selects ``[ftypes.Count]``.
        max_depth: ``None`` becomes 2 and ``-1`` is stored as ``None``.
        max_hlevel: ``None`` becomes 2 and ``-1`` is stored as ``None``.
        max_features: ``None`` becomes -1; other values stored as given.
        allowed_paths: A non-empty collection is stored as a set of
            tuples; a falsy value is stored unchanged.
        ignore_entities: Optional collection of entity ids; stored as a
            set.
        ignore_variables: Optional mapping of entity id to variables;
            stored as a ``defaultdict(set)``.
        seed_features: Stored as given, or ``[]`` when falsy.
        drop_contains: Stored as given, or ``[]`` when falsy.
        drop_exact: Stored as given, or ``[]`` when falsy.
        where_stacking_limit: Stored as given.

    Raises:
        AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
        NotImplementedError: If a filter has an unrecognised
            ``filter_type``.
        ValueError: If a primitive name is not found in its registry.
    """
    def _resolve(primitive, registry, kind):
        # Names are resolved case-insensitively; non-string primitives are
        # returned exactly as they were passed in.
        if not isinstance(primitive, basestring):
            return primitive
        key = primitive.lower()
        if key not in registry:
            # One concatenated message; comma-separated fragments would
            # turn the exception text into a tuple.
            raise ValueError(
                "Unknown {} primitive {}. "
                "Call ft.primitives.list_primitives() to get"
                " a list of available primitives".format(kind, primitive))
        return registry[key]

    # None selects the default of 2, while -1 is stored as None.
    if max_depth is None:
        max_depth = 2
    elif max_depth == -1:
        max_depth = None
    self.max_depth = max_depth

    if max_hlevel is None:
        max_hlevel = 2
    elif max_hlevel == -1:
        max_hlevel = None
    self.max_hlevel = max_hlevel

    self.max_features = -1 if max_features is None else max_features

    if allowed_paths:
        self.allowed_paths = set(tuple(path) for path in allowed_paths)
    else:
        self.allowed_paths = allowed_paths

    if ignore_entities is None:
        self.ignore_entities = set()
    else:
        # Raised explicitly (not via `assert`) so the check is kept when
        # Python runs with -O; exception type and message are unchanged.
        if target_entity_id in ignore_entities:
            raise AssertionError("Can't ignore target_entity!")
        self.ignore_entities = set(ignore_entities)

    self.ignore_variables = defaultdict(set)
    if ignore_variables is not None:
        for entity_id, variables in ignore_variables.items():
            self.ignore_variables[entity_id] = set(variables)

    self.target_entity_id = target_entity_id
    self.es = entityset

    if filters is None:
        filters = [TraverseUp(), LimitModeUniques()]
    # Filters are split into two lists according to their filter_type.
    self.post_instance_filters = []
    self.traversal_filters = []
    for f in filters:
        if f.filter_type == 'post_instance':
            self.post_instance_filters.append(f)
        elif f.filter_type == 'traversal':
            self.traversal_filters.append(f)
        else:
            raise NotImplementedError("Unknown filter type {}".format(
                f.filter_type))

    if agg_primitives is None:
        agg_primitives = [
            ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew, ftypes.Min,
            ftypes.Mean, ftypes.Count, ftypes.PercentTrue, ftypes.NUnique,
            ftypes.Mode,
        ]
    agg_prim_dict = ftypes.get_aggregation_primitives()
    self.agg_primitives = [_resolve(prim, agg_prim_dict, "aggregation")
                           for prim in agg_primitives]

    if trans_primitives is None:
        # ftypes.TimeSince is not part of the default list.
        trans_primitives = [
            ftypes.Day, ftypes.Year, ftypes.Month, ftypes.Weekday,
            ftypes.Haversine, ftypes.NumWords, ftypes.NumCharacters,
        ]
    trans_prim_dict = ftypes.get_transform_primitives()
    self.trans_primitives = [_resolve(prim, trans_prim_dict, "transform")
                             for prim in trans_primitives]

    if where_primitives is None:
        where_primitives = [ftypes.Count]
    # Where primitives are resolved against the aggregation registry.
    self.where_primitives = [_resolve(prim, agg_prim_dict, "where")
                             for prim in where_primitives]

    self.seed_features = seed_features or []
    self.drop_exact = drop_exact or []
    self.drop_contains = drop_contains or []
    self.where_stacking_limit = where_stacking_limit
def __init__(self,
             target_entity_id,
             entityset,
             filters=None,
             agg_primitives=None,
             trans_primitives=None,
             where_primitives=None,
             max_depth=None,
             max_hlevel=None,
             max_features=None,
             allowed_paths=None,
             ignore_entities=None,
             ignore_variables=None,
             seed_features=None,
             drop_contains=None,
             drop_exact=None,
             where_stacking_limit=1):
    """Store the synthesis configuration, filters and primitives.

    Args:
        target_entity_id: Id of the target entity; stored as given.
        entityset: Stored as ``self.es``.
        filters: Objects exposing a ``filter_type`` attribute of
            ``'post_instance'`` or ``'traversal'``. ``None`` selects
            ``[TraverseUp(), LimitModeUniques()]``.
        agg_primitives: Aggregation primitives, as names or objects.
            ``None`` selects the built-in default list below.
        trans_primitives: Transform primitives, as names or objects.
            ``None`` selects the built-in default list below.
        where_primitives: Primitives looked up in the aggregation
            registry. ``None`` selects ``[ftypes.Count]``.
        max_depth: ``None`` becomes 2 and ``-1`` is stored as ``None``.
        max_hlevel: ``None`` becomes 2 and ``-1`` is stored as ``None``.
        max_features: ``None`` becomes -1; other values stored as given.
        allowed_paths: A non-empty collection is stored as a set of
            tuples; a falsy value is stored unchanged.
        ignore_entities: Optional collection of entity ids; stored as a
            set.
        ignore_variables: Optional mapping of entity id to variables;
            stored as a ``defaultdict(set)``.
        seed_features: Stored as given, or ``[]`` when falsy.
        drop_contains: Stored as given, or ``[]`` when falsy.
        drop_exact: Stored as given, or ``[]`` when falsy.
        where_stacking_limit: Stored as given.

    Raises:
        AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
        NotImplementedError: If a filter has an unrecognised
            ``filter_type``.
        ValueError: If a primitive name is not found in its registry.
    """
    def _from_registry(item, registry, kind):
        # Non-string primitives pass through untouched; names are looked
        # up case-insensitively in the given registry.
        if isinstance(item, basestring):
            key = item.lower()
            if key not in registry:
                # One concatenated message; comma-separated fragments
                # would turn the exception text into a tuple.
                raise ValueError(
                    "Unknown {} primitive {}. "
                    "Call ft.primitives.list_primitives() to get"
                    " a list of available primitives".format(kind, item))
            item = registry[key]
        return item

    # None selects the default of 2, while -1 is stored as None.
    if max_depth is None:
        self.max_depth = 2
    elif max_depth == -1:
        self.max_depth = None
    else:
        self.max_depth = max_depth

    if max_hlevel is None:
        self.max_hlevel = 2
    elif max_hlevel == -1:
        self.max_hlevel = None
    else:
        self.max_hlevel = max_hlevel

    if max_features is None:
        max_features = -1
    self.max_features = max_features

    self.allowed_paths = allowed_paths
    if allowed_paths:
        self.allowed_paths = set(tuple(path) for path in allowed_paths)

    if ignore_entities is None:
        self.ignore_entities = set()
    else:
        # Raised explicitly (not via `assert`) so the check is kept when
        # Python runs with -O; exception type and message are unchanged.
        if target_entity_id in ignore_entities:
            raise AssertionError("Can't ignore target_entity!")
        self.ignore_entities = set(ignore_entities)

    self.ignore_variables = defaultdict(set)
    if ignore_variables is not None:
        for entity_id, variables in ignore_variables.items():
            self.ignore_variables[entity_id] = set(variables)

    self.target_entity_id = target_entity_id
    self.es = entityset

    if filters is None:
        filters = [TraverseUp(), LimitModeUniques()]
    # Filters are split into two lists according to their filter_type.
    self.post_instance_filters = []
    self.traversal_filters = []
    for f in filters:
        if f.filter_type == 'post_instance':
            self.post_instance_filters.append(f)
        elif f.filter_type == 'traversal':
            self.traversal_filters.append(f)
        else:
            raise NotImplementedError("Unknown filter type {}"
                                      .format(f.filter_type))

    if agg_primitives is None:
        agg_primitives = [ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew,
                          ftypes.Min, ftypes.Mean, ftypes.Count,
                          ftypes.PercentTrue, ftypes.NUnique, ftypes.Mode]
    agg_prim_dict = ftypes.get_aggregation_primitives()
    self.agg_primitives = [
        _from_registry(prim, agg_prim_dict, "aggregation")
        for prim in agg_primitives
    ]

    if trans_primitives is None:
        # ftypes.TimeSince is not part of the default list.
        trans_primitives = [ftypes.Day, ftypes.Year, ftypes.Month,
                            ftypes.Weekday, ftypes.Haversine,
                            ftypes.NumWords, ftypes.NumCharacters]
    trans_prim_dict = ftypes.get_transform_primitives()
    self.trans_primitives = [
        _from_registry(prim, trans_prim_dict, "transform")
        for prim in trans_primitives
    ]

    if where_primitives is None:
        where_primitives = [ftypes.Count]
    # Where primitives are resolved against the aggregation registry.
    self.where_primitives = [
        _from_registry(prim, agg_prim_dict, "where")
        for prim in where_primitives
    ]

    self.seed_features = seed_features or []
    self.drop_exact = drop_exact or []
    self.drop_contains = drop_contains or []
    self.where_stacking_limit = where_stacking_limit