def __init__(self,
                 target_entity_id,
                 entityset,
                 agg_primitives=None,
                 trans_primitives=None,
                 where_primitives=None,
                 max_depth=2,
                 max_hlevel=2,
                 max_features=-1,
                 allowed_paths=None,
                 ignore_entities=None,
                 ignore_variables=None,
                 seed_features=None,
                 drop_contains=None,
                 drop_exact=None,
                 where_stacking_limit=1):
        """Validate the arguments and store the configuration on ``self``.

        Args:
            target_entity_id: Id of the target entity. Stored unchanged; it
                must not appear in ``ignore_entities``.
            entityset: Stored as ``self.es``.
            agg_primitives: Aggregation primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_aggregation_primitives()``. Defaults to Sum, Std,
                Max, Skew, Min, Mean, Count, PercentTrue, NUnique and Mode.
            trans_primitives: Transform primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_transform_primitives()``. Defaults to Day, Year,
                Month, Weekday, Haversine, NumWords and NumCharacters.
            where_primitives: Primitives given as objects or as names; names
                are resolved against the aggregation primitives. Defaults to
                Count.
            max_depth: Stored as ``self.max_depth``; ``-1`` is stored as
                ``None``.
            max_hlevel: Stored as ``self.max_hlevel``; ``-1`` is stored as
                ``None``.
            max_features: Stored unchanged.
            allowed_paths: Iterable of paths. When non-empty it is stored as
                a set of tuples, otherwise it is stored unchanged.
            ignore_entities: List of entity ids, stored as a set.
            ignore_variables: Mapping from entity id to an iterable of
                variables; stored as a ``defaultdict(set)``.
            seed_features: Stored unchanged, or ``[]`` when falsy.
            drop_contains: Stored unchanged, or ``[]`` when falsy.
            drop_exact: Stored unchanged, or ``[]`` when falsy.
            where_stacking_limit: Stored unchanged.

        Raises:
            TypeError: If ``ignore_entities`` is given but is not a list.
            AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
            ValueError: If a primitive name cannot be resolved.
        """
        # -1 is stored as None for max_depth and max_hlevel because DFS
        # terminates when the value is < 0.
        if max_depth == -1:
            max_depth = None
        self.max_depth = max_depth

        if max_hlevel == -1:
            max_hlevel = None
        self.max_hlevel = max_hlevel

        self.max_features = max_features

        # Paths are converted to tuples so they can be members of a set.
        self.allowed_paths = allowed_paths
        if allowed_paths:
            self.allowed_paths = {tuple(path) for path in allowed_paths}

        if ignore_entities is None:
            self.ignore_entities = set()
        else:
            if not isinstance(ignore_entities, list):
                raise TypeError('ignore_entities must be a list')
            assert target_entity_id not in ignore_entities,\
                "Can't ignore target_entity!"
            self.ignore_entities = set(ignore_entities)

        self.ignore_variables = defaultdict(set)
        if ignore_variables is not None:
            for eid, variables in ignore_variables.items():
                self.ignore_variables[eid] = set(variables)
        self.target_entity_id = target_entity_id
        self.es = entityset

        # A single template keeps the error a readable one-piece message
        # instead of a tuple of separate exception arguments.
        unknown_msg = ("Unknown {} primitive {}. "
                       "Call ft.primitives.list_primitives() to get"
                       " a list of available primitives")

        if agg_primitives is None:
            agg_primitives = [
                ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew, ftypes.Min,
                ftypes.Mean, ftypes.Count, ftypes.PercentTrue, ftypes.NUnique,
                ftypes.Mode
            ]
        self.agg_primitives = []
        agg_prim_dict = ftypes.get_aggregation_primitives()
        for a in agg_primitives:
            if is_string(a):
                if a.lower() not in agg_prim_dict:
                    raise ValueError(unknown_msg.format("aggregation", a))
                a = agg_prim_dict[a.lower()]
            self.agg_primitives.append(handle_primitive(a))

        if trans_primitives is None:
            trans_primitives = [
                ftypes.Day, ftypes.Year, ftypes.Month, ftypes.Weekday,
                ftypes.Haversine, ftypes.NumWords, ftypes.NumCharacters
            ]  # ftypes.TimeSince
        self.trans_primitives = []
        trans_prim_dict = ftypes.get_transform_primitives()
        for t in trans_primitives:
            if is_string(t):
                if t.lower() not in trans_prim_dict:
                    raise ValueError(unknown_msg.format("transform", t))
                t = trans_prim_dict[t.lower()]
            self.trans_primitives.append(handle_primitive(t))

        if where_primitives is None:
            where_primitives = [ftypes.Count]
        self.where_primitives = []
        for p in where_primitives:
            if is_string(p):
                # Where primitive names resolve against aggregation primitives.
                prim_obj = agg_prim_dict.get(p.lower(), None)
                if prim_obj is None:
                    raise ValueError(unknown_msg.format("where", p))
                p = prim_obj
            self.where_primitives.append(handle_primitive(p))

        self.seed_features = seed_features or []
        self.drop_exact = drop_exact or []
        self.drop_contains = drop_contains or []
        self.where_stacking_limit = where_stacking_limit
Example #2
0
    def __init__(self,
                 target_entity_id,
                 entityset,
                 filters=None,
                 agg_primitives=None,
                 trans_primitives=None,
                 where_primitives=None,
                 max_depth=None,
                 max_hlevel=None,
                 max_features=None,
                 allowed_paths=None,
                 ignore_entities=None,
                 ignore_variables=None,
                 seed_features=None,
                 drop_contains=None,
                 drop_exact=None,
                 where_stacking_limit=1):
        """Validate the arguments and store the configuration on ``self``.

        Args:
            target_entity_id: Id of the target entity. Stored unchanged; it
                must not appear in ``ignore_entities``.
            entityset: Stored as ``self.es``.
            filters: Filter objects exposing a ``filter_type`` attribute of
                ``'post_instance'`` or ``'traversal'``. Defaults to
                ``[TraverseUp(), LimitModeUniques()]``.
            agg_primitives: Aggregation primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_aggregation_primitives()``. Defaults to Sum, Std,
                Max, Skew, Min, Mean, Count, PercentTrue, NUnique and Mode.
            trans_primitives: Transform primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_transform_primitives()``. Defaults to Day, Year,
                Month, Weekday, Haversine, NumWords and NumCharacters.
            where_primitives: Primitives given as objects or as names; names
                are resolved against the aggregation primitives. Defaults to
                Count.
            max_depth: ``None`` is replaced by 2 and ``-1`` is stored as
                ``None``; any other value is stored unchanged.
            max_hlevel: ``None`` is replaced by 2 and ``-1`` is stored as
                ``None``; any other value is stored unchanged.
            max_features: ``None`` is replaced by ``-1``; any other value is
                stored unchanged.
            allowed_paths: Iterable of paths. When non-empty it is stored as
                a set of tuples, otherwise it is stored unchanged.
            ignore_entities: Iterable of entity ids, stored as a set.
            ignore_variables: Mapping from entity id to an iterable of
                variables; stored as a ``defaultdict(set)``.
            seed_features: Stored unchanged, or ``[]`` when falsy.
            drop_contains: Stored unchanged, or ``[]`` when falsy.
            drop_exact: Stored unchanged, or ``[]`` when falsy.
            where_stacking_limit: Stored unchanged.

        Raises:
            AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
            NotImplementedError: If a filter has an unknown ``filter_type``.
            ValueError: If a primitive name cannot be resolved.
        """
        # None selects the default of 2, while an explicit -1 is stored as None.
        if max_depth is None:
            max_depth = 2
        elif max_depth == -1:
            max_depth = None
        self.max_depth = max_depth

        if max_hlevel is None:
            max_hlevel = 2
        elif max_hlevel == -1:
            max_hlevel = None
        self.max_hlevel = max_hlevel

        if max_features is None:
            max_features = -1
        self.max_features = max_features

        # Paths are converted to tuples so they can be members of a set.
        self.allowed_paths = allowed_paths
        if allowed_paths:
            self.allowed_paths = {tuple(path) for path in allowed_paths}

        if ignore_entities is None:
            self.ignore_entities = set()
        else:
            assert target_entity_id not in ignore_entities,\
                "Can't ignore target_entity!"
            self.ignore_entities = set(ignore_entities)

        self.ignore_variables = defaultdict(set)
        if ignore_variables is not None:
            for eid, variables in ignore_variables.items():
                self.ignore_variables[eid] = set(variables)
        self.target_entity_id = target_entity_id
        self.es = entityset

        if filters is None:
            filters = [TraverseUp(), LimitModeUniques()]

        # Split the filters into two lists according to their declared type.
        self.post_instance_filters = []
        self.traversal_filters = []

        for f in filters:
            if f.filter_type == 'post_instance':
                self.post_instance_filters.append(f)
            elif f.filter_type == 'traversal':
                self.traversal_filters.append(f)
            else:
                raise NotImplementedError("Unknown filter type {}".format(
                    f.filter_type))

        # A single template keeps the error a readable one-piece message
        # instead of a tuple of separate exception arguments.
        unknown_msg = ("Unknown {} primitive {}. "
                       "Call ft.primitives.list_primitives() to get"
                       " a list of available primitives")

        if agg_primitives is None:
            agg_primitives = [
                ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew, ftypes.Min,
                ftypes.Mean, ftypes.Count, ftypes.PercentTrue, ftypes.NUnique,
                ftypes.Mode
            ]
        self.agg_primitives = []
        agg_prim_dict = ftypes.get_aggregation_primitives()
        # NOTE(review): ``basestring`` is a Python 2 builtin; presumably a
        # compat import supplies it elsewhere in the file - confirm.
        for a in agg_primitives:
            if isinstance(a, basestring):
                if a.lower() not in agg_prim_dict:
                    raise ValueError(unknown_msg.format("aggregation", a))
                a = agg_prim_dict[a.lower()]
            self.agg_primitives.append(a)

        if trans_primitives is None:
            trans_primitives = [
                ftypes.Day, ftypes.Year, ftypes.Month, ftypes.Weekday,
                ftypes.Haversine, ftypes.NumWords, ftypes.NumCharacters
            ]  # ftypes.TimeSince
        self.trans_primitives = []
        trans_prim_dict = ftypes.get_transform_primitives()
        for t in trans_primitives:
            if isinstance(t, basestring):
                if t.lower() not in trans_prim_dict:
                    raise ValueError(unknown_msg.format("transform", t))
                t = trans_prim_dict[t.lower()]
            self.trans_primitives.append(t)

        if where_primitives is None:
            where_primitives = [ftypes.Count]
        self.where_primitives = []
        for p in where_primitives:
            if isinstance(p, basestring):
                # Where primitive names resolve against aggregation primitives.
                prim_obj = agg_prim_dict.get(p.lower(), None)
                if prim_obj is None:
                    raise ValueError(unknown_msg.format("where", p))
                p = prim_obj
            self.where_primitives.append(p)

        self.seed_features = seed_features or []
        self.drop_exact = drop_exact or []
        self.drop_contains = drop_contains or []
        self.where_stacking_limit = where_stacking_limit
    def __init__(self,
                 target_entity_id,
                 entityset,
                 agg_primitives=None,
                 trans_primitives=None,
                 where_primitives=None,
                 max_depth=2,
                 max_hlevel=2,
                 max_features=-1,
                 allowed_paths=None,
                 ignore_entities=None,
                 ignore_variables=None,
                 seed_features=None,
                 drop_contains=None,
                 drop_exact=None,
                 where_stacking_limit=1):
        """Validate the arguments and store the configuration on ``self``.

        Args:
            target_entity_id: Id of the target entity. Stored unchanged; it
                must not appear in ``ignore_entities``.
            entityset: Stored as ``self.es``.
            agg_primitives: Aggregation primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_aggregation_primitives()``. Defaults to Sum, Std,
                Max, Skew, Min, Mean, Count, PercentTrue, NUnique and Mode.
            trans_primitives: Transform primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_transform_primitives()``. Defaults to Day, Year,
                Month, Weekday, Haversine, NumWords and NumCharacters.
            where_primitives: Primitives given as objects or as names; names
                are resolved against the aggregation primitives. Defaults to
                Count.
            max_depth: Stored as ``self.max_depth``; ``-1`` is stored as
                ``None``.
            max_hlevel: Stored as ``self.max_hlevel``; ``-1`` is stored as
                ``None``.
            max_features: Stored unchanged.
            allowed_paths: Iterable of paths. When non-empty it is stored as
                a set of tuples, otherwise it is stored unchanged.
            ignore_entities: List of entity ids, stored as a set.
            ignore_variables: Mapping from entity id to an iterable of
                variables; stored as a ``defaultdict(set)``.
            seed_features: Stored unchanged, or ``[]`` when falsy.
            drop_contains: Stored unchanged, or ``[]`` when falsy.
            drop_exact: Stored unchanged, or ``[]`` when falsy.
            where_stacking_limit: Stored unchanged.

        Raises:
            TypeError: If ``ignore_entities`` is given but is not a list.
            AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
            ValueError: If a primitive name cannot be resolved.
        """
        # -1 is stored as None for max_depth and max_hlevel because DFS
        # terminates when the value is < 0.
        if max_depth == -1:
            max_depth = None
        self.max_depth = max_depth

        if max_hlevel == -1:
            max_hlevel = None
        self.max_hlevel = max_hlevel

        self.max_features = max_features

        # Paths are converted to tuples so they can be members of a set.
        self.allowed_paths = allowed_paths
        if allowed_paths:
            self.allowed_paths = {tuple(path) for path in allowed_paths}

        if ignore_entities is None:
            self.ignore_entities = set()
        else:
            if not isinstance(ignore_entities, list):
                raise TypeError('ignore_entities must be a list')
            assert target_entity_id not in ignore_entities,\
                "Can't ignore target_entity!"
            self.ignore_entities = set(ignore_entities)

        self.ignore_variables = defaultdict(set)
        if ignore_variables is not None:
            for eid, variables in ignore_variables.items():
                self.ignore_variables[eid] = set(variables)
        self.target_entity_id = target_entity_id
        self.es = entityset

        # A single template keeps the error a readable one-piece message
        # instead of a tuple of separate exception arguments.
        unknown_msg = ("Unknown {} primitive {}. "
                       "Call ft.primitives.list_primitives() to get"
                       " a list of available primitives")

        if agg_primitives is None:
            agg_primitives = [ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew,
                              ftypes.Min, ftypes.Mean, ftypes.Count,
                              ftypes.PercentTrue, ftypes.NUnique, ftypes.Mode]
        self.agg_primitives = []
        agg_prim_dict = ftypes.get_aggregation_primitives()
        for a in agg_primitives:
            if is_string(a):
                if a.lower() not in agg_prim_dict:
                    raise ValueError(unknown_msg.format("aggregation", a))
                a = agg_prim_dict[a.lower()]

            self.agg_primitives.append(a)

        if trans_primitives is None:
            trans_primitives = [ftypes.Day, ftypes.Year, ftypes.Month,
                                ftypes.Weekday, ftypes.Haversine,
                                ftypes.NumWords, ftypes.NumCharacters]  # ftypes.TimeSince
        self.trans_primitives = []
        trans_prim_dict = ftypes.get_transform_primitives()
        for t in trans_primitives:
            if is_string(t):
                if t.lower() not in trans_prim_dict:
                    raise ValueError(unknown_msg.format("transform", t))
                t = trans_prim_dict[t.lower()]

            self.trans_primitives.append(t)

        if where_primitives is None:
            where_primitives = [ftypes.Count]
        self.where_primitives = []
        for p in where_primitives:
            if is_string(p):
                # Where primitive names resolve against aggregation primitives.
                prim_obj = agg_prim_dict.get(p.lower(), None)
                if prim_obj is None:
                    raise ValueError(unknown_msg.format("where", p))
                p = prim_obj

            self.where_primitives.append(p)

        self.seed_features = seed_features or []
        self.drop_exact = drop_exact or []
        self.drop_contains = drop_contains or []
        self.where_stacking_limit = where_stacking_limit
    def __init__(self,
                 target_entity_id,
                 entityset,
                 filters=None,
                 agg_primitives=None,
                 trans_primitives=None,
                 where_primitives=None,
                 max_depth=None,
                 max_hlevel=None,
                 max_features=None,
                 allowed_paths=None,
                 ignore_entities=None,
                 ignore_variables=None,
                 seed_features=None,
                 drop_contains=None,
                 drop_exact=None,
                 where_stacking_limit=1):
        """Validate the arguments and store the configuration on ``self``.

        Args:
            target_entity_id: Id of the target entity. Stored unchanged; it
                must not appear in ``ignore_entities``.
            entityset: Stored as ``self.es``.
            filters: Filter objects exposing a ``filter_type`` attribute of
                ``'post_instance'`` or ``'traversal'``. Defaults to
                ``[TraverseUp(), LimitModeUniques()]``.
            agg_primitives: Aggregation primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_aggregation_primitives()``. Defaults to Sum, Std,
                Max, Skew, Min, Mean, Count, PercentTrue, NUnique and Mode.
            trans_primitives: Transform primitives, given either as primitive
                objects or as names that are looked up case-insensitively in
                ``ftypes.get_transform_primitives()``. Defaults to Day, Year,
                Month, Weekday, Haversine, NumWords and NumCharacters.
            where_primitives: Primitives given as objects or as names; names
                are resolved against the aggregation primitives. Defaults to
                Count.
            max_depth: ``None`` is replaced by 2 and ``-1`` is stored as
                ``None``; any other value is stored unchanged.
            max_hlevel: ``None`` is replaced by 2 and ``-1`` is stored as
                ``None``; any other value is stored unchanged.
            max_features: ``None`` is replaced by ``-1``; any other value is
                stored unchanged.
            allowed_paths: Iterable of paths. When non-empty it is stored as
                a set of tuples, otherwise it is stored unchanged.
            ignore_entities: Iterable of entity ids, stored as a set.
            ignore_variables: Mapping from entity id to an iterable of
                variables; stored as a ``defaultdict(set)``.
            seed_features: Stored unchanged, or ``[]`` when falsy.
            drop_contains: Stored unchanged, or ``[]`` when falsy.
            drop_exact: Stored unchanged, or ``[]`` when falsy.
            where_stacking_limit: Stored unchanged.

        Raises:
            AssertionError: If ``target_entity_id`` is in ``ignore_entities``.
            NotImplementedError: If a filter has an unknown ``filter_type``.
            ValueError: If a primitive name cannot be resolved.
        """
        # None selects the default of 2, while an explicit -1 is stored as None.
        if max_depth is None:
            max_depth = 2
        elif max_depth == -1:
            max_depth = None
        self.max_depth = max_depth

        if max_hlevel is None:
            max_hlevel = 2
        elif max_hlevel == -1:
            max_hlevel = None
        self.max_hlevel = max_hlevel

        if max_features is None:
            max_features = -1
        self.max_features = max_features

        # Paths are converted to tuples so they can be members of a set.
        self.allowed_paths = allowed_paths
        if allowed_paths:
            self.allowed_paths = {tuple(path) for path in allowed_paths}

        if ignore_entities is None:
            self.ignore_entities = set()
        else:
            assert target_entity_id not in ignore_entities,\
                "Can't ignore target_entity!"
            self.ignore_entities = set(ignore_entities)

        self.ignore_variables = defaultdict(set)
        if ignore_variables is not None:
            for eid, variables in ignore_variables.items():
                self.ignore_variables[eid] = set(variables)
        self.target_entity_id = target_entity_id
        self.es = entityset

        if filters is None:
            filters = [TraverseUp(),
                       LimitModeUniques()]

        # Split the filters into two lists according to their declared type.
        self.post_instance_filters = []
        self.traversal_filters = []

        for f in filters:
            if f.filter_type == 'post_instance':
                self.post_instance_filters.append(f)
            elif f.filter_type == 'traversal':
                self.traversal_filters.append(f)
            else:
                raise NotImplementedError("Unknown filter type {}"
                                          .format(f.filter_type))

        # A single template keeps the error a readable one-piece message
        # instead of a tuple of separate exception arguments.
        unknown_msg = ("Unknown {} primitive {}. "
                       "Call ft.primitives.list_primitives() to get"
                       " a list of available primitives")

        if agg_primitives is None:
            agg_primitives = [ftypes.Sum, ftypes.Std, ftypes.Max, ftypes.Skew,
                              ftypes.Min, ftypes.Mean, ftypes.Count,
                              ftypes.PercentTrue, ftypes.NUnique, ftypes.Mode]
        self.agg_primitives = []
        agg_prim_dict = ftypes.get_aggregation_primitives()
        # NOTE(review): ``basestring`` is a Python 2 builtin; presumably a
        # compat import supplies it elsewhere in the file - confirm.
        for a in agg_primitives:
            if isinstance(a, basestring):
                if a.lower() not in agg_prim_dict:
                    raise ValueError(unknown_msg.format("aggregation", a))
                a = agg_prim_dict[a.lower()]
            self.agg_primitives.append(a)

        if trans_primitives is None:
            trans_primitives = [ftypes.Day, ftypes.Year, ftypes.Month,
                                ftypes.Weekday, ftypes.Haversine,
                                ftypes.NumWords, ftypes.NumCharacters]  # ftypes.TimeSince
        self.trans_primitives = []
        trans_prim_dict = ftypes.get_transform_primitives()
        for t in trans_primitives:
            if isinstance(t, basestring):
                if t.lower() not in trans_prim_dict:
                    raise ValueError(unknown_msg.format("transform", t))
                t = trans_prim_dict[t.lower()]
            self.trans_primitives.append(t)

        if where_primitives is None:
            where_primitives = [ftypes.Count]
        self.where_primitives = []
        for p in where_primitives:
            if isinstance(p, basestring):
                # Where primitive names resolve against aggregation primitives.
                prim_obj = agg_prim_dict.get(p.lower(), None)
                if prim_obj is None:
                    raise ValueError(unknown_msg.format("where", p))
                p = prim_obj
            self.where_primitives.append(p)

        self.seed_features = seed_features or []
        self.drop_exact = drop_exact or []
        self.drop_contains = drop_contains or []
        self.where_stacking_limit = where_stacking_limit