Esempio n. 1
0
    def __init__(self, tree, fields, objective_field=None):
        """Build a node, and recursively its children, from a tree dict.

        Args:
            tree: mapping describing the node. 'output', 'predicate' and
                'count' are required; 'id', 'children', 'g_sum' and 'h_sum'
                are optional.
            fields: stored as-is and handed down to every child node.
            objective_field: stored as ``objective_id`` and handed down to
                every child node.
        """
        self.fields = fields
        self.objective_id = objective_field
        self.output = tree['output']

        # The literal True marks a node without a real predicate; anything
        # else is a dict describing the split condition.
        node_predicate = tree['predicate']
        if node_predicate is True:
            self.predicate = True
        else:
            self.predicate = Predicate(node_predicate['operator'],
                                       node_predicate['field'],
                                       node_predicate['value'],
                                       node_predicate.get('term'))

        self.id = tree.get('id')

        # Children are instantiated with the runtime class so that
        # subclasses produce nodes of their own type.
        node_class = self.__class__
        child_dicts = tree['children'] if 'children' in tree else []
        self.children = [
            node_class(child, self.fields, objective_field=objective_field)
            for child in child_dicts]

        self.count = tree['count']
        self.g_sum = tree.get('g_sum')
        self.h_sum = tree.get('h_sum')
Esempio n. 2
0
 def __init__(self, predicates_list):
     """Build the list of predicates from their serialized form.

     Args:
         predicates_list: iterable whose items are either the literal
             ``True`` (kept unchanged) or mappings whose 'op', 'field',
             'value' and 'term' entries are read with ``.get`` and used
             to build a ``Predicate``.
     """
     self.predicates = []
     for predicate in predicates_list:
         # Identity check: only the True singleton is the "always true"
         # marker. An equality test would also accept 1 or 1.0.
         if predicate is True:
             self.predicates.append(True)
         else:
             self.predicates.append(
                 Predicate(predicate.get('op'), predicate.get('field'),
                           predicate.get('value'), predicate.get('term')))
Esempio n. 3
0
 def __init__(self, tree, offsets):
     """Build a node from a tree structure addressed through offsets.

     Args:
         tree: structure understood by ``get_predicate`` and ``get_node``.
         offsets: mapping from attribute name to the index of that
             attribute inside the node returned by ``get_node``. Must
             contain the "children#" and "children" entries.
     """
     node_predicate = get_predicate(tree)
     if not isinstance(node_predicate, bool):
         # Non-boolean predicates are five-item sequences; the last item
         # is not used here.
         operator, field, value, term, _ = node_predicate
         node_predicate = Predicate(INVERSE_OP[operator], field, value,
                                    term)
     self.predicate = node_predicate

     node = get_node(tree)
     # Copy every attribute named in offsets except the children entries,
     # which are handled separately below.
     for attr in offsets:
         if attr in ("children#", "children"):
             continue
         setattr(self, attr, node[offsets[attr]])

     if node[offsets["children#"]] == 0:
         self.children = []
     else:
         self.children = node[offsets["children"]]
Esempio n. 4
0
    def __init__(self, tree, fields, objective_field=None):
        """Build a tree node, and recursively its children, from a dict.

        Args:
            tree: mapping describing the node. 'output', 'predicate' and
                'count' are required; 'children', 'confidence',
                'distribution' and 'objective_summary' are optional.
            fields: mapping of field ids to field descriptions; stored and
                passed to every child node.
            objective_field: id of the objective field; stored and passed
                to every child node.

        The node distribution comes from ``tree['distribution']`` when
        present. Otherwise it is read from ``tree['objective_summary']``
        or, failing that, from the objective field summary in ``fields``,
        using the first of 'bins', 'counts' or 'categories' found there.
        If none of these keys is present the ``distribution`` attribute is
        not set.
        """
        self.fields = fields
        self.objective_field = objective_field
        self.output = tree['output']

        # The literal True marks a node without a real predicate.
        node_predicate = tree['predicate']
        if node_predicate is True:
            self.predicate = True
        else:
            self.predicate = Predicate(node_predicate['operator'],
                                       node_predicate['field'],
                                       node_predicate['value'],
                                       node_predicate.get('term'))

        child_dicts = tree['children'] if 'children' in tree else []
        self.children = [Tree(child, self.fields, objective_field)
                         for child in child_dicts]

        self.count = tree['count']
        self.confidence = tree.get('confidence')

        if 'distribution' in tree:
            self.distribution = tree['distribution']
        else:
            if 'objective_summary' in tree:
                summary = tree['objective_summary']
            else:
                summary = self.fields[self.objective_field]['summary']
            # The first matching key wins, in this fixed priority order.
            for key in ('bins', 'counts', 'categories'):
                if key in summary:
                    self.distribution = summary[key]
                    break
Esempio n. 5
0
    def depth_first_search(tree, path):
        """Walk the tree depth-first, registering groups along the way.

        Args:
            tree: subtree to visit, readable through ``get_node`` and
                ``get_predicate``.
            path: list of ``Predicate`` objects leading to this node. The
                predicate of this node, if any, is appended to it in place.

        Returns:
            The value stored at the "count" offset of the visited node.

        A group is registered for every node without children, and for any
        inner node whose count exceeds the sum returned by its children
        (only the difference is registered in that case). Terms found in
        predicates are collected in ``model.terms``.
        """
        node = get_node(tree)
        predicate = get_predicate(tree)
        if isinstance(predicate, list):
            operation, field, value, term, _ = predicate
            path.append(Predicate(INVERSE_OP[operation], field, value, term))
            if term:
                if field not in model.terms:
                    model.terms[field] = []
                field_terms = model.terms[field]
                if term not in field_terms:
                    field_terms.append(term)

        def register(instances):
            # Record this node in groups with the given instance count.
            add_to_groups(
                groups, node[offsets["output"]], path, instances,
                node[offsets["confidence"]],
                gini_impurity(node[offsets["distribution"]],
                              node[offsets["count"]]))

        if node[offsets["children#"]] == 0:
            total = node[offsets["count"]]
            register(total)
            return total

        # Children are visited in reverse order, each with its own copy of
        # the path so that siblings do not see each other's predicates.
        covered = sum(depth_first_search(child, path[:])
                      for child in reversed(node[offsets["children"]]))

        total = node[offsets["count"]]
        if covered < total:
            register(total - covered)
        return total
Esempio n. 6
0
    def __init__(self,
                 tree,
                 fields,
                 objective_field=None,
                 root_distribution=None,
                 parent_id=None,
                 ids_map=None,
                 subtree=True):
        """Build a tree node, and recursively its children, from a dict.

        Args:
            tree: mapping describing the node. 'output', 'predicate' and
                'count' are required; 'id', 'children', 'confidence',
                'distribution' and 'objective_summary' are optional.
            fields: stored and passed to every child node.
            objective_field: stored as ``objective_id`` and passed to every
                child node.
            root_distribution: summary mapping used as a fallback when the
                node has neither 'distribution' nor 'objective_summary'.
                It is not forwarded to children.
            parent_id: id of the parent node; stored only when the node
                itself has an 'id'.
            ids_map: when a dict, every node that has an id registers
                itself in it under that id.
            subtree: not used by this constructor other than being
                forwarded to the children.

        ``distribution`` is taken from ``tree['distribution']`` when
        present. Otherwise it is the first of 'bins', 'counts' or
        'categories' found in the summary, or ``None`` when no summary is
        available or none of those keys is present.
        """
        self.fields = fields
        self.objective_id = objective_field
        self.output = tree['output']

        # The literal True marks a node without a real predicate.
        if tree['predicate'] is True:
            self.predicate = True
        else:
            self.predicate = Predicate(tree['predicate']['operator'],
                                       tree['predicate']['field'],
                                       tree['predicate']['value'],
                                       tree['predicate'].get('term', None))
        if 'id' in tree:
            self.id = tree['id']
            self.parent_id = parent_id
            if isinstance(ids_map, dict):
                ids_map[self.id] = self
        else:
            self.id = None

        children = []
        if 'children' in tree:
            for child in tree['children']:
                children.append(
                    Tree(child,
                         self.fields,
                         objective_field=objective_field,
                         parent_id=self.id,
                         ids_map=ids_map,
                         subtree=subtree))

        self.children = children
        self.regression = self.is_regression()
        self.count = tree['count']
        self.confidence = tree.get('confidence', None)

        # Always define the attribute so that later reads cannot fail with
        # AttributeError when no distribution information is found.
        self.distribution = None
        if 'distribution' in tree:
            self.distribution = tree['distribution']
        else:
            summary = tree.get('objective_summary', root_distribution)
            # root_distribution defaults to None and children never receive
            # it, so the summary may be missing: membership tests on None
            # would raise TypeError.
            if summary is not None:
                for key in ('bins', 'counts', 'categories'):
                    if key in summary:
                        self.distribution = summary[key]
                        break
Esempio n. 7
0
    def __init__(self,
                 tree,
                 fields,
                 objective_field=None,
                 root_distribution=None,
                 parent_id=None,
                 ids_map=None,
                 subtree=True,
                 tree_info=None):
        """Build a tree node, and recursively its children, from a dict.

        Args:
            tree: mapping describing the node. 'output', 'predicate' and
                'count' are required; 'id', 'children', 'confidence',
                'distribution', 'objective_summary',
                'weighted_objective_summary' and 'weight' are optional.
            fields: stored and passed to every child node.
            objective_field: stored as ``objective_id`` and passed to every
                child node.
            root_distribution: summary used as a fallback when the node has
                neither 'distribution' nor 'objective_summary'. It is not
                forwarded to children.
            parent_id: id of the parent node; stored only when the node
                itself has an 'id'.
            ids_map: when a dict, every node that has an id registers
                itself in it under that id.
            subtree: not used by this constructor other than being
                forwarded to the children.
            tree_info: dict shared by all nodes of the tree, updated in
                place with the 'regression' flag and, for regression nodes,
                the 'max_bins' value. A fresh dict is used when omitted.
        """
        # The default used to be subscripted directly, which raised
        # TypeError whenever the caller did not supply a dict.
        if tree_info is None:
            tree_info = {}

        self.fields = fields
        self.objective_id = objective_field
        self.output = tree['output']

        # The literal True marks a node without a real predicate.
        if tree['predicate'] is True:
            self.predicate = True
        else:
            self.predicate = Predicate(tree['predicate']['operator'],
                                       tree['predicate']['field'],
                                       tree['predicate']['value'],
                                       tree['predicate'].get('term', None))
        if 'id' in tree:
            self.id = tree['id']
            self.parent_id = parent_id
            if isinstance(ids_map, dict):
                ids_map[self.id] = self
        else:
            self.id = None

        # Children are instantiated with the runtime class so that
        # subclasses produce nodes of their own type.
        children = []
        if 'children' in tree:
            for child in tree['children']:
                children.append(self.__class__(
                    child,
                    self.fields,
                    objective_field=objective_field,
                    parent_id=self.id,
                    ids_map=ids_map,
                    subtree=subtree,
                    tree_info=tree_info))

        self.children = children
        self.regression = self.is_regression()
        # The flag stays True only while every node built so far is a
        # regression node.
        tree_info['regression'] = (self.regression
                                   and tree_info.get('regression', True))
        self.count = tree['count']
        self.confidence = tree.get('confidence', None)
        self.distribution = None
        self.max = None
        self.min = None
        self.weighted = False
        summary = None
        if 'distribution' in tree:
            self.distribution = tree['distribution']
        elif 'objective_summary' in tree:
            summary = tree['objective_summary']
            (self.distribution_unit,
             self.distribution) = extract_distribution(summary)
            if 'weighted_objective_summary' in tree:
                # From here on the weighted summary is the one used for the
                # median, maximum and minimum lookups below.
                summary = tree['weighted_objective_summary']
                (self.weighted_distribution_unit,
                 self.weighted_distribution) = extract_distribution(summary)
                self.weight = tree['weight']
                self.weighted = True
        else:
            # NOTE(review): root_distribution is not forwarded to children,
            # so for them this is None - confirm extract_distribution
            # accepts that.
            summary = root_distribution
            (self.distribution_unit,
             self.distribution) = extract_distribution(summary)
        if self.regression:
            tree_info['max_bins'] = max(tree_info.get('max_bins', 0),
                                        len(self.distribution))
            # summary is None when the distribution came straight from the
            # tree; fall back to an empty mapping so lookups are safe.
            stats = summary or {}
            # Explicit None checks keep legitimate zero values instead of
            # recomputing them from the distribution.
            self.median = stats.get('median')
            if self.median is None:
                self.median = dist_median(self.distribution, self.count)
            self.max = stats.get('maximum')
            if self.max is None:
                self.max = max(value for value, _ in self.distribution)
            self.min = stats.get('minimum')
            if self.min is None:
                self.min = min(value for value, _ in self.distribution)
        self.impurity = None
        if not self.regression and self.distribution is not None:
            self.impurity = self.gini_impurity()