def __init__(self, tree, fields, objective_field=None):
    """Builds a node, and recursively its children, from a tree dict.

    `tree` must contain the 'output', 'predicate' and 'count' keys;
    'id', 'children', 'g_sum' and 'h_sum' are optional. `fields` is
    stored untouched and handed down to every child, and
    `objective_field` is stored as `objective_id`.
    """
    self.fields = fields
    self.objective_id = objective_field
    self.output = tree['output']

    # The predicate is either the literal True or a dict describing
    # the condition attached to this node.
    rule = tree['predicate']
    if rule is True:
        self.predicate = True
    else:
        self.predicate = Predicate(rule['operator'],
                                   rule['field'],
                                   rule['value'],
                                   rule.get('term', None))

    self.id = tree.get('id')

    # Children are instances of the same (possibly derived) class.
    node_class = self.__class__
    self.children = [
        node_class(child, self.fields, objective_field=objective_field)
        for child in tree.get('children', [])]

    self.count = tree['count']
    self.g_sum = tree.get('g_sum')
    self.h_sum = tree.get('h_sum')
def __init__(self, predicates_list):
    """Builds the list of predicates from their serialized form.

    `predicates_list` is an iterable whose items are either the
    literal True (kept as is) or a mapping exposing `get`, from which
    the 'op', 'field', 'value' and 'term' keys are read (a missing key
    yields None) to build a `Predicate`.
    """
    # Identity check: only the True singleton marks a trivially true
    # predicate. An equality check would also accept 1 or 1.0.
    self.predicates = [
        True if predicate is True else Predicate(predicate.get('op'),
                                                 predicate.get('field'),
                                                 predicate.get('value'),
                                                 predicate.get('term'))
        for predicate in predicates_list]
def __init__(self, tree, offsets):
    """Builds a node from `tree` using the positions in `offsets`.

    `offsets` maps attribute names to positions in the node returned
    by `get_node(tree)`. Every attribute except "children#" and
    "children" is copied onto the instance; `children` is set to an
    empty list when the "children#" slot equals 0 and to the
    "children" slot otherwise.
    """
    predicate = get_predicate(tree)
    if not isinstance(predicate, bool):
        # Non-boolean predicates are unpacked as five items; the
        # operator code is translated through INVERSE_OP.
        operator, field, value, term, _ = predicate
        predicate = Predicate(INVERSE_OP[operator], field, value, term)
    self.predicate = predicate

    node = get_node(tree)
    for attr in offsets:
        if attr in ("children#", "children"):
            continue
        setattr(self, attr, node[offsets[attr]])

    if node[offsets["children#"]] == 0:
        self.children = []
    else:
        self.children = node[offsets["children"]]
def __init__(self, tree, fields, objective_field=None):
    """Builds a node, and recursively its children, from a tree dict.

    `tree` must contain the 'output', 'predicate' and 'count' keys;
    'children', 'confidence', 'distribution' and 'objective_summary'
    are optional. `fields` is stored untouched and handed down to
    every child.

    The node distribution is taken from, in this order: the node's own
    'distribution', the node's 'objective_summary', or the 'summary'
    of the objective field in `fields`. Within a summary the first key
    found among 'bins', 'counts' and 'categories' is used. When none
    of them is present `distribution` is set to None instead of being
    left undefined.

    Raises KeyError if a required key is missing, including the
    objective field entry in `fields` when it has to be used.
    """
    self.fields = fields
    self.objective_field = objective_field
    self.output = tree['output']

    # The predicate is either the literal True or a dict describing
    # the condition attached to this node.
    predicate = tree['predicate']
    if predicate is True:
        self.predicate = True
    else:
        self.predicate = Predicate(predicate['operator'],
                                   predicate['field'],
                                   predicate['value'],
                                   predicate.get('term', None))

    self.children = [Tree(child, self.fields, objective_field)
                     for child in tree.get('children', [])]
    self.count = tree['count']
    self.confidence = tree.get('confidence', None)

    # Define the attribute up front so that readers never hit an
    # AttributeError when the summary has no usable key.
    self.distribution = None
    if 'distribution' in tree:
        self.distribution = tree['distribution']
    else:
        if 'objective_summary' in tree:
            summary = tree['objective_summary']
        else:
            summary = self.fields[self.objective_field]['summary']
        for key in ('bins', 'counts', 'categories'):
            if key in summary:
                self.distribution = summary[key]
                break
def depth_first_search(tree, path):
    """Searches for the leaves' values and instances

    Visits the subtree under `tree` depth first. When the node's
    predicate is a list, the matching `Predicate` is appended to
    `path` and its term, if any, is recorded in `model.terms`. Nodes
    without children are registered through `add_to_groups` with
    their full count; inner nodes are registered only with the
    instances not covered by their children. Returns the node count.

    NOTE(review): `model`, `offsets` and `groups` are not defined in
    this block; presumably they come from the enclosing scope.
    """
    node = get_node(tree)
    predicate = get_predicate(tree)

    if isinstance(predicate, list):
        operation, field, value, term, _ = predicate
        path.append(Predicate(INVERSE_OP[operation], field, value, term))
        if term:
            field_terms = model.terms.setdefault(field, [])
            if term not in field_terms:
                field_terms.append(term)

    def register(instances):
        """Adds this node to the groups with the given instances."""
        add_to_groups(
            groups, node[offsets["output"]], path, instances,
            node[offsets["confidence"]],
            gini_impurity(node[offsets["distribution"]],
                          node[offsets["count"]]))

    if node[offsets["children#"]] == 0:
        register(node[offsets["count"]])
        return node[offsets["count"]]

    # Children are visited last to first, each one with its own copy
    # of the path so siblings do not see each other's predicates.
    children_sum = sum(depth_first_search(child, path[:])
                       for child in node[offsets["children"]][::-1])

    if children_sum < node[offsets["count"]]:
        register(node[offsets["count"]] - children_sum)
    return node[offsets["count"]]
def __init__(self, tree, fields, objective_field=None,
             root_distribution=None, parent_id=None, ids_map=None,
             subtree=True):
    """Builds a node, and recursively its children, from a tree dict.

    `tree` must contain the 'output', 'predicate' and 'count' keys;
    'id', 'children', 'confidence', 'distribution' and
    'objective_summary' are optional. `fields` is stored untouched and
    handed down to every child together with `objective_field`,
    `ids_map` and `subtree`.

    When the node has an 'id', `parent_id` is stored and, if `ids_map`
    is a dict, the node registers itself there under its id. Otherwise
    `id` is set to None.

    The node distribution is taken from, in this order: the node's own
    'distribution', the node's 'objective_summary', or
    `root_distribution`. Within a summary the first key found among
    'bins', 'counts' and 'categories' is used. `distribution` is None
    when no source provides one, including the case where
    `root_distribution` is left as None.
    """
    self.fields = fields
    self.objective_id = objective_field
    self.output = tree['output']

    # The predicate is either the literal True or a dict describing
    # the condition attached to this node.
    predicate = tree['predicate']
    if predicate is True:
        self.predicate = True
    else:
        self.predicate = Predicate(predicate['operator'],
                                   predicate['field'],
                                   predicate['value'],
                                   predicate.get('term', None))

    if 'id' in tree:
        self.id = tree['id']
        self.parent_id = parent_id
        if isinstance(ids_map, dict):
            ids_map[self.id] = self
    else:
        self.id = None

    self.children = [
        Tree(child,
             self.fields,
             objective_field=objective_field,
             parent_id=self.id,
             ids_map=ids_map,
             subtree=subtree)
        for child in tree.get('children', [])]

    # Evaluated once output and children are in place and before the
    # remaining attributes are set, as in the original ordering.
    self.regression = self.is_regression()
    self.count = tree['count']
    self.confidence = tree.get('confidence', None)

    # Define the attribute up front so that readers never hit an
    # AttributeError when no usable summary is available.
    self.distribution = None
    if 'distribution' in tree:
        self.distribution = tree['distribution']
    else:
        if 'objective_summary' in tree:
            summary = tree['objective_summary']
        else:
            summary = root_distribution
        # `root_distribution` defaults to None and children never
        # receive it, so a missing summary must not be probed.
        if summary is not None:
            for key in ('bins', 'counts', 'categories'):
                if key in summary:
                    self.distribution = summary[key]
                    break
def __init__(self, tree, fields, objective_field=None,
             root_distribution=None, parent_id=None, ids_map=None,
             subtree=True, tree_info=None):
    """Builds a node, and recursively its children, from a tree dict.

    `tree` must contain the 'output', 'predicate' and 'count' keys;
    'id', 'children', 'confidence', 'distribution',
    'objective_summary' and 'weighted_objective_summary' (which
    requires 'weight') are optional. `fields` is stored untouched and
    handed down to every child together with `objective_field`,
    `ids_map`, `subtree` and `tree_info`.

    `tree_info` is a dict shared by all the nodes of the tree and
    updated in place: 'regression' stays true only while every node
    built so far is a regression node, and 'max_bins' keeps the
    longest distribution seen in regression nodes. When omitted, a
    fresh dict is created and shared with the children.

    The distribution comes from the node's 'distribution', from its
    'objective_summary' or from `root_distribution`, in that order.
    For regression nodes `median`, `max` and `min` are read from the
    summary when present there and computed from the distribution
    otherwise. For the other nodes `impurity` is computed when a
    distribution is available.
    """
    # The default cannot be subscripted: start a fresh shared dict.
    if tree_info is None:
        tree_info = {}

    self.fields = fields
    self.objective_id = objective_field
    self.output = tree['output']

    # The predicate is either the literal True or a dict describing
    # the condition attached to this node.
    predicate = tree['predicate']
    if predicate is True:
        self.predicate = True
    else:
        self.predicate = Predicate(predicate['operator'],
                                   predicate['field'],
                                   predicate['value'],
                                   predicate.get('term', None))

    if 'id' in tree:
        self.id = tree['id']
        self.parent_id = parent_id
        if isinstance(ids_map, dict):
            ids_map[self.id] = self
    else:
        self.id = None

    # Children are instances of the same (possibly derived) class.
    node_class = self.__class__
    self.children = [
        node_class(child,
                   self.fields,
                   objective_field=objective_field,
                   parent_id=self.id,
                   ids_map=ids_map,
                   subtree=subtree,
                   tree_info=tree_info)
        for child in tree.get('children', [])]

    self.regression = self.is_regression()
    tree_info['regression'] = (self.regression and
                               tree_info.get('regression', True))
    self.count = tree['count']
    self.confidence = tree.get('confidence', None)
    self.distribution = None
    self.max = None
    self.min = None
    self.weighted = False

    summary = None
    if 'distribution' in tree:
        self.distribution = tree['distribution']
    elif 'objective_summary' in tree:
        summary = tree['objective_summary']
        (self.distribution_unit,
         self.distribution) = extract_distribution(summary)
        if 'weighted_objective_summary' in tree:
            # From here on the weighted summary is the one used to
            # look up median, maximum and minimum.
            summary = tree['weighted_objective_summary']
            (self.weighted_distribution_unit,
             self.weighted_distribution) = extract_distribution(summary)
            self.weight = tree['weight']
            self.weighted = True
    else:
        summary = root_distribution
        (self.distribution_unit,
         self.distribution) = extract_distribution(summary)

    if self.regression:
        tree_info['max_bins'] = max(tree_info.get('max_bins', 0),
                                    len(self.distribution))
        # No summary exists when the node carries its own
        # 'distribution', so every statistic is then computed.
        stats = summary or {}
        # Compare against None: a stored 0 is a valid statistic and
        # must not be replaced by a value derived from the bins.
        self.median = stats.get('median')
        if self.median is None:
            self.median = dist_median(self.distribution, self.count)
        self.max = stats.get('maximum')
        if self.max is None:
            self.max = max(value for value, _ in self.distribution)
        self.min = stats.get('minimum')
        if self.min is None:
            self.min = min(value for value, _ in self.distribution)

    self.impurity = None
    if not self.regression and self.distribution is not None:
        self.impurity = self.gini_impurity()