def _expand_rule(self, rule):
    """Try to specialise ``rule`` with one more predicate.

    Nothing happens unless the rule has observed at least two classes.
    The expansion criterion is picked from ``self.expand_criterion``:
    Hellinger distance, information gain, or otherwise FOIL gain (which is
    evaluated for ``rule.class_idx``). When the best candidate beats the
    runner-up by more than the Hoeffding bound, or the bound drops below
    ``self.tie_threshold``, the rule is expanded in place: a predicate is
    appended (or an existing predicate with the same operator and attribute
    is tightened), the attribute observers are cleared and the class
    distribution is replaced by the statistics of the selected branch.

    With FOIL gain and unordered rules (``self.ordered_rules`` falsy), a
    deep copy of the freshly expanded rule is then made for every other
    class present in its distribution; expansion is attempted on each copy
    for that class and every copy that expands is appended to
    ``self.rule_set``.

    Parameters
    ----------
    rule
        The rule to expand. It is modified in place.
    """
    if len(rule.observed_class_distribution) < 2:
        return

    class_idx = None
    if self.expand_criterion == _HELLINGER:
        split_criterion = HellingerDistanceCriterion()
    elif self.expand_criterion == _INFOGAIN:
        split_criterion = InfoGainExpandCriterion()
    else:
        split_criterion = FoilGainExpandCriterion()
        class_idx = rule.class_idx

    best_suggestion = self._select_expansion(rule, split_criterion, class_idx)
    if best_suggestion is None:
        return
    self._apply_expansion(rule, best_suggestion)

    if self.expand_criterion != _FOILGAIN or self.ordered_rules:
        return

    # Unordered FOIL rules: also try to grow one sibling rule per other class.
    # NOTE(review): each copy inherits the `_attribute_observers` that
    # `_apply_expansion` has just cleared on `rule` - confirm that
    # `get_best_expand_suggestion` can still yield candidates for the copy.
    for c in rule.observed_class_distribution:
        if c == rule.class_idx:
            continue
        new_rule = copy.deepcopy(rule)
        new_rule.class_idx = c
        sibling_suggestion = self._select_expansion(new_rule, FoilGainExpandCriterion(), c)
        if sibling_suggestion is None:
            continue
        self._apply_expansion(new_rule, sibling_suggestion)
        self.rule_set.append(copy.deepcopy(new_rule))


def _select_expansion(self, rule, split_criterion, class_idx):
    """Return the candidate ``rule`` should be expanded with, or ``None``.

    The candidates returned by ``rule.get_best_expand_suggestion`` are
    sorted by merit. A lone candidate is accepted as is; with two or more,
    the best one must pass the Hoeffding test (or the tie threshold).

    Side effect: when ``self.remove_poor_atts`` is truthy and at least two
    candidates exist, every attribute whose candidates all trail the best
    merit by more than the Hoeffding bound is disabled on ``rule``,
    regardless of whether the rule ends up being expanded.
    """
    suggestions = rule.get_best_expand_suggestion(split_criterion, class_idx)
    suggestions.sort(key=attrgetter('merit'))

    if len(suggestions) < 2:
        # Nothing to compare against: expand iff there is a candidate at all.
        return suggestions[-1] if len(suggestions) > 0 else None

    hoeffding_bound = self.compute_hoeffding_bound(
        split_criterion.get_range_of_merit(rule.observed_class_distribution),
        self.expand_confidence,
        rule.get_weight_seen(),
    )
    best = suggestions[-1]
    runner_up = suggestions[-2]
    should_expand = (
        (best.merit - runner_up.merit) > hoeffding_bound
        or hoeffding_bound < self.tie_threshold
    )

    if self.remove_poor_atts:
        poor_atts = set()
        # Scan 1 - add any poor attribute to set
        for candidate in suggestions:
            if candidate is not None and best.merit - candidate.merit > hoeffding_bound:
                poor_atts.add(int(candidate.att_idx))
        # Scan 2 - remove good attributes from set
        for candidate in suggestions:
            if candidate is not None and best.merit - candidate.merit < hoeffding_bound:
                poor_atts.discard(int(candidate.att_idx))
        for poor_att in poor_atts:
            rule.disable_attribute(poor_att)

    return best if should_expand else None


def _apply_expansion(self, rule, best_suggestion):
    """Fold ``best_suggestion`` into ``rule`` and refresh its statistics.

    If the rule already holds a predicate with the same operator on the
    same attribute, that predicate is tightened instead of adding a new
    one. In every case the rule's attribute observers are cleared.
    """
    new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator,
                         best_suggestion.att_val)

    for pred in rule.predicate_set:
        if pred.operator == new_pred.operator and pred.att_idx == new_pred.att_idx:
            if pred.operator == "<=":
                pred.value = min(pred.value, new_pred.value)
                rule.observed_class_distribution = \
                    best_suggestion.resulting_stats_from_split(0).copy()
            elif pred.operator == ">":
                pred.value = max(pred.value, new_pred.value)
                rule.observed_class_distribution = \
                    best_suggestion.resulting_stats_from_split(1).copy()
            # Any other matching operator (e.g. "=") leaves both the predicate
            # and the class distribution untouched; only the observers reset.
            rule._attribute_observers = {}
            return

    rule.predicate_set.append(new_pred)
    rule._attribute_observers = {}
    # Branch 0 holds the statistics for "=" / "<=", branch 1 for the rest.
    branch = 0 if new_pred.operator in ["=", "<="] else 1
    rule.observed_class_distribution = \
        best_suggestion.resulting_stats_from_split(branch).copy()
def _create_rule(self):
    """Create new rule(s) from the default rule.

    If the default rule has observed fewer than two classes, only its
    ``weight_seen_at_last_expand`` is refreshed.

    Otherwise expansion candidates of the default rule are ranked by merit:

    * Information gain / Hellinger: if the best candidate passes the
      Hoeffding test (or is the only candidate), one rule holding the
      corresponding predicate is appended to ``self.rule_set``. The default
      rule is restarted; the new rule receives the statistics of the branch
      selected by the predicate and the default rule receives those of the
      other branch. If the test fails, ``weight_seen_at_last_expand`` of
      the default rule is refreshed instead.
    * FOIL gain: candidates are evaluated once per observed class and a
      rule for that class is appended whenever the expansion flag is set.
      After the loop the default rule is restarted if the flag is set,
      otherwise its ``weight_seen_at_last_expand`` is refreshed.

    Any other value of ``self.expand_criterion`` leaves everything untouched.
    """
    default_rule = self.default_rule

    if len(default_rule.observed_class_distribution) < 2:
        default_rule.weight_seen_at_last_expand = default_rule.get_weight_seen()
        return

    if self.expand_criterion in [_INFOGAIN, _HELLINGER]:
        if self.expand_criterion == _HELLINGER:
            expand_criterion = HellingerDistanceCriterion()
        else:
            expand_criterion = InfoGainExpandCriterion()

        suggestions = default_rule.get_best_expand_suggestion(expand_criterion, None)
        suggestions.sort(key=attrgetter('merit'))
        if len(suggestions) < 2:
            should_expand = len(suggestions) > 0
        else:
            should_expand = self._default_rule_passes_hoeffding_test(
                expand_criterion, suggestions)

        if not should_expand:
            default_rule.weight_seen_at_last_expand = default_rule.get_weight_seen()
            return

        best_suggestion = suggestions[-1]
        new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator,
                             best_suggestion.att_val)
        self.rule_set.append(self.new_rule(None, copy.deepcopy(self.drift_detector), None))
        created = self.rule_set[-1]
        created.predicate_set.append(new_pred)
        default_rule.restart()
        # The new rule takes the branch its predicate selects; the default
        # rule keeps the statistics of the complementary branch.
        if new_pred.operator in ["=", "<="]:
            rule_branch, default_branch = 0, 1
        else:
            rule_branch, default_branch = 1, 0
        created.observed_class_distribution = \
            best_suggestion.resulting_stats_from_split(rule_branch).copy()
        default_rule.observed_class_distribution = \
            best_suggestion.resulting_stats_from_split(default_branch).copy()

    elif self.expand_criterion == _FOILGAIN:
        expand_criterion = FoilGainExpandCriterion()
        # NOTE(review): the flag is deliberately left as in the original - it
        # is never reset between classes, so once one class passes the
        # Hoeffding test, later classes with two or more candidates also get
        # a rule, and the restart below reflects only the final flag value.
        # Confirm whether a per-class reset was intended.
        should_expand = False
        for class_idx in default_rule.observed_class_distribution:
            suggestions = default_rule.get_best_expand_suggestion(expand_criterion, class_idx)
            suggestions.sort(key=attrgetter('merit'))
            if len(suggestions) < 2:
                should_expand = len(suggestions) > 0
            elif self._default_rule_passes_hoeffding_test(expand_criterion, suggestions):
                should_expand = True

            if should_expand:
                best_suggestion = suggestions[-1]
                new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator,
                                     best_suggestion.att_val)
                self.rule_set.append(
                    self.new_rule(None, copy.deepcopy(self.drift_detector), class_idx))
                created = self.rule_set[-1]
                created.predicate_set.append(new_pred)
                rule_branch = 0 if new_pred.operator in ["=", "<="] else 1
                created.observed_class_distribution = \
                    best_suggestion.resulting_stats_from_split(rule_branch).copy()

        if should_expand:
            default_rule.restart()
        else:
            default_rule.weight_seen_at_last_expand = default_rule.get_weight_seen()


def _default_rule_passes_hoeffding_test(self, expand_criterion, suggestions):
    """Tell whether the best of ``suggestions`` beats the runner-up.

    ``suggestions`` must be sorted by ascending merit and hold at least two
    entries. The result is truthy when the merit gap exceeds the Hoeffding
    bound computed from the default rule's statistics, or when that bound
    is below ``self.tie_threshold``.
    """
    hoeffding_bound = self.compute_hoeffding_bound(
        expand_criterion.get_range_of_merit(self.default_rule.observed_class_distribution),
        self.expand_confidence,
        self.default_rule.get_weight_seen(),
    )
    best = suggestions[-1]
    runner_up = suggestions[-2]
    return (
        (best.merit - runner_up.merit) > hoeffding_bound
        or hoeffding_bound < self.tie_threshold
    )
def branch_rule(self, branch):
    """Build the predicate that describes one branch of this test.

    Branch ``0`` compares the attribute to the stored value with ``'=='``;
    any other branch uses ``'!='``.
    """
    if branch == 0:
        operator = '=='
    else:
        operator = '!='
    return Predicate(self._att_idx, operator, self._att_value)
def branch_rule(self, branch):
    """Build the ``'=='`` predicate for ``branch``.

    The branch identifier itself is used as the value the attribute is
    compared against.
    """
    attribute = self._att_idx
    return Predicate(attribute, '==', branch)
def branch_rule(self, branch):
    """Build the threshold predicate that describes one branch of this test.

    Branch ``0`` is the ``'<'`` side and any other branch the ``'>'`` side.
    The side that also accepts equality is branch ``0`` when
    ``self._equals_passes_test`` is truthy and branch ``1`` otherwise; that
    side gets ``'<='`` / ``'>='`` instead of the strict operator.
    """
    if branch == 0:
        operator = '<'
    else:
        operator = '>'

    if self._equals_passes_test:
        inclusive_branch = 0
    else:
        inclusive_branch = 1

    if branch == inclusive_branch:
        operator = operator + '='
    return Predicate(self._att_idx, operator, self._att_value)
def branch_rule(self, branch):
    """Build the ``'=='`` predicate for ``branch``.

    The compared value is looked up in ``self._reverse_branch_mapping``
    under the key ``branch``.
    """
    attribute = self._att_idx
    mapped_value = self._reverse_branch_mapping[branch]
    return Predicate(attribute, '==', mapped_value)