class AdaLearningNodeForRegression(LearningNodePerceptron, NewNode):
    def __init__(self, initial_class_observations, perceptron_weight, random_state=None):
        super().__init__(initial_class_observations, perceptron_weight, random_state)
        self._estimation_error_weight = ADWIN()
        self._error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    def calc_byte_size(self):
        byte_size = self.__sizeof__()
        if self._estimation_error_weight is not None:
            byte_size += self._estimation_error_weight.get_length_estimation()
        return byte_size

    # Override NewNode
    def number_leaves(self):
        return 1

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override NewNode
    def get_error_width(self):
        return self._estimation_error_weight.width

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    def kill_tree_children(self, hat):
        pass

    # Override NewNode
    def learn_from_instance(self, X, y, weight, rhat, parent, parent_branch):
        super().learn_from_instance(X, y, weight, rhat)

        true_target = y
        target_prediction = rhat.predict([X])[0]
        normalized_error = rhat.get_normalized_error(target_prediction, true_target)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        self._estimation_error_weight.add_element(normalized_error)
        # Detect change with ADWIN
        self._error_change = self._estimation_error_weight.detected_change()

        if self._error_change is True and old_error > self.get_error_estimation():
            self._error_change = False

        # Call ActiveLearningNode: periodic split attempt
        weight_seen = self.get_weight_seen()
        if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= rhat.grace_period:
            rhat._attempt_to_split(self, parent, parent_branch)
            self.set_weight_seen_at_last_split_evaluation(weight_seen)

    # Override NewNode, new for option votes
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        found_nodes.append(HoeffdingTree.FoundNode(self, parent, parent_branch))
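# Drift-detection pattern used by the nodes in this module (illustrative sketch
# only, kept as a comment; it relies on the same ADWIN calls used above):
#
#     adwin = ADWIN()
#     adwin.add_element(normalized_error)   # one value in [0, 1] per instance
#     if adwin.detected_change():           # error distribution shifted
#         ...                               # flag the node / consider an alternate tree
#     adwin.estimation                      # current mean error over the adaptive window
#     adwin.width                           # current window size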
class AdaLearningNode(LearningNodeNBAdaptive, NewNode):
    def __init__(self, initial_class_observations):
        LearningNodeNBAdaptive.__init__(self, initial_class_observations)
        self._estimation_error_weight = ADWIN()
        self._error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    def calc_byte_size(self):
        byte_size = self.__sizeof__()
        if self._estimation_error_weight is not None:
            byte_size += self._estimation_error_weight.get_length_estimation()
        return byte_size

    # Override NewNode
    def number_leaves(self):
        return 1

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override NewNode
    def get_error_width(self):
        return self._estimation_error_weight.width

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    def kill_tree_children(self, hat):
        pass

    # Override NewNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y

        # Bootstrap sampling (currently disabled)
        k = self._classifier_random.poisson(1.0)
        # if k > 0:
        #     weight = weight * k

        class_prediction = get_max_value_key(self.get_class_votes(X, hat))
        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if bl_correct is True else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self._error_change = self._estimation_error_weight.detected_change()

        if self._error_change is True and old_error > self.get_error_estimation():
            self._error_change = False

        # Update statistics: delegate to LearningNodeNBAdaptive
        super().learn_from_instance(X, y, weight, hat)

        # Call ActiveLearningNode: periodic split attempt
        weight_seen = self.get_weight_seen()
        if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= hat.grace_period:
            hat._attempt_to_split(self, parent, parent_branch)
            self.set_weight_seen_at_last_split_evaluation(weight_seen)

    # Override LearningNodeNBAdaptive
    def get_class_votes(self, X, ht):
        dist = {}
        prediction_option = ht.leaf_prediction
        if prediction_option == MAJORITY_CLASS:
            # Majority class
            dist = self.get_observed_class_distribution()
        elif prediction_option == NAIVE_BAYES:
            # Naive Bayes
            dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                             self._attribute_observers)
        else:
            # Naive Bayes Adaptive
            if self._mc_correct_weight > self._nb_correct_weight:
                dist = self.get_observed_class_distribution()
            else:
                dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                                 self._attribute_observers)

        dist_sum = sum(dist.values())  # Sum of all values in the dictionary
        normalization_factor = dist_sum * self.get_error_estimation() * self.get_error_estimation()
        if normalization_factor > 0.0:
            normalize_values_in_dict(dist, normalization_factor)
        return dist

    # Override NewNode, new for option votes
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        found_nodes.append(HoeffdingTree.FoundNode(self, parent, parent_branch))
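# Worked example for get_class_votes (illustrative numbers; assumes
# normalize_values_in_dict divides each vote by the given factor, as in
# skmultiflow's utils): with leaf votes {0: 4.0, 1: 6.0} and an ADWIN error
# estimate of 0.2, the factor is (4.0 + 6.0) * 0.2 * 0.2 = 0.4, so the votes
# passed up become {0: 10.0, 1: 15.0}. Leaves with a lower estimated error
# therefore cast proportionally stronger votes during option voting.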
class AdaSplitNodeForRegression(SplitNode, NewNode):
    def __init__(self, split_test, class_observations):
        super().__init__(split_test, class_observations)
        self._estimation_error_weight = ADWIN()
        self._alternate_tree = None
        self.error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    # Override SplitNode
    def calc_byte_size_including_subtree(self):
        byte_size = self.__sizeof__()
        if self._alternate_tree is not None:
            byte_size += self._alternate_tree.calc_byte_size_including_subtree()
        if self._estimation_error_weight is not None:
            byte_size += self._estimation_error_weight.get_length_estimation()
        for child in self._children:
            if child is not None:
                byte_size += child.calc_byte_size_including_subtree()
        return byte_size

    # Override NewNode
    def number_leaves(self):
        num_of_leaves = 0
        for child in self._children:
            if child is not None:
                num_of_leaves += child.number_leaves()
        return num_of_leaves

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override NewNode
    def get_error_width(self):
        w = 0.0
        if self.is_null_error() is False:
            w = self._estimation_error_weight.width
        return w

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    # Override NewNode
    def learn_from_instance(self, X, y, weight, rhat, parent, parent_branch):
        true_target = y
        normalized_error = 0.0

        if self.filter_instance_to_leaf(X, parent, parent_branch).node is not None:
            target_prediction = rhat.predict([X])[0]
            normalized_error = rhat.get_normalized_error(target_prediction, true_target)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to the change detector
        self._estimation_error_weight.add_element(normalized_error)
        # Detect change
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change is True and old_error > self.get_error_estimation():
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change is True:
            self._alternate_tree = rhat._new_learning_node()
            rhat.alternate_trees_cnt += 1

        # Condition to replace the alternate tree
        elif self._alternate_tree is not None and self._alternate_tree.is_null_error() is False:
            if self.get_error_width() > error_width_threshold \
                    and self._alternate_tree.get_error_width() > error_width_threshold:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                fDelta = .05
                fN = 1.0 / self._alternate_tree.get_error_width() + 1.0 / self.get_error_width()

                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                  * math.log(2.0 / fDelta) * fN)
                if bound < (old_error_rate - alt_error_rate):
                    # The alternate subtree is significantly more accurate: promote it
                    rhat._active_leaf_node_cnt -= self.number_leaves()
                    rhat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                    self.kill_tree_children(rhat)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        rhat._tree_root = rhat._tree_root._alternate_tree
                    rhat.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    # The alternate subtree is significantly worse: prune it
                    if isinstance(self._alternate_tree, HoeffdingTree.ActiveLearningNode):
                        self._alternate_tree = None
                    elif isinstance(self._alternate_tree, HoeffdingTree.InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_children(rhat)
                    rhat.pruned_alternate_trees_cnt += 1

        # Learn from instance: alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(X, y, weight, rhat, parent, parent_branch)

        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_from_instance(X, y, weight, rhat, parent, parent_branch)

    # Override NewNode
    def kill_tree_children(self, rhat):
        for child in self._children:
            if child is not None:
                # Delete alternate tree if it exists
                if isinstance(child, rhat.AdaSplitNodeForRegression) and child._alternate_tree is not None:
                    self._pruned_alternate_trees += 1
                # Recursive delete of SplitNodes
                if isinstance(child, rhat.AdaSplitNodeForRegression):
                    child.kill_tree_children(rhat)

                if isinstance(child, HoeffdingTree.ActiveLearningNode):
                    child = None
                    rhat._active_leaf_node_cnt -= 1
                elif isinstance(child, HoeffdingTree.InactiveLearningNode):
                    child = None
                    rhat._inactive_leaf_node_cnt -= 1

    # Override NewNode
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts=False, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        if update_splitter_counts:
            try:
                self._observed_class_distribution[0] += weight
                self._observed_class_distribution[1] += y * weight
                self._observed_class_distribution[2] += y * y * weight
            except KeyError:
                self._observed_class_distribution[0] = weight
                self._observed_class_distribution[1] = y * weight
                self._observed_class_distribution[2] = y * y * weight
        child_index = self.instance_child_index(X)
        if child_index >= 0:
            child = self.get_child(child_index)
            if child is not None:
                child.filter_instance_to_leaves(X, y, weight, parent, parent_branch,
                                                update_splitter_counts, found_nodes)
            else:
                found_nodes.append(HoeffdingTree.FoundNode(None, self, child_index))
        if self._alternate_tree is not None:
            self._alternate_tree.filter_instance_to_leaves(X, y, weight, self, -999,
                                                           update_splitter_counts, found_nodes)
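# Numeric illustration of the alternate-tree swap test above (made-up numbers):
# with fDelta = 0.05, old_error_rate = 0.30, alt_error_rate = 0.20 and both
# error windows of width 400, fN = 1/400 + 1/400 = 0.005 and
# bound = sqrt(2 * 0.30 * 0.70 * ln(2 / 0.05) * 0.005) ~= 0.088, which is
# smaller than old_error_rate - alt_error_rate = 0.10, so the alternate
# subtree would replace the current split node.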
class AdaSplitNode(SplitNode, NewNode):
    def __init__(self, split_test, class_observations):
        SplitNode.__init__(self, split_test, class_observations)
        self._estimation_error_weight = ADWIN()
        self._alternate_tree = None
        self.error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    # Override SplitNode
    def calc_byte_size_including_subtree(self):
        byte_size = self.__sizeof__()
        if self._alternate_tree is not None:
            byte_size += self._alternate_tree.calc_byte_size_including_subtree()
        if self._estimation_error_weight is not None:
            byte_size += self._estimation_error_weight.get_length_estimation()
        for child in self._children:
            if child is not None:
                byte_size += child.calc_byte_size_including_subtree()
        return byte_size

    # Override NewNode
    def number_leaves(self):
        num_of_leaves = 0
        for child in self._children:
            if child is not None:
                num_of_leaves += child.number_leaves()
        return num_of_leaves

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override NewNode
    def get_error_width(self):
        w = 0.0
        if self.is_null_error() is False:
            w = self._estimation_error_weight.width
        return w

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    # Override NewNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y
        class_prediction = 0

        if self.filter_instance_to_leaf(X, parent, parent_branch).node is not None:
            class_prediction = get_max_value_key(
                self.filter_instance_to_leaf(X, parent, parent_branch).node.get_class_votes(X, hat))

        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if bl_correct is True else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change is True and old_error > self.get_error_estimation():
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change is True:
            self._alternate_tree = hat._new_learning_node()
            hat.alternate_trees_cnt += 1

        # Condition to replace the alternate tree
        elif self._alternate_tree is not None and self._alternate_tree.is_null_error() is False:
            if self.get_error_width() > error_width_threshold \
                    and self._alternate_tree.get_error_width() > error_width_threshold:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                fDelta = .05
                fN = 1.0 / self._alternate_tree.get_error_width() + 1.0 / self.get_error_width()

                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                  * math.log(2.0 / fDelta) * fN)
                if bound < (old_error_rate - alt_error_rate):
                    # The alternate subtree is significantly more accurate: promote it
                    hat._active_leaf_node_cnt -= self.number_leaves()
                    hat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                    self.kill_tree_children(hat)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        hat._tree_root = hat._tree_root._alternate_tree
                    hat.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    # The alternate subtree is significantly worse: prune it
                    if isinstance(self._alternate_tree, HAT.ActiveLearningNode):
                        self._alternate_tree = None
                    elif isinstance(self._alternate_tree, HAT.InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_children(hat)
                    hat.pruned_alternate_trees_cnt += 1

        # Learn from instance: alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(X, y, weight, hat, parent, parent_branch)

        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_from_instance(X, y, weight, hat, parent, parent_branch)

    # Override NewNode
    def kill_tree_children(self, hat):
        for child in self._children:
            if child is not None:
                # Delete alternate tree if it exists
                if isinstance(child, HAT.AdaSplitNode) and child._alternate_tree is not None:
                    self._pruned_alternate_trees += 1
                # Recursive delete of SplitNodes
                if isinstance(child, HAT.AdaSplitNode):
                    child.kill_tree_children(hat)

                if isinstance(child, HAT.ActiveLearningNode):
                    child = None
                    hat._active_leaf_node_cnt -= 1
                elif isinstance(child, HAT.InactiveLearningNode):
                    child = None
                    hat._inactive_leaf_node_cnt -= 1

    # Override NewNode
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts=False, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        if update_splitter_counts:
            try:
                # Dictionary (class_value, weight)
                self._observed_class_distribution[y] += weight
            except KeyError:
                self._observed_class_distribution[y] = weight
        child_index = self.instance_child_index(X)
        if child_index >= 0:
            child = self.get_child(child_index)
            if child is not None:
                child.filter_instance_to_leaves(X, y, weight, parent, parent_branch,
                                                update_splitter_counts, found_nodes)
            else:
                found_nodes.append(HoeffdingTree.FoundNode(None, self, child_index))
        if self._alternate_tree is not None:
            self._alternate_tree.filter_instance_to_leaves(X, y, weight, self, -999,
                                                           update_splitter_counts, found_nodes)
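# Minimal usage sketch (assumptions: the tree class is the skmultiflow-style
# HAT referenced above and exposes the usual partial_fit/predict API; the
# stream generator below is only an example, not part of this module):
#
#     from skmultiflow.data import SEAGenerator
#
#     stream = SEAGenerator()
#     tree = HAT()                      # tree built from AdaSplitNode / AdaLearningNode
#     for _ in range(10000):
#         X, y = stream.next_sample()
#         y_pred = tree.predict(X)      # instances routed via filter_instance_to_leaves
#         tree.partial_fit(X, y)        # every node on the path updates its ADWIN detector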