class AdaLearningNode(LearningNodeNBAdaptive, NewNode):
    """Learning node (leaf) of the Hoeffding Adaptive Tree.

    Tracks its own misclassification rate with an ADWIN change detector and
    weights its class votes by the squared error estimation (mirrors MOA's
    ``AdaLearningNode``).

    Parameters
    ----------
    initial_class_observations: dict
        Initial class distribution handed to ``LearningNodeNBAdaptive``.
    """

    def __init__(self, initial_class_observations):
        LearningNodeNBAdaptive.__init__(self, initial_class_observations)
        self.estimationErrorWeight = ADWIN()
        self.ErrorChange = False
        self.randomSeed = 1
        # FIX: random.seed() returns None, so the previous code stored None
        # here and then passed it as the *size* argument of
        # np.random.poisson.  Keep a real, seeded generator instead.
        self.classifierRandom = np.random.RandomState(self.randomSeed)

    def calc_byte_size(self):
        """Estimate this node's memory footprint in bytes."""
        byte_size = self.__sizeof__()
        if self.estimationErrorWeight is not None:
            byte_size += self.estimationErrorWeight.get_length_estimation()
        return byte_size

    # Override NewNode
    def number_leaves(self):
        # A leaf counts as exactly one leaf.
        return 1

    # Override NewNode
    def get_error_estimation(self):
        return self.estimationErrorWeight._estimation

    # Override NewNode
    def get_error_width(self):
        return self.estimationErrorWeight._width

    # Override NewNode
    def is_null_error(self):
        return self.estimationErrorWeight is None

    def kill_tree_childs(self, hat):
        # Leaves have no children to remove.
        pass

    # Override NewNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        """Process one instance: update the ADWIN error monitor, train the
        underlying NB-adaptive statistics, and attempt a split when the
        grace period has elapsed.
        """
        true_class = y
        # Online-bagging style re-weighting: draw k ~ Poisson(1) from this
        # node's own seeded generator (see FIX in __init__).
        k = self.classifierRandom.poisson(1.0)
        if k > 0:
            weight = weight * k

        tmp = self.get_class_votes(X, hat)
        class_prediction = get_max_value_index(tmp)
        bl_correct = (true_class == class_prediction)

        if self.estimationErrorWeight is None:
            self.estimationErrorWeight = ADWIN()
        old_error = self.get_error_estimation()

        # Add element to ADWIN: 0.0 for a hit, 1.0 for a miss.
        add = 0.0 if bl_correct else 1.0
        self.estimationErrorWeight.add_element(add)
        # Detect change with ADWIN.
        self.ErrorChange = self.estimationErrorWeight.detected_change()
        if self.ErrorChange and old_error > self.get_error_estimation():
            # The error is decreasing, so the detected change is not a
            # degradation: ignore it.
            self.ErrorChange = False

        # Update statistics: call LearningNodeNBAdaptive (super, not self).
        super().learn_from_instance(X, y, weight, hat)

        # ActiveLearningNode behavior: check whether it is time to attempt
        # a split.
        weight_seen = self.get_weight_seen()
        if weight_seen - self.get_weight_seen_at_last_split_evaluation() \
                >= hat.grace_period:
            hat._attempt_to_split(self, parent, parent_branch)
            self.set_weight_seen_at_last_split_evaluation(weight_seen)

    # Override LearningNodeNBAdaptive
    def get_class_votes(self, X, ht):
        """Return the class-vote dict for *X*, scaled by this node's squared
        error estimation (MOA semantics).
        """
        dist = {}
        prediction_option = ht.leaf_prediction
        if prediction_option == MAJORITY_CLASS:  # MC
            dist = self.get_observed_class_distribution()
        elif prediction_option == NAIVE_BAYES:  # NB
            dist = do_naive_bayes_prediction(
                X, self._observed_class_distribution,
                self._attribute_observers)
        else:  # NBAdaptive: use whichever of MC/NB has been more accurate.
            # FIX: this block used to run unconditionally, clobbering the
            # MC/NB results computed above.
            if self._mc_correct_weight > self._nb_correct_weight:
                dist = self.get_observed_class_distribution()
            else:
                dist = do_naive_bayes_prediction(
                    X, self._observed_class_distribution,
                    self._attribute_observers)

        dist_sum = sum(dist.values())  # sum all values in dictionary
        normalization_factor = dist_sum * self.get_error_estimation() \
            * self.get_error_estimation()
        if normalization_factor > 0.0:
            # FIX: normalize_values_in_dict takes the dictionary first and
            # the factor second; the arguments were swapped.
            normalize_values_in_dict(dist, normalization_factor)
        return dist

    # Override NewNode, New for option votes
    def filter_instance_to_leaves(self, X, split_parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        """Append this leaf to *found_nodes* (a leaf always matches)."""
        if found_nodes is None:
            found_nodes = []
        found_nodes.append(
            HoeffdingTree.FoundNode(self, split_parent, parent_branch))
class AdaSplitNode(SplitNode, NewNode):
    """Split node of the Hoeffding Adaptive Tree.

    Monitors its subtree's error with ADWIN and, when a degradation is
    detected, grows an alternate subtree that may eventually replace this
    node (mirrors MOA's ``AdaSplitNode``).

    Parameters
    ----------
    split_test: InstanceConditionalTest
        The split test forwarded to ``SplitNode``.
    class_observations: dict
        Class distribution at this node.
    size: int
        Number of child branches.
    """

    def __init__(self, split_test, class_observations, size):
        SplitNode.__init__(self, split_test, class_observations, size)
        self._estimation_error_weight = ADWIN()
        # Deliberately None (not HoeffdingTree.Node()): the alternate tree is
        # created via hat._new_learning_node() when a change is detected.
        self._alternate_tree = None
        self.error_change = False
        self._random_seed = 1
        # FIX: random.seed() returns None; keep a real seeded generator.
        self._classifier_random = np.random.RandomState(self._random_seed)

    # Override SplitNode
    def calc_byte_size_including_subtree(self):
        """Estimate the memory footprint of this node, its alternate tree,
        its ADWIN detector, and all children, in bytes.
        """
        byte_size = self.__sizeof__()
        if self._alternate_tree is not None:
            byte_size += self._alternate_tree.calc_byte_size_including_subtree()
        if self._estimation_error_weight is not None:
            byte_size += self._estimation_error_weight.get_length_estimation()
        for child in self._children:
            if child is not None:
                byte_size += child.calc_byte_size_including_subtree()
        return byte_size

    # Override NewNode
    def number_leaves(self):
        """Return the number of leaves in this subtree."""
        num_of_leaves = 0
        for child in self._children:
            if child is not None:
                num_of_leaves += child.number_leaves()
        return num_of_leaves

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight._estimation

    # Override NewNode
    def get_error_width(self):
        w = 0.0
        if not self.is_null_error():
            w = self._estimation_error_weight._width
        return w

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    # Override NewNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        """Process one instance: update the ADWIN error monitor, manage the
        alternate tree (create / promote / prune), then route the instance
        to the alternate tree and the matching child.
        """
        true_class = y
        class_prediction = 0
        # Hoisted: the original called filter_instance_to_leaf twice.
        leaf = self.filter_instance_to_leaf(X, parent, parent_branch).node
        if leaf is not None:
            class_prediction = get_max_value_index(
                leaf.get_class_votes(X, hat))
        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()
        old_error = self.get_error_estimation()

        # Add element to ADWIN: 0.0 for a hit, 1.0 for a miss.
        add = 0.0 if bl_correct else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN.
        self.error_change = self._estimation_error_weight.detected_change()
        if self.error_change and old_error > self.get_error_estimation():
            # Error decreased: the change is not a degradation, ignore it.
            self.error_change = False

        # Condition to build a new alternate tree.
        if self.error_change:
            self._alternate_tree = hat._new_learning_node()
            hat._alternateTrees += 1
        # Condition to replace the alternate tree.
        elif self._alternate_tree is not None \
                and not self._alternate_tree.is_null_error():
            if self.get_error_width() > error_width_threshold \
                    and self._alternate_tree.get_error_width() > error_width_threshold:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                f_delta = .05
                f_n = 1.0 / self._alternate_tree.get_error_width() \
                    + 1.0 / self.get_error_width()
                # Hoeffding-style bound on the error-rate difference.
                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                  * math.log(2.0 / f_delta) * f_n)
                if bound < (old_error_rate - alt_error_rate):
                    # The alternate tree is significantly better: swap it in.
                    hat._active_leaf_node_cnt -= self.number_leaves()
                    hat._active_leaf_node_cnt += \
                        self._alternate_tree.number_leaves()
                    self.kill_tree_childs(hat)
                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        # FIX: 'hat._tree_root.alternateTree' does not exist.
                        # parent is None means this node IS the root, so its
                        # own alternate tree becomes the new root.
                        hat._tree_root = self._alternate_tree
                    hat._switchAlternateTrees += 1
                elif bound < alt_error_rate - old_error_rate:
                    # The alternate tree is significantly worse: prune it.
                    if isinstance(self._alternate_tree,
                                  HAT.ActiveLearningNode):
                        self._alternate_tree = None
                    # FIX: this branch used to repeat the ActiveLearningNode
                    # check (dead code); per MOA it must test
                    # InactiveLearningNode.
                    elif isinstance(self._alternate_tree,
                                    HAT.InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_childs(hat)
                    # NOTE(review): name is inconsistent with the
                    # '_pruned_alternate_trees' used in kill_tree_childs —
                    # TODO confirm the HAT attribute name.
                    hat._prunedalternateTree += 1

        # Route the instance to the alternate tree and to the regular child.
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(
                X, y, weight, hat, parent, parent_branch)
        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_from_instance(X, y, weight, hat, parent,
                                      parent_branch)

    # Override NewNode
    def kill_tree_childs(self, hat):
        """Recursively dismantle this subtree, updating the tree's leaf
        counters.
        """
        for child in self._children:
            if child is not None:
                # Delete alternate tree if it exists.
                if isinstance(child, HAT.AdaSplitNode) \
                        and child._alternate_tree is not None:
                    # NOTE(review): MOA recurses into the alternate tree here
                    # and increments the counter on the tree object; this node
                    # may not define _pruned_alternate_trees — TODO confirm.
                    self._pruned_alternate_trees += 1
                # Recursive delete of SplitNodes.
                if isinstance(child, HAT.AdaSplitNode):
                    child.kill_tree_childs(hat)
                if isinstance(child, HAT.ActiveLearningNode):
                    # NOTE(review): rebinding the loop variable does not
                    # remove the child from self._children — TODO confirm
                    # whether detaching was intended.
                    child = None
                    hat._active_leaf_node_cnt -= 1
                elif isinstance(child, HAT.InactiveLearningNode):
                    child = None
                    hat._inactive_leaf_node_cnt -= 1

    # Override NewNode
    def filter_instance_to_leaves(self, X, parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        """Collect every leaf that *X* reaches, in both the main subtree and
        the alternate tree (branch -999 marks alternate-tree traversal).
        """
        if found_nodes is None:
            found_nodes = []
        child_index = self.instance_child_index(X)
        if child_index >= 0:
            child = self.get_child(child_index)
            if child is not None:
                child.filter_instance_to_leaves(X, parent, parent_branch,
                                                update_splitter_counts,
                                                found_nodes)
            else:
                found_nodes.append(
                    HoeffdingTree.FoundNode(None, self, child_index))
        if self._alternate_tree is not None:
            self._alternate_tree.filter_instance_to_leaves(
                X, self, -999, update_splitter_counts, found_nodes)