def __init__(self, stats, depth, splitter, adwin_delta, seed, **kwargs):
    """Build an adaptive leaf that tracks its own error with ADWIN.

    Parameters
    ----------
    stats, depth, splitter, **kwargs
        Forwarded unchanged to the parent leaf constructor.
    adwin_delta
        Significance level handed to the ADWIN drift detector.
    seed
        Seed for the internal random number generator.
    """
    super().__init__(stats, depth, splitter, **kwargs)
    self.adwin_delta = adwin_delta
    self._rng = check_random_state(seed)
    self._adwin = ADWIN(delta=self.adwin_delta)
    self._error_change = False
    # Welford-style running variance: normalizes the error signal that is
    # fed to the drift detector.
    self._error_normalizer = Var(ddof=1)
def __init__(self, stats, depth, attr_obs, attr_obs_params, leaf_model, adwin_delta, seed):
    """Build an adaptive leaf (with a leaf model) that monitors drift via ADWIN.

    Parameters
    ----------
    stats, depth, attr_obs, attr_obs_params, leaf_model
        Forwarded unchanged to the parent leaf constructor.
    adwin_delta
        Significance level handed to the ADWIN drift detector.
    seed
        Seed for the internal random number generator.
    """
    super().__init__(stats, depth, attr_obs, attr_obs_params, leaf_model)
    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)
    # Fix: the sibling adaptive nodes and the drift-handling code in
    # learn_one use the private name ``self._error_change``; the previous
    # ``self.error_change`` left ``_error_change`` undefined until the
    # first ADWIN update and created a stray, never-read attribute.
    self._error_change = False
    self._rng = check_random_state(seed)
    # Normalization of info monitored by drift detectors (using Welford's algorithm)
    self._error_normalizer = Var(ddof=1)
def __init__(self, split_test, stats, depth, adwin_delta, seed):
    """Build an adaptive split node with an ADWIN detector and alternate-tree slot.

    Parameters
    ----------
    split_test, stats, depth
        Forwarded to the parent split-node constructor (``stats`` defaults
        to a fresh ``Var`` when falsy).
    adwin_delta
        Significance level handed to the ADWIN drift detector.
    seed
        Seed for the internal random number generator.
    """
    super().__init__(split_test, stats or Var(), depth)
    self.adwin_delta = adwin_delta
    self._rng = check_random_state(seed)
    self._adwin = ADWIN(delta=self.adwin_delta)
    # No alternate subtree until drift is flagged.
    self._alternate_tree = None
    self._error_change = False
    # Welford-style running variance: normalizes the error signal that is
    # fed to the drift detector.
    self._error_normalizer = Var(ddof=1)
def learn_one(self, x, y, *, sample_weight=1.0, tree=None, parent=None, parent_branch=None):
    """Learn from one sample while monitoring the leaf's prediction error.

    The prediction is made *before* training on ``x`` (prequential
    evaluation), its normalized error is pushed into ADWIN, and — once the
    grace period has elapsed — a split attempt (or depth-based
    deactivation) is triggered.
    """
    # Predict first, then learn: the error fed to ADWIN must be prequential.
    y_pred = self.prediction(x, tree=tree)
    normalized_error = normalize_error(y, y_pred, self)

    if tree.bootstrap_sampling:
        # Perform bootstrap-sampling
        k = self._rng.poisson(1.0)
        if k > 0:
            # NOTE(review): when k == 0 the sample is still learned with its
            # original weight (matches upstream behavior) — confirm intended.
            sample_weight = sample_weight * k

    # Lazily (re)create the detector if it was dropped (e.g. after cloning).
    if self._adwin is None:
        self._adwin = ADWIN(delta=self.adwin_delta)

    old_error = self.error_estimation

    # Update ADWIN
    self._error_change, _ = self._adwin.update(normalized_error)

    # Error is decreasing
    if self._error_change and old_error > self.error_estimation:
        # Only treat *increases* in error as drift.
        self._error_change = False

    # Update learning model
    super().learn_one(x, y, sample_weight=sample_weight, tree=tree)

    weight_seen = self.total_weight

    if weight_seen - self.last_split_attempt_at >= tree.grace_period:
        if self.depth >= tree.max_depth:
            # Depth-based pre-pruning
            self.deactivate()
            tree._n_inactive_leaves += 1
            tree._n_active_leaves -= 1
        elif self.is_active():
            tree._attempt_to_split(
                self,
                parent,
                parent_branch,
                adwin_delta=tree.adwin_confidence,
                seed=tree.seed,
            )
            self.last_split_attempt_at = weight_seen
def learn_one(self, x, y, sample_weight, tree, parent, parent_branch):
    """Route one sample through this split node while monitoring drift.

    Monitors the subtree's prequential error with ADWIN; on drift it grows
    an alternate subtree in the background, and later either swaps the
    alternate in (when provably better) or prunes it (when provably worse),
    using a Hoeffding-style bound on the error difference.
    """
    normalized_error = 0.0
    leaf = self.filter_instance_to_leaf(x, parent, parent_branch).node
    if leaf is not None:
        y_pred = leaf.leaf_prediction(x, tree=tree)
    else:
        # No reachable leaf: fall back to the parent's prediction.
        y_pred = parent.leaf_prediction(x, tree=tree)
    normalized_error = normalize_error(y, y_pred, self)

    # Update stats as traverse the tree to improve predictions (in case split nodes are used
    # to provide responses)
    self.stats.update(y, sample_weight)

    # Lazily (re)create the detector if it was dropped.
    if self._adwin is None:
        self._adwin = ADWIN(self.adwin_delta)

    old_error = self.error_estimation

    # Update ADWIN
    self._error_change, _ = self._adwin.update(normalized_error)

    # Only treat *increases* in error as drift.
    if self._error_change and old_error > self.error_estimation:
        self._error_change = False

    # Condition to build a new alternate tree
    if self._error_change:
        self._alternate_tree = tree._new_learning_node(parent=self)
        self._alternate_tree.depth -= 1  # To ensure we do not skip a tree level
        tree._n_alternate_trees += 1
    # Condition to replace alternate tree
    elif self._alternate_tree is not None and not self._alternate_tree.error_is_null():
        # Both error windows must be wide enough for the comparison to be meaningful.
        if self.error_width > tree.drift_window_threshold \
                and self._alternate_tree.error_width > tree.drift_window_threshold:
            old_error_rate = self.error_estimation
            alt_error_rate = self._alternate_tree.error_estimation
            # Hoeffding-style bound on the difference between the two error rates.
            f_delta = .05
            f_n = 1.0 / self._alternate_tree.error_width + 1.0 / self.error_width
            try:
                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                  * math.log(2.0 / f_delta) * f_n)
            except ValueError:
                # error rate exceeds 1, so we clip it
                bound = 0.
            if bound < (old_error_rate - alt_error_rate):
                # Alternate tree is significantly better: swap it in.
                tree._n_active_leaves -= self.n_leaves
                tree._n_active_leaves += self._alternate_tree.n_leaves
                self.kill_tree_children(tree)
                if parent is not None:
                    parent.set_child(parent_branch, self._alternate_tree)
                    self._alternate_tree = None
                else:
                    # Switch tree root
                    tree._tree_root = tree._tree_root._alternate_tree
                tree._n_switch_alternate_trees += 1
            elif bound < alt_error_rate - old_error_rate:
                # Alternate tree is significantly worse: discard it.
                if isinstance(self._alternate_tree, SplitNode):
                    self._alternate_tree.kill_tree_children(tree)
                self._alternate_tree = None
                tree._n_pruned_alternate_trees += 1

    # Learn one sample in alternate tree and child nodes
    if self._alternate_tree is not None:
        self._alternate_tree.learn_one(x, y, sample_weight=sample_weight, tree=tree,
                                       parent=parent, parent_branch=parent_branch)

    child_branch = self.instance_child_index(x)
    child = self.get_child(child_branch)
    if child is not None:
        child.learn_one(x, y, sample_weight=sample_weight, tree=tree, parent=self,
                        parent_branch=child_branch)
    elif self.split_test.branch_for_instance(x) == -1:
        split_feat = self.split_test.attrs_test_depends_on()[0]
        # Instance contains a categorical value previously unseen by the split node
        if self.split_test.max_branches() == -1 and split_feat in x:
            # Creates a new learning node to encompass the new observed feature value
            leaf_node = tree._new_learning_node(parent=self)
            branch_id = self.split_test.add_new_branch(x[split_feat])
            self.set_child(branch_id, leaf_node)
            tree._n_active_leaves += 1
            leaf_node.learn_one(x, y, sample_weight=sample_weight, tree=tree, parent=self,
                                parent_branch=branch_id)
        # The split feature is missing in the instance. Hence, we pass the new example
        # to the most traversed path in the current subtree
        else:
            # Heaviest child by total weight; missing children count as 0.
            path = max(self._children,
                       key=lambda c: self._children[c].total_weight if self._children[c] else 0.)
            leaf_node = self.get_child(path)
            # Pass instance to the most traversed path
            if leaf_node is None:
                leaf_node = tree._new_learning_node(parent=self)
                self.set_child(path, leaf_node)
                tree._n_active_leaves += 1
            leaf_node.learn_one(x, y, sample_weight=sample_weight, tree=tree, parent=self,
                                parent_branch=path)
def learn_one(
    self, x, y, *, sample_weight=1.0, tree=None, parent=None, parent_branch=None
):
    """Route one sample through this adaptive branch while monitoring drift.

    Monitors the subtree's prequential error with ADWIN; on drift it grows
    an alternate subtree in the background, and later either swaps the
    alternate in (when provably better) or prunes it (when provably worse),
    using a Hoeffding-style bound on the error difference.
    """
    # Predict first (prequential): descend to the leaf this sample reaches.
    leaf = super().traverse(x, until_leaf=True)
    y_pred = leaf.prediction(x, tree=tree)
    normalized_error = normalize_error(y, y_pred, self)

    # Update stats as traverse the tree to improve predictions (in case split nodes are used
    # to provide responses)
    self.stats.update(y, sample_weight)

    # Lazily (re)create the detector if it was dropped.
    if self._adwin is None:
        self._adwin = ADWIN(self.adwin_delta)

    old_error = self.error_estimation

    # Update ADWIN
    self._error_change, _ = self._adwin.update(normalized_error)

    # Only treat *increases* in error as drift.
    if self._error_change and old_error > self.error_estimation:
        self._error_change = False

    # Condition to build a new alternate tree
    if self._error_change:
        self._alternate_tree = tree._new_leaf(parent=self)
        self._alternate_tree.depth -= 1  # To ensure we do not skip a tree level
        tree._n_alternate_trees += 1
    # Condition to replace alternate tree
    elif (
        self._alternate_tree is not None
        and not self._alternate_tree.error_is_null()
    ):
        # Both error windows must be wide enough for the comparison to be meaningful.
        if (
            self.error_width > tree.drift_window_threshold
            and self._alternate_tree.error_width > tree.drift_window_threshold
        ):
            old_error_rate = self.error_estimation
            alt_error_rate = self._alternate_tree.error_estimation
            # Hoeffding-style bound on the difference between the two error rates.
            f_delta = 0.05
            f_n = 1.0 / self._alternate_tree.error_width + 1.0 / self.error_width
            try:
                bound = math.sqrt(
                    2.0
                    * old_error_rate
                    * (1.0 - old_error_rate)
                    * math.log(2.0 / f_delta)
                    * f_n
                )
            except ValueError:
                # error rate exceeds 1, so we clip it
                bound = 0.0
            if bound < (old_error_rate - alt_error_rate):
                # Alternate tree is significantly better: swap it in.
                tree._n_active_leaves -= self.n_leaves
                tree._n_active_leaves += self._alternate_tree.n_leaves
                self.kill_tree_children(tree)
                if parent is not None:
                    parent.children[parent_branch] = self._alternate_tree
                    self._alternate_tree = None
                else:
                    # Switch tree root
                    tree._root = tree._root._alternate_tree
                tree._n_switch_alternate_trees += 1
            elif bound < alt_error_rate - old_error_rate:
                # Alternate tree is significantly worse: discard it.
                if isinstance(self._alternate_tree, DTBranch):
                    self._alternate_tree.kill_tree_children(tree)  # noqa
                self._alternate_tree = None
                tree._n_pruned_alternate_trees += 1

    # Learn one sample in alternate tree and child nodes
    if self._alternate_tree is not None:
        self._alternate_tree.learn_one(
            x,
            y,
            sample_weight=sample_weight,
            tree=tree,
            parent=parent,
            parent_branch=parent_branch,
        )

    # EAFP: self.next raises KeyError when no branch matches this sample.
    try:
        child = self.next(x)
    except KeyError:
        child = None

    if child is not None:
        child.learn_one(
            x,
            y,
            sample_weight=sample_weight,
            tree=tree,
            parent=self,
            parent_branch=self.branch_no(x),
        )
    else:
        # Instance contains a categorical value previously unseen by the split node
        if self.max_branches() == -1 and self.feature in x:  # noqa
            # Creates a new learning node to encompass the new observed feature value
            leaf = tree._new_leaf(parent=self)
            self.add_child(x[self.feature], leaf)  # noqa
            tree._n_active_leaves += 1
            leaf.learn_one(
                x,
                y,
                sample_weight=sample_weight,
                tree=tree,
                parent=self,
                parent_branch=self.branch_no(x),
            )
        # The split feature is missing in the instance. Hence, we pass the new example
        # to the most traversed path in the current subtree
        else:
            child_id, child = self.most_common_path()
            child.learn_one(
                x,
                y,
                sample_weight=sample_weight,
                tree=tree,
                parent=self,
                parent_branch=child_id,
            )