Esempio n. 1
0
    def __init__(self, stats, depth, splitter, adwin_delta, seed, **kwargs):
        super().__init__(stats, depth, splitter, **kwargs)

        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=self.adwin_delta)
        self._error_change = False
        self._rng = check_random_state(seed)

        # Normalization of info monitored by drift detectors (using Welford's algorithm)
        self._error_normalizer = Var(ddof=1)
Esempio n. 2
0
    def __init__(self, stats, depth, attr_obs, attr_obs_params, leaf_model, adwin_delta, seed):
        super().__init__(stats, depth, attr_obs, attr_obs_params, leaf_model)

        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=self.adwin_delta)
        self.error_change = False
        self._rng = check_random_state(seed)

        # Normalization of info monitored by drift detectors (using Welford's algorithm)
        self._error_normalizer = Var(ddof=1)
Esempio n. 3
0
    def __init__(self, split_test, stats, depth, adwin_delta, seed):
        stats = stats if stats else Var()
        super().__init__(split_test, stats, depth)
        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=self.adwin_delta)
        self._alternate_tree = None
        self._error_change = False

        self._rng = check_random_state(seed)

        # Normalization of info monitored by drift detectors (using Welford's algorithm)
        self._error_normalizer = Var(ddof=1)
Esempio n. 4
0
    def learn_one(self,
                  x,
                  y,
                  *,
                  sample_weight=1.0,
                  tree=None,
                  parent=None,
                  parent_branch=None):
        y_pred = self.prediction(x, tree=tree)
        normalized_error = normalize_error(y, y_pred, self)

        if tree.bootstrap_sampling:
            # Perform bootstrap-sampling
            k = self._rng.poisson(1.0)
            if k > 0:
                sample_weight = sample_weight * k

        if self._adwin is None:
            self._adwin = ADWIN(delta=self.adwin_delta)

        old_error = self.error_estimation

        # Update ADWIN
        self._error_change, _ = self._adwin.update(normalized_error)

        # Error is decreasing
        if self._error_change and old_error > self.error_estimation:
            self._error_change = False

        # Update learning model
        super().learn_one(x, y, sample_weight=sample_weight, tree=tree)

        weight_seen = self.total_weight

        if weight_seen - self.last_split_attempt_at >= tree.grace_period:
            if self.depth >= tree.max_depth:
                # Depth-based pre-pruning
                self.deactivate()
                tree._n_inactive_leaves += 1
                tree._n_active_leaves -= 1
            elif self.is_active():
                tree._attempt_to_split(
                    self,
                    parent,
                    parent_branch,
                    adwin_delta=tree.adwin_confidence,
                    seed=tree.seed,
                )
                self.last_split_attempt_at = weight_seen
Esempio n. 5
0
    def learn_one(self, x, y, sample_weight, tree, parent, parent_branch):
        normalized_error = 0.0

        leaf = self.filter_instance_to_leaf(x, parent, parent_branch).node
        if leaf is not None:
            y_pred = leaf.leaf_prediction(x, tree=tree)
        else:
            y_pred = parent.leaf_prediction(x, tree=tree)

        normalized_error = normalize_error(y, y_pred, self)

        # Update stats as traverse the tree to improve predictions (in case split nodes are used
        # to provide responses)
        self.stats.update(y, sample_weight)

        if self._adwin is None:
            self._adwin = ADWIN(self.adwin_delta)

        old_error = self.error_estimation

        # Update ADWIN
        self._error_change, _ = self._adwin.update(normalized_error)

        if self._error_change and old_error > self.error_estimation:
            self._error_change = False

        # Condition to build a new alternate tree
        if self._error_change:
            self._alternate_tree = tree._new_learning_node(parent=self)
            self._alternate_tree.depth -= 1  # To ensure we do not skip a tree level
            tree._n_alternate_trees += 1

        # Condition to replace alternate tree
        elif self._alternate_tree is not None and not self._alternate_tree.error_is_null(
        ):
            if self.error_width > tree.drift_window_threshold \
                    and self._alternate_tree.error_width > tree.drift_window_threshold:
                old_error_rate = self.error_estimation
                alt_error_rate = self._alternate_tree.error_estimation
                f_delta = .05
                f_n = 1.0 / self._alternate_tree.error_width + 1.0 / self.error_width

                try:
                    bound = math.sqrt(2.0 * old_error_rate *
                                      (1.0 - old_error_rate) *
                                      math.log(2.0 / f_delta) * f_n)
                except ValueError:  # error rate exceeds 1, so we clip it
                    bound = 0.
                if bound < (old_error_rate - alt_error_rate):
                    tree._n_active_leaves -= self.n_leaves
                    tree._n_active_leaves += self._alternate_tree.n_leaves
                    self.kill_tree_children(tree)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                        self._alternate_tree = None
                    else:
                        # Switch tree root
                        tree._tree_root = tree._tree_root._alternate_tree
                    tree._n_switch_alternate_trees += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, SplitNode):
                        self._alternate_tree.kill_tree_children(tree)
                    self._alternate_tree = None
                    tree._n_pruned_alternate_trees += 1

        # Learn one sample in alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_one(x,
                                           y,
                                           sample_weight=sample_weight,
                                           tree=tree,
                                           parent=parent,
                                           parent_branch=parent_branch)
        child_branch = self.instance_child_index(x)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_one(x,
                            y,
                            sample_weight=sample_weight,
                            tree=tree,
                            parent=self,
                            parent_branch=child_branch)
        elif self.split_test.branch_for_instance(x) == -1:
            split_feat = self.split_test.attrs_test_depends_on()[0]
            # Instance contains a categorical value previously unseen by the split node
            if self.split_test.max_branches() == -1 and split_feat in x:
                # Creates a new learning node to encompass the new observed feature value
                leaf_node = tree._new_learning_node(parent=self)
                branch_id = self.split_test.add_new_branch(x[split_feat])
                self.set_child(branch_id, leaf_node)
                tree._n_active_leaves += 1
                leaf_node.learn_one(x,
                                    y,
                                    sample_weight=sample_weight,
                                    tree=tree,
                                    parent=self,
                                    parent_branch=branch_id)
            # The split feature is missing in the instance. Hence, we pass the new example
            # to the most traversed path in the current subtree
            else:
                path = max(self._children,
                           key=lambda c: self._children[c].total_weight
                           if self._children[c] else 0.)
                leaf_node = self.get_child(path)
                # Pass instance to the most traversed path
                if leaf_node is None:
                    leaf_node = tree._new_learning_node(parent=self)
                    self.set_child(path, leaf_node)
                    tree._n_active_leaves += 1

                leaf_node.learn_one(x,
                                    y,
                                    sample_weight=sample_weight,
                                    tree=tree,
                                    parent=self,
                                    parent_branch=path)
Esempio n. 6
0
    def learn_one(
        self, x, y, *, sample_weight=1.0, tree=None, parent=None, parent_branch=None
    ):
        leaf = super().traverse(x, until_leaf=True)
        y_pred = leaf.prediction(x, tree=tree)
        normalized_error = normalize_error(y, y_pred, self)

        # Update stats as traverse the tree to improve predictions (in case split nodes are used
        # to provide responses)
        self.stats.update(y, sample_weight)

        if self._adwin is None:
            self._adwin = ADWIN(self.adwin_delta)

        old_error = self.error_estimation

        # Update ADWIN
        self._error_change, _ = self._adwin.update(normalized_error)

        if self._error_change and old_error > self.error_estimation:
            self._error_change = False

        # Condition to build a new alternate tree
        if self._error_change:
            self._alternate_tree = tree._new_leaf(parent=self)
            self._alternate_tree.depth -= 1  # To ensure we do not skip a tree level
            tree._n_alternate_trees += 1

        # Condition to replace alternate tree
        elif (
            self._alternate_tree is not None
            and not self._alternate_tree.error_is_null()
        ):
            if (
                self.error_width > tree.drift_window_threshold
                and self._alternate_tree.error_width > tree.drift_window_threshold
            ):
                old_error_rate = self.error_estimation
                alt_error_rate = self._alternate_tree.error_estimation
                f_delta = 0.05
                f_n = 1.0 / self._alternate_tree.error_width + 1.0 / self.error_width

                try:
                    bound = math.sqrt(
                        2.0
                        * old_error_rate
                        * (1.0 - old_error_rate)
                        * math.log(2.0 / f_delta)
                        * f_n
                    )
                except ValueError:  # error rate exceeds 1, so we clip it
                    bound = 0.0
                if bound < (old_error_rate - alt_error_rate):
                    tree._n_active_leaves -= self.n_leaves
                    tree._n_active_leaves += self._alternate_tree.n_leaves
                    self.kill_tree_children(tree)

                    if parent is not None:
                        parent.children[parent_branch] = self._alternate_tree
                        self._alternate_tree = None
                    else:
                        # Switch tree root
                        tree._root = tree._root._alternate_tree
                    tree._n_switch_alternate_trees += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, DTBranch):
                        self._alternate_tree.kill_tree_children(tree)  # noqa
                    self._alternate_tree = None
                    tree._n_pruned_alternate_trees += 1

        # Learn one sample in alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_one(
                x,
                y,
                sample_weight=sample_weight,
                tree=tree,
                parent=parent,
                parent_branch=parent_branch,
            )
        try:
            child = self.next(x)
        except KeyError:
            child = None

        if child is not None:
            child.learn_one(
                x,
                y,
                sample_weight=sample_weight,
                tree=tree,
                parent=self,
                parent_branch=self.branch_no(x),
            )
        else:
            # Instance contains a categorical value previously unseen by the split node
            if self.max_branches() == -1 and self.feature in x:  # noqa
                # Creates a new learning node to encompass the new observed feature value
                leaf = tree._new_leaf(parent=self)
                self.add_child(x[self.feature], leaf)  # noqa
                tree._n_active_leaves += 1
                leaf.learn_one(
                    x,
                    y,
                    sample_weight=sample_weight,
                    tree=tree,
                    parent=self,
                    parent_branch=self.branch_no(x),
                )
            # The split feature is missing in the instance. Hence, we pass the new example
            # to the most traversed path in the current subtree
            else:
                child_id, child = self.most_common_path()
                child.learn_one(
                    x,
                    y,
                    sample_weight=sample_weight,
                    tree=tree,
                    parent=self,
                    parent_branch=child_id,
                )