예제 #1
0
    def learn_one(self, X, y, weight, tree, parent, parent_branch):
        true_class = y

        if tree.bootstrap_sampling:
            # Perform bootstrap-sampling
            k = self._random_state.poisson(1.0)
            if k > 0:
                weight = weight * k

        class_prediction = get_max_value_key(self.predict_one(X, tree=tree))

        is_correct = (true_class == class_prediction)

        if self._adwin is None:
            self._adwin = ADWIN()

        old_error = self.error_estimation

        # Add element to ADWIN
        self._adwin.add_element(0.0 if is_correct else 1.0)
        # Detect change with Adwin
        self.error_change = self._adwin.detected_change()

        if self.error_change and old_error > self.error_estimation:
            self.error_change = False

        # Update statistics
        super().learn_one(X, y, weight=weight, tree=tree)

        weight_seen = self.total_weight

        if weight_seen - self.last_split_attempt_at >= tree.grace_period:
            tree._attempt_to_split(self, parent, parent_branch)
            self.last_split_attempt_at = weight_seen
예제 #2
0
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y

        k = self._classifier_random.poisson(1.0)
        if k > 0:
            weight = weight * k

        class_prediction = get_max_value_key(self.get_class_votes(X, hat))

        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to Adwin
        add = 0.0 if (bl_correct is True) else 1.0

        self._estimation_error_weight.add_element(add)
        # Detect change with Adwin
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change is True and old_error > self.get_error_estimation():
            self.error_change = False

        # Update statistics
        super().learn_from_instance(X, y, weight, hat)

        # call ActiveLearningNode
        weight_seen = self.get_weight_seen()

        if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= hat.grace_period:
            hat._attempt_to_split(self, parent, parent_branch)
            self.set_weight_seen_at_last_split_evaluation(weight_seen)
예제 #3
0
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y
        class_prediction = 0

        leaf = self.filter_instance_to_leaf(X, parent, parent_branch)
        if leaf.node is not None:
            class_prediction = get_max_value_key(
                leaf.node.get_class_votes(X, hat))

        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if (bl_correct is True) else 1.0

        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change is True and old_error > self.get_error_estimation(
        ):
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change is True:
            self._alternate_tree = hat._new_learning_node()
            hat.alternate_trees_cnt += 1

        # Condition to replace alternate tree
        elif self._alternate_tree is not None and self._alternate_tree.is_null_error(
        ) is False:
            if self.get_error_width() > ERROR_WIDTH_THRESHOLD \
                    and self._alternate_tree.get_error_width() > ERROR_WIDTH_THRESHOLD:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                fDelta = .05
                fN = 1.0 / self._alternate_tree.get_error_width() + 1.0 / (
                    self.get_error_width())

                bound = math.sqrt(2.0 * old_error_rate *
                                  (1.0 - old_error_rate) *
                                  math.log(2.0 / fDelta) * fN)
                # To check, bound never less than (old_error_rate - alt_error_rate)
                if bound < (old_error_rate - alt_error_rate):
                    hat._active_leaf_node_cnt -= self.number_leaves()
                    hat._active_leaf_node_cnt += self._alternate_tree.number_leaves(
                    )
                    self.kill_tree_children(hat)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        # Switch tree root
                        hat._tree_root = hat._tree_root.alternateTree
                    hat.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, ActiveLearningNode):
                        self._alternate_tree = None
                    elif isinstance(self._alternate_tree,
                                    InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_children(hat)
                    hat.pruned_alternate_trees_cnt += 1  # hat.pruned_alternate_trees_cnt to check

        # Learn_From_Instance alternate Tree and Child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(X, y, weight, hat, parent,
                                                     parent_branch)
        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_from_instance(X, y, weight, hat, self, child_branch)
        # Instance contains a categorical value previously unseen by the split
        # node
        elif isinstance(self.get_split_test(), NominalAttributeMultiwayTest) and \
                self.get_split_test().branch_for_instance(X) < 0:
            # Creates a new learning node to encompass the new observed feature
            # value
            leaf_node = hat._new_learning_node()
            branch_id = self.get_split_test().add_new_branch(
                X[self.get_split_test().get_atts_test_depends_on()[0]])
            self.set_child(branch_id, leaf_node)
            hat._active_leaf_node_cnt += 1
            leaf_node.learn_from_instance(X, y, weight, hat, parent,
                                          parent_branch)
예제 #4
0
    def learn_one(self, X, y, weight, tree, parent, parent_branch):
        true_class = y
        class_prediction = 0

        leaf = self.filter_instance_to_leaf(X, parent, parent_branch)
        if leaf.node is not None:
            class_prediction = get_max_value_key(
                leaf.node.predict_one(X, tree=tree))

        is_correct = (true_class == class_prediction)

        if self._adwin is None:
            self._adwin = ADWIN()

        old_error = self.error_estimation

        # Add element to ADWIN
        add = 0.0 if is_correct else 1.0

        self._adwin.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._adwin.detected_change()

        if self.error_change and old_error > self.error_estimation:
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change:
            self._alternate_tree = tree._new_learning_node()
            tree.alternate_trees_cnt += 1

        # Condition to replace alternate tree
        elif self._alternate_tree is not None and not self._alternate_tree.error_is_null(
        ):
            if self.error_width > tree._ERROR_WIDTH_THRESHOLD \
                    and self._alternate_tree.error_width > tree._ERROR_WIDTH_THRESHOLD:
                old_error_rate = self.error_estimation
                alt_error_rate = self._alternate_tree.error_estimation
                fDelta = .05
                fN = 1.0 / self._alternate_tree.error_width + 1.0 / self.error_width

                bound = math.sqrt(2.0 * old_error_rate *
                                  (1.0 - old_error_rate) *
                                  math.log(2.0 / fDelta) * fN)
                # To check, bound never less than (old_error_rate - alt_error_rate)
                if bound < (old_error_rate - alt_error_rate):
                    tree._active_leaf_node_cnt -= self.n_leaves
                    tree._active_leaf_node_cnt += self._alternate_tree.n_leaves
                    self.kill_tree_children(tree)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        # Switch tree root
                        tree._tree_root = tree._tree_root._alternate_tree
                    tree.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, SplitNode):
                        self._alternate_tree.kill_tree_children(tree)
                    else:
                        self._alternate_tree = None
                    tree.pruned_alternate_trees_cnt += 1  # hat.pruned_alternate_trees_cnt to check

        # Learn one sample in alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_one(X, y, weight, tree, parent,
                                           parent_branch)
        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_one(X,
                            y,
                            weight,
                            tree,
                            parent=self,
                            parent_branch=child_branch)
        # Instance contains a categorical value previously unseen by the split
        # node
        elif isinstance(self.split_test, NominalAttributeMultiwayTest) and \
                self.split_test.branch_for_instance(X) < 0:
            # Creates a new learning node to encompass the new observed feature
            # value
            leaf_node = tree._new_learning_node()
            branch_id = self.split_test.add_new_branch(
                X[self.split_test.get_atts_test_depends_on()[0]])
            self.set_child(branch_id, leaf_node)
            tree._active_leaf_node_cnt += 1
            leaf_node.learn_one(X, y, weight, tree, parent, parent_branch)
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y
        class_prediction = 0

        leaf = self.filter_instance_to_leaf(X, parent, parent_branch)
        if leaf.node is not None:
            class_prediction = get_max_value_key(leaf.node.get_class_votes(X, hat))

        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if (bl_correct is True) else 1.0

        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change is True and old_error > self.get_error_estimation():
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change is True:
            self._alternate_tree = hat._new_learning_node()
            hat.alternate_trees_cnt += 1

        # Condition to replace alternate tree
        elif self._alternate_tree is not None and self._alternate_tree.is_null_error() is False:
            if self.get_error_width() > ERROR_WIDTH_THRESHOLD \
                    and self._alternate_tree.get_error_width() > ERROR_WIDTH_THRESHOLD:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                fDelta = .05
                fN = 1.0 / self._alternate_tree.get_error_width() + 1.0 / (self.get_error_width())

                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate) * math.log(2.0 / fDelta) * fN)
                # To check, bound never less than (old_error_rate - alt_error_rate)
                if bound < (old_error_rate - alt_error_rate):
                    hat._active_leaf_node_cnt -= self.number_leaves()
                    hat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                    self.kill_tree_children(hat)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        # Switch tree root
                        hat._tree_root = hat._tree_root.alternateTree
                    hat.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, ActiveLearningNode):
                        self._alternate_tree = None
                    elif isinstance(self._alternate_tree, InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_children(hat)
                    hat.pruned_alternate_trees_cnt += 1  # hat.pruned_alternate_trees_cnt to check

        # Learn_From_Instance alternate Tree and Child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(X, y, weight, hat, parent, parent_branch)
        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_from_instance(X, y, weight, hat, parent, parent_branch)