Example #1
    def fit_tree(self, X, y, depth=0):
        """Fit a decision tree with recursive splitting on nodes.

        Args:
            X (ndarray): training set without class labels.
            y (ndarray): class labels for the training set.
            depth (int): depth of the current node (0 at the root).
        Returns:
            tree (Node): root node of learned decision tree.
        """
        # Count the training observations in the current node that belong to each class.
        class_distribution = [np.sum(y == i) for i in range(self.n_classes)]

        # Instantiate node to grow the decision tree.
        tree = Node(n=y.size,
                    class_distribution=class_distribution,
                    gini_index=_gini(y, self.n_classes))

        # Perform recursive splitting to max depth.
        if depth < self.max_depth:
            gini_index, split_index = self.get_split(X, y)
            # Observations where the split feature equals 1 go to the left child; the rest go right.
            if split_index is not None:
                index_left = (X[:, split_index] == 1)
                X_left, y_left = X[index_left], y[index_left]
                X_right, y_right = X[~index_left], y[~index_left]
                tree.gini_index = gini_index
                tree.feature_index = split_index
                tree.left = self.fit_tree(X_left, y_left, depth=depth + 1)
                tree.right = self.fit_tree(X_right, y_right, depth=depth + 1)
        return tree
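Both examples lean on a Node container and a _gini impurity helper that are defined elsewhere in the class and not shown here. The sketch below is a minimal reconstruction inferred from how the examples use them; the attribute names on Node and the weights keyword on _gini are taken from the calls above, everything else is an assumption.

    import numpy as np

    class Node:
        """Hypothetical node container; attribute names follow the examples above."""
        def __init__(self, n, class_distribution, gini_index):
            self.n = n                                    # observations reaching this node
            self.class_distribution = class_distribution  # per-class counts or weight sums
            self.gini_index = gini_index                  # impurity at this node
            self.feature_index = None                     # set when the node is split
            self.left = None                              # left child (split feature == 1)
            self.right = None                             # right child (split feature == 0)

    def _gini(y, n_classes, weights=None):
        """Gini impurity 1 - sum_k p_k**2, with p_k from counts or instance weights."""
        if weights is None:
            weights = np.ones(y.size)  # unweighted case: every instance counts once
        total = weights.sum()
        if total == 0:
            return 0.0
        p = np.array([np.sum(weights * (y == k)) for k in range(n_classes)]) / total
        return 1.0 - np.sum(p ** 2)

A pure node scores 0 and a 50/50 binary node scores 0.5, which is presumably what get_split minimizes (weighted over the two children) when it searches for split_index.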
Example #2
    def fit_tree(self, X, y, weights, depth=0):
        """Fit a decision tree with recursive splitting on nodes, takes additional weight argument for AdaBoost.

        Args:
            X (ndarray): training set without class labels.
            y (ndarray): class labels for the training set.
            weights (ndarray): weights for each training instance.
            depth (int): depth of the current node (0 at the root).
        Returns:
            tree (Node): root node of learned decision tree.
        """
        # Use the per-class sums of instance weights as the class distribution in the current node.
        D = weights
        class_weights = [np.sum(D * (y == i)) for i in range(self.n_classes)]

        # Instantiate node to grow the decision tree.
        tree = Node(n=y.size,
                    class_distribution=class_weights,
                    gini_index=_gini(y, self.n_classes, weights=D))

        # Perform recursive splitting to max depth.
        if depth < self.max_depth:
            gini_index, split_index = self.get_split(X, y, weights=D)
            # Send observations where the split feature equals 1 (with their labels and weights) to the left child; the rest go right.
            if split_index is not None:
                index_left = (X[:, split_index] == 1)
                X_left, y_left, D_left = X[index_left], y[index_left], D[index_left]
                X_right, y_right, D_right = X[~index_left], y[~index_left], D[~index_left]
                tree.gini_index = gini_index
                tree.feature_index = split_index
                tree.left = self.fit_tree(X_left, y_left, weights=D_left, depth=depth + 1)
                tree.right = self.fit_tree(X_right, y_right, weights=D_right, depth=depth + 1)
        return tree
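The weighted variant exists so the tree can serve as a weak learner inside AdaBoost, where the instance weights D change every round. The loop below is a sketch of how a caller might drive it; the make_stump factory, the predict method, and the root attribute are assumptions not shown in the examples, and the update is the standard binary AdaBoost step (labels in {0, 1}), not code from the original source.

    import numpy as np

    def adaboost_fit(make_stump, X, y, n_rounds=50):
        """Sketch of the AdaBoost loop that supplies weights to fit_tree.

        make_stump is an assumed factory returning an object with the
        fit_tree / predict interface used above; y is assumed to be in {0, 1}.
        """
        n = y.size
        D = np.full(n, 1.0 / n)            # start from uniform instance weights
        stumps, alphas = [], []
        for _ in range(n_rounds):
            stump = make_stump()
            stump.root = stump.fit_tree(X, y, weights=D)
            pred = stump.predict(X)        # assumed method, not shown above
            err = np.sum(D * (pred != y))  # weighted training error (D sums to 1)
            if err == 0 or err >= 0.5:     # perfect or no-better-than-chance learner
                break
            alpha = 0.5 * np.log((1 - err) / err)
            # Up-weight misclassified instances, down-weight correct ones, renormalize.
            agree = np.where(pred == y, 1.0, -1.0)
            D = D * np.exp(-alpha * agree)
            D /= D.sum()
            stumps.append(stump)
            alphas.append(alpha)
        return stumps, alphas

The early exit matters: err == 0 would make alpha infinite, and err >= 0.5 means the stump is no better than flipping a coin, so its vote would add nothing.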