コード例 #1
0
ファイル: DecisionTree.py プロジェクト: yangjici/GalvanizeDSI
    def _build_tree(self, X, y):
        '''
        Recursively build the decision tree. Return the root node.

        INPUT:
            - X: 2d numpy array
            - y: 1d numpy array
        OUTPUT:
            - TreeNode

        A leaf is produced when self._choose_split_index returns no index
        or when y holds a single distinct label; otherwise the node records
        the split and both partitions are built recursively.
        '''
        root = TreeNode()
        split_col, split_val, partitions = self._choose_split_index(X, y)

        # Guard clause: nothing to split on, or the labels are already pure.
        if split_col is None or len(np.unique(y)) == 1:
            root.leaf = True
            label_counts = Counter(y)
            root.classes = label_counts
            # A leaf is named after its most frequent label.
            root.name = label_counts.most_common(1)[0][0]
            return root

        X_left, y_left, X_right, y_right = partitions
        root.column = split_col
        root.name = self.feature_names[split_col]
        root.value = split_val
        root.categorical = self.categorical[split_col]
        root.left = self._build_tree(X_left, y_left)
        root.right = self._build_tree(X_right, y_right)
        return root
コード例 #2
0
    def _build_tree(self, X, y):
        '''
        Recursively build the decision tree. Return the root node.

        INPUT:
            - X: 2d numpy array
            - y: 1d numpy array
        OUTPUT:
            - TreeNode
        '''
        tree_node = TreeNode()
        feature_idx, threshold, child_data = self._choose_split_index(X, y)

        # Keep splitting only when a split column was chosen and y does not
        # consist of exactly one distinct label. The `and` short-circuits,
        # so np.unique is skipped when no column was chosen.
        keep_splitting = feature_idx is not None and len(np.unique(y)) != 1

        if keep_splitting:
            X_a, y_a, X_b, y_b = child_data
            tree_node.column = feature_idx
            tree_node.name = self.feature_names[feature_idx]
            tree_node.value = threshold
            tree_node.categorical = self.categorical[feature_idx]
            tree_node.left = self._build_tree(X_a, y_a)
            tree_node.right = self._build_tree(X_b, y_b)
        else:
            tree_node.leaf = True
            tree_node.classes = Counter(y)
            # Name the leaf after the label that occurs most often in y.
            most_frequent = tree_node.classes.most_common(1)
            tree_node.name = most_frequent[0][0]
        return tree_node
コード例 #3
0
    def _build_tree(self, X, y):
        """Recursively build the decision tree and return its root node.

        Parameters
        ----------
        X: 2d numpy array, shape = [n_samples, n_features]
            The training data.
        y: 1d numpy array, shape = [n_samples]
            The labels belonging to the rows of X.

        Returns
        -------
        TreeNode
            A leaf (``leaf``, ``classes``, ``name`` set) when no split index
            is returned or y has a single distinct value; otherwise an
            internal node with ``column``, ``name``, ``value``,
            ``categorical``, ``left`` and ``right`` set.
        """
        current = TreeNode()
        col, cut, parts = self._choose_split_index(X, y)

        if col is None or np.unique(y).size == 1:
            # Terminal node: keep the label histogram and its majority label.
            current.leaf = True
            current.classes = Counter(y)
            majority_label, _ = current.classes.most_common(1)[0]
            current.name = majority_label
        else:
            # Internal node: remember the split, then grow both children.
            X_lo, y_lo, X_hi, y_hi = parts
            current.column = col
            current.name = self.feature_names[col]
            current.value = cut
            current.categorical = self.categorical[col]
            current.left = self._build_tree(X_lo, y_lo)
            current.right = self._build_tree(X_hi, y_hi)
        return current
コード例 #4
0
    def _build_tree(self, X, y):
        '''
        Recursively build the decision tree. Return the root node.

        INPUT:
            - X: 2d numpy array
            - y: 1d numpy array
        OUTPUT:
            - TreeNode

        A node becomes a leaf when no split index is returned, when y holds
        a single distinct label, or when self._pre_prune returns a truthy
        value for the candidate split. self.tree_level is incremented once
        for every node built (leaf or internal), after its children exist.
        '''
        result = TreeNode()
        split_col, split_val, partitions = self._choose_split_index(X, y)

        # Pre-pruning is only consulted when a candidate split exists.
        # NOTE(review): the depth passed here is self.level while the counter
        # updated below is self.tree_level - confirm these are meant to differ.
        stop_early = (
            self._pre_prune(y, partitions, depth=self.level)
            if partitions is not None
            else False
        )

        can_split = (
            split_col is not None
            and len(np.unique(y)) != 1
            and not stop_early
        )

        if can_split:
            X_left, y_left, X_right, y_right = partitions
            result.column = split_col
            # Internal nodes are named after the feature they split on.
            result.name = self.feature_names[split_col]
            result.value = split_val
            result.categorical = self.categorical[split_col]
            # Recurse down both sides of the split.
            result.left = self._build_tree(X_left, y_left)
            result.right = self._build_tree(X_right, y_right)
        else:
            result.leaf = True
            # Label histogram for the samples that ended up in this leaf.
            result.classes = Counter(y)
            # Leaves are named after their most common label.
            result.name = result.classes.most_common(1)[0][0]

        self.tree_level += 1
        return result
コード例 #5
0
    def _build_tree(self, X, y):
        '''
        Recursively build the decision tree. Return the root node.

        INPUT:
            - X: 2d numpy array
            - y: 1d numpy array
        OUTPUT:
            - TreeNode
        '''
        new_node = TreeNode()
        best_col, best_val, halves = self._choose_split_index(X, y)

        # Split case first: a column was chosen and y is not a single label.
        if best_col is not None and len(np.unique(y)) != 1:
            X_first, y_first, X_second, y_second = halves
            new_node.column = best_col
            # The node carries the name of the feature it splits on.
            new_node.name = self.feature_names[best_col]
            new_node.value = best_val
            new_node.categorical = self.categorical[best_col]
            # Grow the subtree for each side of the split.
            new_node.left = self._build_tree(X_first, y_first)
            new_node.right = self._build_tree(X_second, y_second)
            return new_node

        # Otherwise this is a leaf: store the label counts and name the
        # node after the most common label.
        new_node.leaf = True
        new_node.classes = Counter(y)
        top_label = new_node.classes.most_common(1)[0][0]
        new_node.name = top_label
        return new_node
コード例 #6
0
    def _build_tree(self, X, y, pre_prune_type, pre_prune_size):
        '''
        INPUT:
            - X: 2d numpy array
            - y: 1d numpy array
            - pre_prune_type: 'leaf_size' or 'depth' to enable that
              pre-pruning rule; any other value disables pre-pruning
            - pre_prune_size: threshold for the selected rule (minimum
              number of samples for 'leaf_size', maximum depth for 'depth')
        OUTPUT:
            - TreeNode

        Recursively build the decision tree. Return the root node.

        A node becomes a leaf when no split index is returned, when y holds
        a single distinct label, when it has fewer than the minimum number
        of samples, or when self.depth exceeds the depth limit.

        self.depth is read as the depth of the node currently being built.
        It is raised by one while the children of a node are built and
        restored afterwards, so sibling subtrees see the same depth and the
        attribute has its entry value again when this call returns.
        '''
        # Only the selected rule is active; the other gets a neutral value
        # that can never trigger. The depth limit uses infinity rather than
        # a size derived from X, because X shrinks on the way down and a
        # data-dependent bound could prune when no depth limit was requested.
        min_samples = pre_prune_size if pre_prune_type == 'leaf_size' else 1
        max_depth = pre_prune_size if pre_prune_type == 'depth' else float('inf')

        node = TreeNode()
        index, value, splits = self._choose_split_index(X, y)

        make_leaf = (
            index is None
            or len(np.unique(y)) == 1
            or len(y) < min_samples
            or self.depth > max_depth
        )

        if make_leaf:
            node.leaf = True
            node.classes = Counter(y)
            # A leaf is named after its most common label.
            node.name = node.classes.most_common(1)[0][0]
        else:
            X1, y1, X2, y2 = splits
            node.column = index
            node.name = self.feature_names[index]
            node.value = value
            node.categorical = self.categorical[index]
            # Children live one level deeper; restore the depth afterwards so
            # the counter tracks the current path, not the total split count.
            self.depth += 1
            try:
                node.left = self._build_tree(
                    X1, y1, pre_prune_type, pre_prune_size)
                node.right = self._build_tree(
                    X2, y2, pre_prune_type, pre_prune_size)
            finally:
                self.depth -= 1
        return node