def _build_tree(self, X, y):
    """Grow the decision (sub)tree for the given samples and return its root.

    INPUT:
        - X: 2d numpy array
        - y: 1d numpy array

    OUTPUT:
        - TreeNode

    A leaf is produced when ``_choose_split_index`` returns no split column
    or when ``y`` contains a single distinct value; otherwise the node
    records the split and both partitions are built recursively.
    """
    root = TreeNode()
    split_col, split_val, partitions = self._choose_split_index(X, y)

    can_split = split_col is not None and len(np.unique(y)) != 1
    if not can_split:
        # Terminal node: keep the label counts and name the node after the
        # most frequent label.
        root.leaf = True
        root.classes = Counter(y)
        root.name = root.classes.most_common(1)[0][0]
        return root

    left_X, left_y, right_X, right_y = partitions
    root.column = split_col
    root.name = self.feature_names[split_col]
    root.value = split_val
    root.categorical = self.categorical[split_col]
    root.left = self._build_tree(left_X, left_y)
    root.right = self._build_tree(right_X, right_y)
    return root
def _build_tree(self, X, y):
    """Recursively build the decision tree and return its root node.

    Parameters
    ----------
    X : 2d numpy array, shape = [n_samples, n_features]
        The training data.
    y : 1d numpy array, shape = [n_samples]
        The targets for those samples.

    Returns
    -------
    TreeNode
        An internal node carrying the chosen split and two subtrees, or a
        leaf carrying the target counts and the most common target.
    """
    node = TreeNode()
    feature_idx, split_value, partition = self._choose_split_index(X, y)

    if feature_idx is not None and len(np.unique(y)) != 1:
        # A split exists and the targets are still mixed: branch here.
        X_left, y_left, X_right, y_right = partition
        node.column = feature_idx
        node.name = self.feature_names[feature_idx]
        node.value = split_value
        node.categorical = self.categorical[feature_idx]
        node.left = self._build_tree(X_left, y_left)
        node.right = self._build_tree(X_right, y_right)
    else:
        # Nothing to split on, or only one distinct target remains.
        node.leaf = True
        node.classes = Counter(y)
        node.name = node.classes.most_common(1)[0][0]

    return node
def _build_tree(self, X, y): ''' INPUT: - X: 2d numpy array - y: 1d numpy array OUTPUT: - TreeNode Recursively build the decision tree. Return the root node. ''' # * initialize a root TreeNode node = TreeNode() # * set index, value, splits as the output of self._choose_split_index(X,y) index, value, splits = self._choose_split_index(X, y) # * if splits is not None if splits is not None: # * self._pre_prune the tree and set the output flag to preprune parameter preprune = self._pre_prune(y, splits, depth=self.level) # * else else: # the preprune flag is False preprune = False # if no index is returned from the split index or we cannot split or # self.preprune = True if index is None or len(np.unique(y)) == 1 or preprune: # * set the node to be a leaf node.leaf = True # * set the classes attribute to the number of classes # * we have in this leaf with Counter() node.classes = Counter(y) # * set the name of the node to be the most common class in it node.name = node.classes.most_common(1)[0][0] else: # otherwise we can split (again this comes out of choose_split_index # * set X1, y1, X2, y2 to be the splits X1, y1, X2, y2 = splits # * the node column should be set to the index coming from split_index node.column = index # * the node name is the feature name as determined by # the index (column name) node.name = self.feature_names[index] # * set the node value to be the value of the split node.value = value # * set the categorical flag of the node to be the category of the column node.categorical = self.categorical[index] # * now continue recursing down both branches of the split node.left = self._build_tree(X1, y1) node.right = self._build_tree(X2, y2) self.tree_level += 1 return node
def _build_tree(self, X, y):
    '''
    INPUT:
        - X: 2d numpy array
        - y: 1d numpy array
    OUTPUT:
        - TreeNode

    Build the decision tree for (X, y) by recursion and hand back its root.
    '''
    current = TreeNode()
    best_column, best_value, children = self._choose_split_index(X, y)

    # Stop when no split column was found; only if one was found, also stop
    # when every entry of y is the same value.
    is_leaf = best_column is None
    if not is_leaf:
        is_leaf = len(np.unique(y)) == 1

    if is_leaf:
        # Store the class counts and name the leaf after the top class.
        current.leaf = True
        current.classes = Counter(y)
        top_class, _ = current.classes.most_common(1)[0]
        current.name = top_class
        return current

    # Internal node: remember which column/value was split on, then recurse
    # into each side of the partition.
    X_a, y_a, X_b, y_b = children
    current.column = best_column
    current.name = self.feature_names[best_column]
    current.value = best_value
    current.categorical = self.categorical[best_column]
    current.left = self._build_tree(X_a, y_a)
    current.right = self._build_tree(X_b, y_b)
    return current
def _build_tree(self, X, y, pre_prune_type, pre_prune_size):
    '''
    INPUT:
        - X: 2d numpy array
        - y: 1d numpy array
        - pre_prune_type: 'leaf_size', 'depth', or any other value for no
          pre-pruning
        - pre_prune_size: the limit that goes with pre_prune_type
    OUTPUT:
        - TreeNode

    Recursively build the decision tree. Return the root node.

    Pre-pruning:
        - 'leaf_size': a node holding fewer than pre_prune_size samples
          becomes a leaf.
        - 'depth': a node becomes a leaf once self.depth exceeds
          pre_prune_size.
        - anything else: neither limit is applied.

    self.depth is used as the depth of the node currently being built. It
    is raised while a node's children are built and restored afterwards, so
    sibling branches are measured against the same depth.
    NOTE(review): self.depth must be initialised by the caller before the
    root call (presumably to 0) -- confirm.
    '''
    leaf_size = pre_prune_size if pre_prune_type == 'leaf_size' else 1
    # None means "no depth limit". A data-derived fallback limit would shrink
    # as the recursion partitions X and could cut off small, impure nodes
    # even though depth pruning was never requested.
    max_depth = pre_prune_size if pre_prune_type == 'depth' else None

    node = TreeNode()
    index, value, splits = self._choose_split_index(X, y)

    if (index is None
            or len(np.unique(y)) == 1
            or len(y) < leaf_size
            or (max_depth is not None and self.depth > max_depth)):
        # Leaf: keep the class counts and name the node after the most
        # common class.
        node.leaf = True
        node.classes = Counter(y)
        node.name = node.classes.most_common(1)[0][0]
    else:
        X1, y1, X2, y2 = splits
        node.column = index
        node.name = self.feature_names[index]
        node.value = value
        node.categorical = self.categorical[index]

        # Children live one level deeper. Restore the counter afterwards so
        # the depth spent in one branch does not count against the next.
        self.depth += 1
        try:
            node.left = self._build_tree(X1, y1, pre_prune_type,
                                         pre_prune_size)
            node.right = self._build_tree(X2, y2, pre_prune_type,
                                          pre_prune_size)
        finally:
            self.depth -= 1
    return node