def _find_best_split(self, X, target, n_features):
    """Find the best feature and threshold for a split (greedy search).

    A random subset of ``n_features`` column indices is drawn, and every
    candidate threshold that ``self._find_splits`` returns for those columns
    is scored. The candidate with the strictly highest gain wins; on ties
    the first candidate encountered is kept.

    Args:
        X: Feature matrix indexed as ``X[:, column]``; ``X.shape[1]`` is
            used as the number of available columns.
        target: Keyed container of targets. Its ``"y"`` entry is read when
            ``self.loss`` is ``None``; otherwise the whole container is
            handed to ``split_dataset`` and ``xgb_criterion``.
        n_features: Number of columns to sample without replacement.

    Returns:
        A ``(column, value, gain)`` tuple for the best candidate, or
        ``(None, None, None)`` when no candidate split was evaluated.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``n_features``
            is negative or larger than the number of columns.
    """
    # random.sample accepts any sequence, so the range needs no list copy.
    subset = random.sample(range(X.shape[1]), n_features)

    max_gain, max_col, max_val = None, None, None
    for column in subset:
        # Slice the column once; it is reused for every candidate value.
        feature = X[:, column]
        for value in self._find_splits(feature):
            if self.loss is None:
                # Random forest
                splits = split(feature, target["y"], value)
                gain = self.criterion(target["y"], splits)
            else:
                # Gradient boosting
                left, right = split_dataset(
                    X, target, column, value, return_X=False
                )
                gain = xgb_criterion(target, left, right, self.loss)

            # Strict comparison: an equal gain never replaces the current best.
            if (max_gain is None) or (gain > max_gain):
                max_col, max_val, max_gain = column, value, gain
    return max_col, max_val, max_gain
def _find_best_split(self, X, target, n_features):
    """Find the best feature and threshold for a split (greedy search).

    A random subset of ``n_features`` column indices is drawn, and every
    candidate threshold that ``self._find_splits`` returns for those columns
    is scored. The candidate with the strictly highest gain wins; on ties
    the first candidate encountered is kept.

    Args:
        X: Feature matrix indexed as ``X[:, column]``; ``X.shape[1]`` is
            used as the number of available columns.
        target: Keyed container of targets. Its ``'y'`` entry is read when
            ``self.loss`` is ``None``; otherwise the whole container is
            handed to ``split_dataset`` and ``xgb_criterion``.
        n_features: Number of columns to sample without replacement.

    Returns:
        A ``(column, value, gain)`` tuple for the best candidate, or
        ``(None, None, None)`` when no candidate split was evaluated.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``n_features``
            is negative or larger than the number of columns.
    """
    # random.sample accepts any sequence, so the range needs no list copy.
    subset = random.sample(range(X.shape[1]), n_features)

    max_gain, max_col, max_val = None, None, None
    for column in subset:
        # Slice the column once; it is reused for every candidate value.
        feature = X[:, column]
        for value in self._find_splits(feature):
            if self.loss is None:
                # Random forest
                splits = split(feature, target['y'], value)
                gain = self.criterion(target['y'], splits)
            else:
                # Gradient boosting
                left, right = split_dataset(
                    X, target, column, value, return_X=False
                )
                gain = xgb_criterion(target, left, right, self.loss)

            # Strict comparison: an equal gain never replaces the current best.
            if (max_gain is None) or (gain > max_gain):
                max_col, max_val, max_gain = column, value, gain
    return max_col, max_val, max_gain