Example #1
0
        def fit(self, X, y, sample_weight=None, check_input=True,
                X_idx_sorted=None):
            """Build a decision tree from (X, y), then add noise to node values.

            The method validates the inputs and hyper-parameters, grows the
            tree with a depth-first or best-first builder, perturbs the value
            row of the first output of every node through ``self.addNoise``
            and finally calls ``self._prune_tree()``.

            Parameters
            ----------
            X : array-like or sparse matrix of shape (n_samples, n_features)
                Training samples. When ``check_input`` is true it is validated
                with ``dtype=DTYPE`` and ``accept_sparse="csc"``.
            y : array-like of shape (n_samples,) or (n_samples, n_outputs)
                Targets. For classifiers the labels of every output column
                are encoded to integer codes.
            sample_weight : array-like of shape (n_samples,), default=None
                Per-sample weights; multiplied by the expanded class weights
                when ``class_weight`` is set on a classifier.
            check_input : bool, default=True
                If false, the validation of ``X`` and ``y`` is skipped.
            X_idx_sorted : default=None
                Forwarded unchanged to ``builder.build``.

            Returns
            -------
            self

            Raises
            ------
            ValueError
                If a hyper-parameter is out of range or the number of labels
                does not match the number of samples.
            """
            random_state = check_random_state(self.random_state)

            if self.ccp_alpha < 0.0:
                raise ValueError(
                    "ccp_alpha must be greater than or equal to 0")

            if check_input:
                # Need to validate separately here.
                # We can't pass multi_ouput=True because that would allow y to be
                # csr.
                check_X_params = dict(dtype=DTYPE, accept_sparse="csc")
                check_y_params = dict(ensure_2d=False, dtype=None)
                X, y = self._validate_data(X, y,
                                           validate_separately=(check_X_params,
                                                                check_y_params))
                if issparse(X):
                    X.sort_indices()

                    if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                        raise ValueError("No support for np.int64 index based "
                                         "sparse matrices")

            # Determine output settings
            n_samples, self.n_features_ = X.shape
            is_classification = is_classifier(self)

            y = np.atleast_1d(y)
            expanded_class_weight = None

            if y.ndim == 1:
                # reshape is necessary to preserve the data contiguity against vs
                # [:, np.newaxis] that does not.
                y = np.reshape(y, (-1, 1))

            self.n_outputs_ = y.shape[1]

            if is_classification:
                check_classification_targets(y)
                y = np.copy(y)
                self.classes_ = []
                self.n_classes_ = []

                if self.class_weight is not None:
                    # Class weights are looked up by the original labels, so
                    # keep a copy before the labels are replaced by codes.
                    y_original = np.copy(y)

                # ``np.int`` was only an alias of the builtin ``int`` and has
                # been removed from NumPy; the builtin gives the same dtype.
                y_encoded = np.zeros(y.shape, dtype=int)
                for k in range(self.n_outputs_):
                    classes_k, y_encoded[:, k] = np.unique(y[:, k],
                                                           return_inverse=True)
                    self.classes_.append(classes_k)
                    self.n_classes_.append(classes_k.shape[0])
                y = y_encoded

                if self.class_weight is not None:
                    expanded_class_weight = compute_sample_weight(
                        self.class_weight, y_original)

                self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)

            if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
                y = np.ascontiguousarray(y, dtype=DOUBLE)

            # Check parameters
            max_depth = (np.iinfo(np.int32).max if self.max_depth is None
                         else self.max_depth)
            max_leaf_nodes = (-1 if self.max_leaf_nodes is None
                              else self.max_leaf_nodes)

            if isinstance(self.min_samples_leaf, numbers.Integral):
                if not 1 <= self.min_samples_leaf:
                    raise ValueError("min_samples_leaf must be at least 1 "
                                     "or in (0, 0.5], got %s"
                                     % self.min_samples_leaf)
                min_samples_leaf = self.min_samples_leaf
            else:  # float
                if not 0. < self.min_samples_leaf <= 0.5:
                    raise ValueError("min_samples_leaf must be at least 1 "
                                     "or in (0, 0.5], got %s"
                                     % self.min_samples_leaf)
                min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))

            if isinstance(self.min_samples_split, numbers.Integral):
                if not 2 <= self.min_samples_split:
                    raise ValueError("min_samples_split must be an integer "
                                     "greater than 1 or a float in (0.0, 1.0]; "
                                     "got the integer %s"
                                     % self.min_samples_split)
                min_samples_split = self.min_samples_split
            else:  # float
                if not 0. < self.min_samples_split <= 1.:
                    raise ValueError("min_samples_split must be an integer "
                                     "greater than 1 or a float in (0.0, 1.0]; "
                                     "got the float %s"
                                     % self.min_samples_split)
                min_samples_split = int(
                    ceil(self.min_samples_split * n_samples))
                min_samples_split = max(2, min_samples_split)

            # A split must leave at least min_samples_leaf samples on each side.
            min_samples_split = max(min_samples_split, 2 * min_samples_leaf)

            if isinstance(self.max_features, str):
                if self.max_features == "auto":
                    if is_classification:
                        max_features = max(1, int(np.sqrt(self.n_features_)))
                    else:
                        max_features = self.n_features_
                elif self.max_features == "sqrt":
                    max_features = max(1, int(np.sqrt(self.n_features_)))
                elif self.max_features == "log2":
                    max_features = max(1, int(np.log2(self.n_features_)))
                else:
                    raise ValueError("Invalid value for max_features. "
                                     "Allowed string values are 'auto', "
                                     "'sqrt' or 'log2'.")
            elif self.max_features is None:
                max_features = self.n_features_
            elif isinstance(self.max_features, numbers.Integral):
                max_features = self.max_features
            else:  # float
                if self.max_features > 0.0:
                    max_features = max(1,
                                       int(self.max_features * self.n_features_))
                else:
                    # Rejected by the range check below.
                    max_features = 0

            self.max_features_ = max_features

            if len(y) != n_samples:
                raise ValueError("Number of labels=%d does not match "
                                 "number of samples=%d" % (len(y), n_samples))
            if not 0 <= self.min_weight_fraction_leaf <= 0.5:
                raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
            if max_depth <= 0:
                raise ValueError("max_depth must be greater than zero. ")
            if not (0 < max_features <= self.n_features_):
                raise ValueError("max_features must be in (0, n_features]")
            if not isinstance(max_leaf_nodes, numbers.Integral):
                raise ValueError("max_leaf_nodes must be integral number but was "
                                 "%r" % max_leaf_nodes)
            if -1 < max_leaf_nodes < 2:
                raise ValueError(("max_leaf_nodes {0} must be either None "
                                  "or larger than 1").format(max_leaf_nodes))

            if sample_weight is not None:
                sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)

            if expanded_class_weight is not None:
                if sample_weight is not None:
                    sample_weight = sample_weight * expanded_class_weight
                else:
                    sample_weight = expanded_class_weight

            # Set min_weight_leaf from min_weight_fraction_leaf
            if sample_weight is None:
                min_weight_leaf = (self.min_weight_fraction_leaf *
                                   n_samples)
            else:
                min_weight_leaf = (self.min_weight_fraction_leaf *
                                   np.sum(sample_weight))

            min_impurity_split = self.min_impurity_split
            if min_impurity_split is not None:
                warnings.warn("The min_impurity_split parameter is deprecated. "
                              "Its default value has changed from 1e-7 to 0 in "
                              "version 0.23, and it will be removed in 0.25. "
                              "Use the min_impurity_decrease parameter instead.",
                              FutureWarning)

                if min_impurity_split < 0.:
                    raise ValueError("min_impurity_split must be greater than "
                                     "or equal to 0")
            else:
                min_impurity_split = 0

            if self.min_impurity_decrease < 0.:
                raise ValueError("min_impurity_decrease must be greater than "
                                 "or equal to 0")

            if self.presort != 'deprecated':
                warnings.warn("The parameter 'presort' is deprecated and has no "
                              "effect. It will be removed in v0.24. You can "
                              "suppress this warning by not passing any value "
                              "to the 'presort' parameter.",
                              FutureWarning)

            # Build tree
            criterion = self.criterion
            if not isinstance(criterion, Criterion):
                if is_classification:
                    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
                                                             self.n_classes_)
                else:
                    criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
                                                             n_samples)

            SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS

            splitter = self.splitter
            if not isinstance(self.splitter, Splitter):
                splitter = SPLITTERS[self.splitter](criterion,
                                                    self.max_features_,
                                                    min_samples_leaf,
                                                    min_weight_leaf,
                                                    random_state)

            if is_classification:
                self.tree_ = Tree(self.n_features_,
                                  self.n_classes_, self.n_outputs_)

            else:
                self.tree_ = Tree(self.n_features_,
                                  # TODO: tree should't need this in this case
                                  np.array([1] * self.n_outputs_,
                                           dtype=np.intp),
                                  self.n_outputs_)

            # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
            if max_leaf_nodes < 0:
                builder = DepthFirstTreeBuilder(splitter, min_samples_split,
                                                min_samples_leaf,
                                                min_weight_leaf,
                                                max_depth,
                                                self.min_impurity_decrease,
                                                min_impurity_split)
            else:
                builder = BestFirstTreeBuilder(splitter, min_samples_split,
                                               min_samples_leaf,
                                               min_weight_leaf,
                                               max_depth,
                                               max_leaf_nodes,
                                               self.min_impurity_decrease,
                                               min_impurity_split)

            builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)

            if self.n_outputs_ == 1 and is_classification:
                self.n_classes_ = self.n_classes_[0]
                self.classes_ = self.classes_[0]

            # ``self.e`` is rescaled for every node before ``addNoise`` is
            # called (NOTE(review): presumably ``addNoise`` reads ``self.e``
            # as its noise budget -- confirm). The configured value is put
            # back afterwards so that fitting never leaves the estimator with
            # a modified hyper-parameter and a second ``fit`` starts from the
            # same ``e`` as the first one.
            e = self.e
            try:
                for i in range(self.tree_.value.shape[0]):
                    # Sum of the node's value row for the first output.
                    fr = np.sum(self.tree_.value[i][0])
                    self.e = e / (fr + max_depth)
                    self.tree_.value[i][0] = self.addNoise(
                        self.tree_.value[i][0])
            finally:
                self.e = e

            self._prune_tree()
            return self
Example #2
0
# Flat script: grow a tree on (X_train, y_train), copy it into a new Tree via
# _build_pruned_tree_ccp and predict class labels for X_test.
# All names used here (n_features_, n_classes_, criterion, splitter, ...) are
# expected to be defined before this point.

# A missing depth limit is encoded as the largest int32 value, a missing leaf
# limit as -1.
max_depth = (np.iinfo(np.int32).max if max_depth is None else max_depth)
max_leaf_nodes = (-1 if max_leaf_nodes is None else max_leaf_nodes)

# Consider int(sqrt(n_features_)) features per split, but never fewer than one.
max_features = max(1, int(np.sqrt(n_features_)))

# `criterion` is rebound from a lookup key of CRITERIA_CLF to the object that
# the looked-up callable returns.
criterion = CRITERIA_CLF[criterion](n_outputs_, n_classes_)

SPLITTERS = DENSE_SPLITTERS

# `splitter` is likewise rebound from a lookup key to the constructed object.
splitter = SPLITTERS[splitter](criterion, max_features, min_samples_leaf,
                               min_weight_leaf, random_state)

tree_ = Tree(n_features_, n_classes_, n_outputs_)

# NOTE(review): max_leaf_nodes computed above is not used in the visible
# script; the depth-first builder is always chosen.
builder = DepthFirstTreeBuilder(splitter, min_samples_split, min_samples_leaf,
                                min_weight_leaf, max_depth,
                                min_impurity_decrease, min_impurity_split)

builder.build(tree_, X_train, y_train)

# Keep only the first entry of classes_ (presumably the classes of the single
# output -- confirm against the code that defines classes_).
classes_ = classes_[0]

# Build a second Tree and let _build_pruned_tree_ccp fill it from the grown
# tree; the last argument is 0 (presumably ccp_alpha, i.e. no pruning --
# TODO confirm). The new tree then replaces the grown one.
n_classes_ = np.atleast_1d(n_classes_)
pruned_tree = Tree(n_features_, n_classes_, n_outputs_)
_build_pruned_tree_ccp(pruned_tree, tree_, 0)
tree_ = pruned_tree

# Predict: take the arg-max over axis 1 of the tree output and map the
# resulting indices back to class labels.
X_test = check_array(X_test, dtype=DTYPE, accept_sparse="csr")
proba = tree_.predict(X_test)
# NOTE(review): n_samples is not used in the visible part of the script.
n_samples = X_test.shape[0]
predictions = classes_.take(np.argmax(proba, axis=1), axis=0)
Example #3
0
    def fit(self, X, y, sample_weight=None, check_input=True,
            X_idx_sorted="deprecated"):
        """Grow a survival tree on the training data (X, y).

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix

        y : structured array, shape = (n_samples,)
            Structured array whose first field is the binary event indicator
            and whose second field is the time of event or time of
            censoring. When `check_input` is false, `y` is instead unpacked
            into the numeric target array and the array of event times.

        sample_weight : optional, default: None
            Handed to the tree builder unchanged.

        check_input : boolean, default: True
            Allow to bypass several input checking.
            Don't use this parameter unless you know what you do.

        X_idx_sorted : deprecated, default="deprecated"
            Has no effect; passing any other value emits a FutureWarning.

        Returns
        -------
        self
        """
        rng = check_random_state(self.random_state)

        if not check_input:
            # The caller already provides the numeric target and event times.
            y_numeric, self.event_times_ = y
        else:
            X, event, time = check_arrays_survival(X, y)
            time = time.astype(np.float64)
            self.event_times_ = np.unique(time[event])

            # Column 0 holds the time, column 1 the event indicator.
            y_numeric = np.empty((X.shape[0], 2), dtype=np.float64)
            y_numeric[:, 0] = time
            y_numeric[:, 1] = event.astype(np.float64)

        n_samples, self.n_features_ = X.shape
        self.n_features_in_ = self.n_features_
        params = self._check_params(n_samples)

        is_default_marker = (isinstance(X_idx_sorted, str)
                             and X_idx_sorted == "deprecated")
        if not is_default_marker:
            warnings.warn(
                "The parameter 'X_idx_sorted' is deprecated and has no "
                "effect. It will be removed in sklearn 1.1 (renaming of 0.26). "
                "You can suppress this warning by not passing any value to the "
                "'X_idx_sorted' parameter.",
                FutureWarning
            )

        self.n_outputs_ = self.event_times_.shape[0]
        # one "class" for CHF, one for survival function
        self.n_classes_ = np.full(self.n_outputs_, 2, dtype=np.intp)

        # Build tree
        criterion = LogrankCriterion(self.n_outputs_, n_samples, self.event_times_)

        splitter = self.splitter
        if not isinstance(splitter, Splitter):
            # A splitter given by name is looked up and instantiated.
            splitter_factory = DENSE_SPLITTERS[splitter]
            splitter = splitter_factory(
                criterion,
                self.max_features_,
                params["min_samples_leaf"],
                params["min_weight_leaf"],
                rng)

        self.tree_ = Tree(self.n_features_, self.n_classes_, self.n_outputs_)

        # Arguments shared by both builder types, in positional order.
        leading_args = (
            splitter,
            params["min_samples_split"],
            params["min_samples_leaf"],
            params["min_weight_leaf"],
            params["max_depth"],
        )
        min_impurity_decrease = 0.0

        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
        if not params["max_leaf_nodes"] < 0:
            builder = BestFirstTreeBuilder(*leading_args,
                                           params["max_leaf_nodes"],
                                           min_impurity_decrease,
                                           params["min_impurity_split"])
        else:
            builder = DepthFirstTreeBuilder(*leading_args,
                                            min_impurity_decrease,
                                            params["min_impurity_split"])

        builder.build(self.tree_, X, y_numeric, sample_weight)

        return self
Example #4
0
    def fit(self,
            X,
            y,
            sample_weight=None,
            check_input=True,
            X_idx_sorted=None):
        """Grow a survival tree on the training data (X, y).

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix

        y : structured array, shape = (n_samples,)
            Structured array whose first field is the binary event indicator
            and whose second field is the time of event or time of
            censoring. When `check_input` is false, `y` is instead unpacked
            into the numeric target array and the array of event times.

        sample_weight : optional, default: None
            Handed to the tree builder unchanged.

        check_input : boolean, default: True
            Allow to bypass several input checking.
            Don't use this parameter unless you know what you do.

        X_idx_sorted : array-like, shape = (n_samples, n_features), optional
            The indexes of the sorted training input samples; handed to the
            tree builder unchanged.
            Don't use this parameter unless you know what to do.

        Returns
        -------
        self
        """
        rng = check_random_state(self.random_state)

        if not check_input:
            # The caller already provides the numeric target and event times.
            y_numeric, self.event_times_ = y
        else:
            X, event, time = check_arrays_survival(X, y)
            time = time.astype(np.float64)
            self.event_times_ = np.unique(time[event])

            # Column 0 holds the time, column 1 the event indicator.
            y_numeric = np.empty((X.shape[0], 2), dtype=np.float64)
            y_numeric[:, 0] = time
            y_numeric[:, 1] = event.astype(np.float64)

        n_samples, self.n_features_ = X.shape
        params = self._check_params(n_samples)

        self.n_outputs_ = self.event_times_.shape[0]
        # one "class" for CHF, one for survival function
        self.n_classes_ = np.full(self.n_outputs_, 2, dtype=np.intp)

        # Build tree
        criterion = LogrankCriterion(self.n_outputs_, n_samples,
                                     self.event_times_)

        splitter = self.splitter
        if not isinstance(splitter, Splitter):
            # A splitter given by name is looked up and instantiated.
            make_splitter = DENSE_SPLITTERS[splitter]
            splitter = make_splitter(criterion,
                                     self.max_features_,
                                     params["min_samples_leaf"],
                                     params["min_weight_leaf"],
                                     rng)

        self.tree_ = Tree(self.n_features_, self.n_classes_, self.n_outputs_)

        # Positional arguments common to both builders.
        head = (splitter,
                params["min_samples_split"],
                params["min_samples_leaf"],
                params["min_weight_leaf"],
                params["max_depth"])
        # min_impurity_decrease is fixed to 0.0 for either builder.
        tail = (0.0, params["min_impurity_split"])

        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
        if not params["max_leaf_nodes"] < 0:
            builder = BestFirstTreeBuilder(*head, params["max_leaf_nodes"],
                                           *tail)
        else:
            builder = DepthFirstTreeBuilder(*head, *tail)

        builder.build(self.tree_, X, y_numeric, sample_weight, X_idx_sorted)

        return self
    def fit(self,
            X,
            y,
            sample_weight=None,
            check_input=True,
            X_idx_sorted=None):
        """Build a newsvendor decision tree regressor from the training set (X, y).

        Method is based on [1] and was adapted to enable usage of the newsvendor criterion

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csc_matrix``.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (real numbers). Use ``dtype=np.float64`` and
            ``order='C'`` for maximum efficiency.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node.
        check_input : bool, default=True
            Currently ignored: ``X`` and ``y`` are always validated. The
            parameter is kept so that the signature stays compatible with [1].
        X_idx_sorted : array-like of shape (n_samples, n_features), \
            default=None
            Currently ignored: ``None`` is always passed to the tree builder.
            The parameter is kept so that the signature stays compatible
            with [1].

        Returns
        -------
        self : NewsvendorDecisionTreeRegressor
            Fitted estimator.

        Raises
        ------
        ValueError
            If a hyper-parameter is out of range, if a sparse ``X`` uses
            index arrays that are not ``np.intc``, or if the number of labels
            does not match the number of samples.

        References
        ----------
        [1] scikit-learn, BaseDecisionTree.fit()
            <https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/tree/_classes.py>
        """

        random_state = check_random_state(self.random_state)

        if self.ccp_alpha < 0.0:
            raise ValueError("ccp_alpha must be greater than or equal to 0")

        # Need to validate separately here.
        # We can't pass multi_ouput=True because that would allow y to be
        # csr.
        check_X_params = dict(dtype=DTYPE, accept_sparse="csc")
        check_y_params = dict(ensure_2d=False, dtype=None)

        X, y = self._validate_data(X,
                                   y,
                                   validate_separately=(check_X_params,
                                                        check_y_params))

        if issparse(X):
            X.sort_indices()
            if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

        # Determine output settings
        n_samples, self.n_features_ = X.shape

        y = np.atleast_1d(y)

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity against vs
            # [:, np.newaxis] that does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        # Check parameters
        self.cu_, self.co_ = check_cu_co(self.cu, self.co, self.n_outputs_)

        # A missing depth limit is encoded as the largest int32 value, a
        # missing leaf limit as -1.
        max_depth = (np.iinfo(np.int32).max
                     if self.max_depth is None else self.max_depth)
        max_leaf_nodes = (-1 if self.max_leaf_nodes is None else
                          self.max_leaf_nodes)

        if isinstance(self.min_samples_leaf, numbers.Integral):
            if not 1 <= self.min_samples_leaf:
                raise ValueError("min_samples_leaf must be at least 1 "
                                 "or in (0, 0.5], got %s" %
                                 self.min_samples_leaf)
            min_samples_leaf = self.min_samples_leaf
        else:  # float
            if not 0. < self.min_samples_leaf <= 0.5:
                raise ValueError("min_samples_leaf must be at least 1 "
                                 "or in (0, 0.5], got %s" %
                                 self.min_samples_leaf)
            min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))

        if isinstance(self.min_samples_split, numbers.Integral):
            if not 2 <= self.min_samples_split:
                raise ValueError("min_samples_split must be an integer "
                                 "greater than 1 or a float in (0.0, 1.0]; "
                                 "got the integer %s" % self.min_samples_split)
            min_samples_split = self.min_samples_split
        else:  # float
            if not 0. < self.min_samples_split <= 1.:
                raise ValueError("min_samples_split must be an integer "
                                 "greater than 1 or a float in (0.0, 1.0]; "
                                 "got the float %s" % self.min_samples_split)
            min_samples_split = int(ceil(self.min_samples_split * n_samples))
            min_samples_split = max(2, min_samples_split)

        # A split must leave at least min_samples_leaf samples on each side.
        min_samples_split = max(min_samples_split, 2 * min_samples_leaf)

        if isinstance(self.max_features, str):
            if self.max_features == "auto":
                max_features = self.n_features_
            elif self.max_features == "sqrt":
                max_features = max(1, int(np.sqrt(self.n_features_)))
            elif self.max_features == "log2":
                max_features = max(1, int(np.log2(self.n_features_)))
            else:
                raise ValueError("Invalid value for max_features. "
                                 "Allowed string values are 'auto', "
                                 "'sqrt' or 'log2'.")
        elif self.max_features is None:
            max_features = self.n_features_
        elif isinstance(self.max_features, numbers.Integral):
            max_features = self.max_features
        else:  # float
            if self.max_features > 0.0:
                max_features = max(1,
                                   int(self.max_features * self.n_features_))
            else:
                # Rejected by the range check below.
                max_features = 0

        self.max_features_ = max_features

        if len(y) != n_samples:
            raise ValueError("Number of labels=%d does not match "
                             "number of samples=%d" % (len(y), n_samples))
        if not 0 <= self.min_weight_fraction_leaf <= 0.5:
            raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
        if max_depth <= 0:
            raise ValueError("max_depth must be greater than zero. ")
        if not (0 < max_features <= self.n_features_):
            raise ValueError("max_features must be in (0, n_features]")
        if not isinstance(max_leaf_nodes, numbers.Integral):
            raise ValueError("max_leaf_nodes must be integral number but was "
                             "%r" % max_leaf_nodes)
        if -1 < max_leaf_nodes < 2:
            raise ValueError(("max_leaf_nodes {0} must be either None "
                              "or larger than 1").format(max_leaf_nodes))

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)

        # Set min_weight_leaf from min_weight_fraction_leaf
        if sample_weight is None:
            min_weight_leaf = (self.min_weight_fraction_leaf * n_samples)
        else:
            min_weight_leaf = (self.min_weight_fraction_leaf *
                               np.sum(sample_weight))

        min_impurity_split = self.min_impurity_split
        if min_impurity_split is not None:
            warnings.warn(
                "The min_impurity_split parameter is deprecated. "
                "Its default value has changed from 1e-7 to 0 in "
                "version 0.23, and it will be removed in 0.25. "
                "Use the min_impurity_decrease parameter instead.",
                FutureWarning)

            if min_impurity_split < 0.:
                raise ValueError("min_impurity_split must be greater than "
                                 "or equal to 0")
        else:
            min_impurity_split = 0

        if self.min_impurity_decrease < 0.:
            raise ValueError("min_impurity_decrease must be greater than "
                             "or equal to 0")

        # Build tree
        criterion = NewsvendorCriterion(self.n_outputs_, n_samples, self.cu_,
                                        self.co_)

        SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS

        splitter = self.splitter
        if not isinstance(self.splitter, Splitter):
            splitter = SPLITTERS[self.splitter](criterion, self.max_features_,
                                                min_samples_leaf,
                                                min_weight_leaf, random_state)

        self.tree_ = Tree(
            self.n_features_,
            # TODO: tree should't need this in this case
            np.array([1] * self.n_outputs_, dtype=np.intp),
            self.n_outputs_)

        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
        if max_leaf_nodes < 0:
            builder = DepthFirstTreeBuilder(splitter, min_samples_split,
                                            min_samples_leaf, min_weight_leaf,
                                            max_depth,
                                            self.min_impurity_decrease,
                                            min_impurity_split)
        else:
            builder = BestFirstTreeBuilder(splitter, min_samples_split,
                                           min_samples_leaf, min_weight_leaf,
                                           max_depth, max_leaf_nodes,
                                           self.min_impurity_decrease,
                                           min_impurity_split)

        # The caller's X_idx_sorted is deliberately not forwarded.
        builder.build(self.tree_, X, y, sample_weight, X_idx_sorted=None)

        self._prune_tree()

        return self