Example #1
def test_warm_start_smaller_n_estimators():
    # Test that a warm-started second fit with a smaller n_estimators raises a ValueError.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    clf = BaggingClassifier(n_estimators=5, warm_start=True)
    clf.fit(X, y)
    clf.set_params(n_estimators=4)
    assert_raises(ValueError, clf.fit, X, y)
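For context, a minimal runnable sketch of the behaviour this test pins down (dataset and sizes are arbitrary): a warm-started ensemble can only grow, so lowering n_estimators before the second fit raises a ValueError.

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier

X, y = make_classification(n_samples=50, random_state=0)
clf = BaggingClassifier(n_estimators=5, warm_start=True).fit(X, y)
clf.set_params(n_estimators=4)
try:
    clf.fit(X, y)  # cannot shrink a warm-started ensemble
except ValueError as exc:
    print(exc)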
Example #2
class RFsLDA(BaseEstimator, ClassifierMixin):
    def __init__(self, n_sLDA_estimators=200, n_estimators_per_RF=20):
        # Store constructor arguments under their own names so that
        # get_params()/clone() work (scikit-learn convention); the
        # ensemble itself is built in fit().
        self.n_sLDA_estimators = n_sLDA_estimators
        self.n_estimators_per_RF = n_estimators_per_RF

    def fit(self, X, y):
        Xc = self._normalise(X)

        # Stage 1: a warm-started bag of shrinkage-LDA estimators.
        self.estimator_ = BaggingClassifier(
            base_estimator=IntWeightedShrinkageLDA(),
            n_estimators=self.n_sLDA_estimators,
            warm_start=True)
        self.estimator_.fit(Xc, y)

        # Stage 2: swap the base estimator so that every member appended
        # below is a random forest trained without bootstrapping.
        self.estimator_.set_params(
            base_estimator=RandomForestClassifier(
                n_estimators=self.n_estimators_per_RF),
            bootstrap=False,
        )

        # Append one forest per LDA member, fitted only on the samples
        # that the k-th LDA misclassified.
        for k in range(self.n_sLDA_estimators):
            self.estimator_.set_params(
                n_estimators=self.n_sLDA_estimators + 1 + k)
            wrong = self.estimator_.estimators_[k].predict(Xc) != y
            self.estimator_.fit(Xc[wrong, :], y[wrong])
        return self

    def predict(self, X):
        return self.estimator_.predict(self._normalise(X))

    def _normalise(self, X):
        # Robust per-sample scaling: centre each row on its median and
        # divide by its median absolute deviation.
        med = np.median(X, axis=1, keepdims=True)
        return (X - med) / np.median(np.abs(X - med), axis=1, keepdims=True)
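A runnable sketch of the warm-start trick the class above relies on: grow a warm-started bag, then swap base_estimator so that the members appended by the next fit use a different algorithm. GaussianNB stands in for the project-specific IntWeightedShrinkageLDA so the snippet runs with stock scikit-learn (using the older base_estimator parameter name, as these examples do).

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

X, y = make_classification(n_samples=200, random_state=0)
bag = BaggingClassifier(base_estimator=GaussianNB(), n_estimators=3,
                        warm_start=True).fit(X, y)
bag.set_params(base_estimator=RandomForestClassifier(n_estimators=5),
               bootstrap=False, n_estimators=4)
bag.fit(X, y)  # appends one RandomForestClassifier to the three GaussianNBs
print([type(est).__name__ for est in bag.estimators_])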
Example #4
class Bagging(object):
    def __init__(self, dataset_x, dataset_y):
        self.dataset_x = dataset_x
        self.dataset_y = dataset_y

        self.clf = BaggingClassifier()
        self.best_parameter = {}

    def startBagging(self):
        print("------------------ Bagging Classifier -------------------")
        # self.findBestParameters()
        # self.gridSearch()
        self.randomSearch()


    def findBestParameters(self):
        """
        Try different parameters for finding the best score
        :return:
        """
        self.clf = BaggingClassifier()
        scores = cross_val_score(self.clf, self.dataset_x, self.dataset_y, cv=10, scoring="accuracy")
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


    def test(self):
        """
        Test the model with best parameters found in randomSearch() or gridSearch()
        :return:
        """
        # self.clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=30, bootstrap=False, max_samples=0.9)
        self.clf = BaggingClassifier()
        self.clf.set_params(**self.best_parameter)
        print("*** Test Result for Bagging ***")
        ModelEvaluation.evaluateModelWithCV(self.clf, self.dataset_x, self.dataset_y, cv=10)

    def randomSearch(self):
        tuned_parameters = {'base_estimator': [DecisionTreeClassifier(), SVC(), LogisticRegression(), KNeighborsClassifier(), MultinomialNB()],
                            'n_estimators': [5, 10, 15, 20, 30, 50],
                            'max_samples': [0.5, 0.7, 0.9],
                            'bootstrap': [True, False]
                            }
        self.best_parameter = SearchParameters.randomSearch(classifier=self.clf, parameters=tuned_parameters, cv=10, n_iter=30, train_x=self.dataset_x, train_y=self.dataset_y)


    def gridSearch(self):
        tuned_parameters = {
            'base_estimator': [DecisionTreeClassifier(), SVC(), LogisticRegression(), KNeighborsClassifier(),
                               MultinomialNB()],
            'n_estimators': [5, 10, 15, 20],
            'max_samples': [0.3, 0.5, 0.7, 0.9],
            'max_features': [0.5, 1.0],
            'bootstrap': [True, False]
            }
        self.best_parameter = SearchParameters.gridSearch(classifier=self.clf, parameters=tuned_parameters, cv=10, train_x=self.dataset_x, train_y=self.dataset_y)
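SearchParameters and ModelEvaluation above are helpers from this project. A hedged sketch of what randomSearch() presumably wraps: scikit-learn's RandomizedSearchCV, whose best_params_ can be pushed back into a fresh estimator with set_params(), exactly as test() does.

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=0)
param_dist = {'base_estimator': [DecisionTreeClassifier()],
              'n_estimators': [5, 10, 15, 20, 30, 50],
              'max_samples': [0.5, 0.7, 0.9],
              'bootstrap': [True, False]}
search = RandomizedSearchCV(BaggingClassifier(), param_dist, n_iter=10,
                            cv=3, random_state=0)
search.fit(X, y)
best_clf = BaggingClassifier().set_params(**search.best_params_)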
Example #6
def test_oob_score_removed_on_warm_start():
    X, y = make_hastie_10_2(n_samples=2000, random_state=1)

    clf = BaggingClassifier(n_estimators=50, oob_score=True)
    clf.fit(X, y)

    clf.set_params(warm_start=True, oob_score=False, n_estimators=100)
    clf.fit(X, y)

    assert_raises(AttributeError, getattr, clf, "oob_score_")
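Related behaviour worth knowing: requesting oob_score and warm_start together in a single fit is rejected outright, which is why the test flips oob_score off when it turns warm_start on. A minimal sketch:

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier

X, y = make_classification(n_samples=100, random_state=1)
clf = BaggingClassifier(oob_score=True, warm_start=True)
try:
    clf.fit(X, y)
except ValueError as exc:
    print(exc)  # out-of-bag estimates are only available when warm_start=False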
Example #7
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=3, random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=1, random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"), n_jobs=3, random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"), n_jobs=1, random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
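The point of this test: n_jobs only controls how work is dispatched, so changing it on an already fitted ensemble via set_params (no refit) must leave predictions unchanged. A minimal sketch:

from numpy.testing import assert_array_almost_equal
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier

X, y = load_iris(return_X_y=True)
ensemble = BaggingClassifier(n_jobs=2, random_state=0).fit(X, y)
proba_parallel = ensemble.predict_proba(X)
ensemble.set_params(n_jobs=1)  # no refit needed
assert_array_almost_equal(proba_parallel, ensemble.predict_proba(X))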
Example #8
def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True, random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False, random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example #9
def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators, random_state=random_state, warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state, warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]), set([tree.random_state for tree in clf_no_ws]))
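This test leans on the fact that fitted scikit-learn ensembles behave as sequences of their members (len(), iteration and indexing come from BaseEnsemble), and that each member gets its own integer random_state. A minimal illustration:

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier

X, y = make_classification(n_samples=100, random_state=1)
clf = BaggingClassifier(n_estimators=5).fit(X, y)
print(len(clf))                            # 5
print(type(clf[0]).__name__)               # DecisionTreeClassifier (default base)
print({est.random_state for est in clf})   # one seed per member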
Example #10
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    assert_raise_message(
        ValueError, "Number of features of the model "
        "must match the input. Model n_features is {0} "
        "and input n_features is {1} "
        "".format(X_test.shape[1], X_err.shape[1]), ensemble.decision_function,
        X_err)

    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #11
def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example #12
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    assert_raise_message(ValueError, "Number of features of the model "
                         "must match the input. Model n_features is {0} "
                         "and input n_features is {1} "
                         "".format(X_test.shape[1], X_err.shape[1]),
                         ensemble.decision_function, X_err)

    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #13
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    err_msg = (f"Number of features of the model must match the input. Model "
               f"n_features is {X_test.shape[1]} and input n_features is "
               f"{X_err.shape[1]} ")
    with pytest.raises(ValueError, match=err_msg):
        ensemble.decision_function(X_err)

    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
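One caveat about the pattern above: pytest.raises(match=...) interprets its argument as a regular expression, so a message containing metacharacters (such as the periods here) is safer escaped. A sketch reusing the names from the test above:

import re

with pytest.raises(ValueError, match=re.escape(err_msg)):
    ensemble.decision_function(X_err)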
Example #14
def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert len(clf_ws) == n_estimators

    clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert (set([tree.random_state for tree in clf_ws]) ==
            set([tree.random_state for tree in clf_no_ws]))
Example #15
def test_parallel_classification():
    # Check parallel classification.
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=0)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=1)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=1)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #16
class BaggedDecisionTreeClassifier():
    def __init__(self,
                 n_estimators=20,
                 bootstrap=True,
                 bootstrap_features=False,
                 oob_score=False,
                 max_depth=None,
                 min_samples_leaf=20,
                 warm_start=False,
                 n_jobs=None,
                 early_stopping='auto',  # accepted but not used by this wrapper
                 verbose=0,
                 random_state=None):
        self.tree = DecisionTreeClassifier(max_depth=max_depth,
                                           min_samples_leaf=min_samples_leaf)
        self.BagDT = BaggingClassifier(base_estimator=self.tree,
                                       n_estimators=n_estimators,
                                       bootstrap=bootstrap,
                                       bootstrap_features=bootstrap_features,
                                       oob_score=oob_score,
                                       warm_start=warm_start,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       verbose=verbose)

    def decision_function(self, X):
        return self.BagDT.decision_function(X)

    def fit(self, X, y, sample_weight=None):
        self.BagDT.fit(X, y, sample_weight=sample_weight)
        return self.BagDT

    def get_params(self, deep=True):
        return self.BagDT.get_params(deep=deep)

    def predict(self, X):
        return self.BagDT.predict(X)

    def predict_log_proba(self, X):
        return self.BagDT.predict_log_proba(X)

    def predict_proba(self, X):
        return self.BagDT.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.BagDT.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        return self.BagDT.set_params(**params)
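A usage sketch for the wrapper above (assuming the scikit-learn imports from its source module are in scope). Note the design quirk that fit() returns the inner BaggingClassifier rather than self, so the wrapper does not chain like a regular estimator.

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=300, random_state=0)
clf = BaggedDecisionTreeClassifier(n_estimators=10, oob_score=True,
                                   random_state=0)
bag = clf.fit(X, y)    # returns the underlying BaggingClassifier
print(bag.oob_score_)  # available because oob_score=True
print(clf.score(X, y))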
Example #17
def test_parallel():
    """Check parallel computations."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        # predict_proba
        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict_proba(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y3)

        # decision_function
        ensemble = BaggingClassifier(SVC(), n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        decisions1 = ensemble.decision_function(X_test)
        ensemble.set_params(n_jobs=2)
        decisions2 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions2)

        ensemble = BaggingClassifier(SVC(), n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        decisions3 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions3)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=n_jobs,
                                    random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=1,
                                    random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y3)
Example #18
class Bagging(Classifier):
    r"""Implementation of bagging classifier.
    
    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT
    
    Reference:
        L. Breiman, “Bagging predictors”, Machine Learning, 24(2), 123-140, 1996.
    
    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html

    See Also:
        * :class:`niaaml.classifiers.Classifier`
    """
    Name = 'Bagging'

    def __init__(self, **kwargs):
        r"""Initialize Bagging instance.
        """
        warnings.filterwarnings(action='ignore',
                                category=ChangedBehaviorWarning)
        warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
        warnings.filterwarnings(action='ignore',
                                category=DataConversionWarning)
        warnings.filterwarnings(action='ignore',
                                category=DataDimensionalityWarning)
        warnings.filterwarnings(action='ignore', category=EfficiencyWarning)
        warnings.filterwarnings(action='ignore', category=FitFailedWarning)
        warnings.filterwarnings(action='ignore', category=NonBLASDotWarning)
        warnings.filterwarnings(action='ignore',
                                category=UndefinedMetricWarning)

        self._params = dict(
            n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint),
            bootstrap=ParameterDefinition([True, False]),
            bootstrap_features=ParameterDefinition([True, False]))
        self.__bagging_classifier = BaggingClassifier()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm.
        """
        self.__bagging_classifier.set_params(**kwargs)

    def fit(self, x, y, **kwargs):
        r"""Fit Bagging.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.
            y (pandas.core.series.Series): n classes of the samples in the x array.

        Returns:
            None
        """
        self.__bagging_classifier.fit(x, y)

    def predict(self, x, **kwargs):
        r"""Predict class for each sample (row) in x.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.

        Returns:
            pandas.core.series.Series: n predicted classes.
        """
        return self.__bagging_classifier.predict(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return Classifier.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(
                self.__bagging_classifier.get_params()))
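A hedged usage sketch (x_train, y_train and x_test are hypothetical pandas objects, and the niaaml base classes above must be importable): names passed to set_parameters() are forwarded verbatim to the underlying sklearn BaggingClassifier.

clf = Bagging()
clf.set_parameters(n_estimators=25, bootstrap=True)  # forwarded to set_params
clf.fit(x_train, y_train)        # x_train/y_train: hypothetical DataFrame/Series
predictions = clf.predict(x_test)  # x_test: hypothetical DataFrame
print(clf.to_string())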
Example #19
class HistRandomForestClassifier():
    def __init__(self,
                 loss='auto',
                 max_leaf_nodes=31,
                 max_depth=None,
                 min_samples_leaf=20,
                 l2_regularization=0,
                 max_bins=255,
                 n_estimators=20,
                 max_samples=1.0,
                 bootstrap=True,
                 bootstrap_features=False,
                 oob_score=False,
                 categorical_features=None,
                 monotonic_cst=None,
                 warm_start=False,
                 n_jobs=None,
                 early_stopping='auto',
                 scoring='loss',
                 validation_fraction=0.1,
                 n_iter_no_change=10,
                 tol=1e-7,
                 verbose=0,
                 random_state=None):
        self.loss = loss
        self.max_leaf_nodes = max_leaf_nodes
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.l2_regularization = l2_regularization
        self.max_bins = max_bins
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.oob_score = oob_score
        self.categorical_features = categorical_features
        self.monotonic_cst = monotonic_cst
        self.warm_start = warm_start
        self.n_jobs = n_jobs
        self.early_stopping = early_stopping
        self.scoring = scoring
        self.validation_fraction = validation_fraction
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state

        self.tree = HistGradientBoostingClassifier(
            loss=loss,
            learning_rate=1,
            max_iter=1,
            max_leaf_nodes=max_leaf_nodes,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            l2_regularization=l2_regularization,
            max_bins=max_bins,
            categorical_features=categorical_features,
            monotonic_cst=monotonic_cst,
            early_stopping=early_stopping,
            scoring=scoring,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            tol=tol,
            verbose=verbose,
            random_state=random_state)
        self.HistRF = BaggingClassifier(base_estimator=self.tree,
                                        n_estimators=n_estimators,
                                        max_samples=max_samples,
                                        bootstrap=bootstrap,
                                        bootstrap_features=bootstrap_features,
                                        oob_score=oob_score,
                                        warm_start=warm_start,
                                        n_jobs=n_jobs,
                                        random_state=random_state,
                                        verbose=verbose)

    def decision_function(self, X):
        return self.HistRF.decision_function(X)

    def fit(self, X, y, sample_weight=None):
        self.HistRF.fit(X, y, sample_weight=sample_weight)
        return self.HistRF

    def get_params(self, deep=True):
        return self.HistRF.get_params(deep=deep)

    def predict(self, X):
        return self.HistRF.predict(X)

    def predict_log_proba(self, X):
        return self.HistRF.predict_log_proba(X)

    def predict_proba(self, X):
        return self.HistRF.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.HistRF.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        return self.HistRF.set_params(**params)
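A usage sketch for the class above (assuming the scikit-learn imports from its source module are in scope). The design choice worth noting: each bag member is a HistGradientBoostingClassifier capped at max_iter=1 with learning_rate=1, i.e. effectively a single histogram-based tree, so bagging many of them approximates a random forest built on binned features.

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, random_state=0)
clf = HistRandomForestClassifier(n_estimators=10, random_state=0)
bag = clf.fit(X, y)  # returns the underlying BaggingClassifier
print(bag.score(X, y))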
Example #20
def test_parallel():
    """Check parallel computations."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        # predict_proba
        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict_proba(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y3)

        # decision_function
        ensemble = BaggingClassifier(SVC(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        decisions1 = ensemble.decision_function(X_test)
        ensemble.set_params(n_jobs=2)
        decisions2 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions2)

        ensemble = BaggingClassifier(SVC(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        decisions3 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions3)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=n_jobs,
                                    random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=1,
                                    random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y3)