Exemple #1
0
 def test_mtry(self, iris_X, iris_y, mtry):
     """Check the fitted ``mtry_`` against the requested ``mtry``.

     When ``mtry`` is ``None`` the fitted attribute is expected to be 4.
     """
     expected_mtry = 4 if mtry is None else mtry
     classifier = GRFForestClassifier(mtry=mtry)
     classifier.fit(iris_X, iris_y)
     assert classifier.mtry_ == expected_mtry
Exemple #2
0
 def test_with_X_nan(self, iris_X, iris_y):
     """Fit and predict on a copy of the features containing 100 NaN cells.

     The cells to blank out are drawn without replacement from a seeded
     generator, so the same positions are used on every run.
     """
     iris_X_nan = iris_X.copy()
     # Seeded so a failure can be reproduced with the exact same NaN layout.
     rng = np.random.default_rng(42)
     index = rng.choice(iris_X_nan.size, 100, replace=False)
     # ``.flat`` always writes into the array itself; ``ravel()`` may hand
     # back a copy for non-contiguous data, silently dropping the update.
     iris_X_nan.flat[index] = np.nan
     assert np.sum(np.isnan(iris_X_nan)) == 100
     forest = GRFForestClassifier()
     forest.fit(iris_X_nan, iris_y)
     pred = forest.predict(iris_X_nan)
     # One prediction per input row.
     assert len(pred) == iris_X_nan.shape[0]
Exemple #3
0
 def test_fit(self, iris_X, iris_y):
     """Check the fitted-state transition of the classifier.

     ``check_is_fitted`` must raise ``NotFittedError`` before ``fit`` and
     pass afterwards; the fitted attributes and criterion are then checked.
     """
     classifier = GRFForestClassifier()
     with pytest.raises(NotFittedError):
         check_is_fitted(classifier)

     classifier.fit(iris_X, iris_y)
     check_is_fitted(classifier)
     for fitted_attr in ("grf_forest_", "mtry_"):
         assert hasattr(classifier, fitted_attr)
     assert classifier.criterion == "gini"
Exemple #4
0
    def test_check_estimator(self):
        """Run ``check_estimator`` on the classifier with honesty off and on.

        With ``honesty=False`` the checks are expected to pass. With
        ``honesty=True`` an ``AssertionError`` mentioning
        "Arrays are not equal" is expected.
        """
        # using honesty here means that the test
        # `check_classifiers_predictions` will fail, because
        # the test dataset is very small. the failure occurs
        # when comparing y == y_pred on binary classification
        check_estimator(GRFForestClassifier(honesty=False))

        # Verify the message through ``match``: an assertion placed inside
        # the ``with`` block after the raising call would never execute.
        with pytest.raises(AssertionError, match="Arrays are not equal"):
            check_estimator(GRFForestClassifier(honesty=True))
Exemple #5
0
def test_shap_classifier(iris_X, iris_y):
    """Compute SHAP values for a fitted classifier with ``TreeExplainer``.

    The classifier is fitted with ``enable_tree_details=True`` and the
    resulting SHAP values are printed; nothing is asserted about them.
    """
    from shap import TreeExplainer

    classifier = GRFForestClassifier(enable_tree_details=True)
    classifier.fit(iris_X, iris_y)

    # Only the explainer construction runs under ``shap_patch``.
    with shap_patch():
        tree_explainer = TreeExplainer(model=classifier, data=iris_X)
    values = tree_explainer.shap_values(iris_X, check_additivity=False)
    print(values)
Exemple #6
0
 def test_serialize(self, iris_X, iris_y):
     """Round-trip the classifier through pickle, unfitted and then fitted.

     The unfitted estimator is pickled, restored and fitted; the fitted
     estimator is pickled and restored again, and the restored copy must
     still produce one prediction per input row.
     """
     forest = GRFForestClassifier()
     # not fitted
     # The context manager closes the temporary file once it has been read.
     with tempfile.TemporaryFile() as tf:
         pickle.dump(forest, tf)
         tf.seek(0)
         forest = pickle.load(tf)
     forest.fit(iris_X, iris_y)
     # fitted
     with tempfile.TemporaryFile() as tf:
         pickle.dump(forest, tf)
         tf.seek(0)
         new_forest = pickle.load(tf)
     pred = new_forest.predict(iris_X)
     assert len(pred) == iris_X.shape[0]
Exemple #7
0
 def test_estimators_(self, iris_X, iris_y):
     """Check access to ``estimators_`` in three states.

     Unfitted access raises ``AttributeError``; fitted access without
     ``enable_tree_details`` raises ``ValueError``; with tree details
     enabled the attribute holds 10 fitted ``GRFTreeClassifier`` objects.
     """
     plain = GRFForestClassifier(n_estimators=10)
     with pytest.raises(AttributeError):
         _ = plain.estimators_

     plain.fit(iris_X, iris_y)
     with pytest.raises(ValueError):
         _ = plain.estimators_

     detailed = GRFForestClassifier(n_estimators=10, enable_tree_details=True)
     detailed.fit(iris_X, iris_y)
     trees = detailed.estimators_
     assert len(trees) == 10
     first_tree = trees[0]
     assert isinstance(first_tree, GRFTreeClassifier)
     check_is_fitted(first_tree)
Exemple #8
0
    def test_accuracy(self, iris_X, iris_y):
        """Compare hold-out accuracy of a random forest and the GRF classifier.

        Both models are trained on the same 67/33 split and each must score
        above 0.9 accuracy on the held-out part.
        """
        X_train, X_test, y_train, y_test = train_test_split(iris_X,
                                                            iris_y,
                                                            test_size=0.33,
                                                            random_state=42)

        # train and test a random forest classifier; seeded so the baseline
        # accuracy does not vary between runs
        rf = RandomForestClassifier(random_state=42)
        rf.fit(X_train, y_train)
        y_pred_rf = rf.predict(X_test)
        rf_acc = accuracy_score(y_test, y_pred_rf)

        # train and test a GRF forest classifier
        grf = GRFForestClassifier()
        grf.fit(X_train, y_train)
        y_pred_grf = grf.predict(X_test)
        grf_acc = accuracy_score(y_test, y_pred_grf)

        # the accuracy should be good
        assert rf_acc > 0.9
        assert grf_acc > 0.9
Exemple #9
0
 def test_alpha(self, iris_X, iris_y, alpha):
     """Fitting succeeds only when ``alpha`` lies strictly between 0 and 0.25.

     Any other value is expected to make ``fit`` raise ``ValueError``.
     """
     classifier = GRFForestClassifier(alpha=alpha)
     if 0 < alpha < 0.25:
         classifier.fit(iris_X, iris_y)
         return
     with pytest.raises(ValueError):
         classifier.fit(iris_X, iris_y)
Exemple #10
0
 def test_honesty_fraction(self, iris_X, iris_y, honesty_fraction):
     """Fitting succeeds only for ``honesty_fraction`` strictly inside (0, 1).

     Values outside that open interval are expected to make ``fit`` raise
     ``RuntimeError``.
     """
     classifier = GRFForestClassifier(honesty=True,
                                      honesty_fraction=honesty_fraction,
                                      honesty_prune_leaves=True)
     if 0 < honesty_fraction < 1:
         classifier.fit(iris_X, iris_y)
         return
     with pytest.raises(RuntimeError):
         classifier.fit(iris_X, iris_y)
Exemple #11
0
    def test_equalize_cluster_weights(self, iris_X, iris_y, iris_cluster,
                                      equalize_cluster_weights):
        """Check ``samples_per_cluster_`` for both settings of the flag.

        With equalized weights the value must be 20 and passing
        ``sample_weight`` alongside ``cluster`` must raise ``ValueError``;
        otherwise it must be the sample count minus 20. Refitting with
        ``cluster=None`` must reset it to 0.
        """
        classifier = GRFForestClassifier(
            equalize_cluster_weights=equalize_cluster_weights)
        classifier.fit(iris_X, iris_y, cluster=iris_cluster)

        if equalize_cluster_weights:
            assert classifier.samples_per_cluster_ == 20
            with pytest.raises(ValueError):
                classifier.fit(iris_X,
                               iris_y,
                               cluster=iris_cluster,
                               sample_weight=iris_y)
        else:
            n_samples = iris_y.shape[0]
            assert classifier.samples_per_cluster_ == n_samples - 20

        classifier.fit(iris_X, iris_y, cluster=None)
        assert classifier.samples_per_cluster_ == 0
Exemple #12
0
 def test_get_kernel_weights(self, iris_X, iris_y):
     """Check the dimensions of the kernel weights returned by the forest.

     Weights for the test rows must have one row per test sample and one
     column per training sample; weights requested for the training rows
     with the second argument set to ``True`` must be square in the
     training sample count.
     """
     X_train, X_test, y_train, y_test = train_test_split(
         iris_X, iris_y, test_size=0.33, random_state=42)
     n_train = X_train.shape[0]
     n_test = X_test.shape[0]

     classifier = GRFForestClassifier()
     classifier.fit(X_train, y_train)

     test_weights = classifier.get_kernel_weights(X_test)
     assert test_weights.shape[0] == n_test
     assert test_weights.shape[1] == n_train

     train_weights = classifier.get_kernel_weights(X_train, True)
     assert train_weights.shape[0] == n_train
     assert train_weights.shape[1] == n_train
Exemple #13
0
 def test_get_split_frequencies(self, iris_X, iris_y):
     """Split frequencies must have one column per input feature."""
     classifier = GRFForestClassifier()
     classifier.fit(iris_X, iris_y)
     split_freq = classifier.get_split_frequencies()
     n_features = iris_X.shape[1]
     assert split_freq.shape[1] == n_features
Exemple #14
0
 def test_clone(self, iris_X, iris_y):
     """``clone`` must accept a fitted classifier without raising."""
     fitted = GRFForestClassifier()
     fitted.fit(iris_X, iris_y)
     # The clone is discarded; only the absence of an error is checked.
     _ = clone(fitted)
Exemple #15
0
    def test_sample_fraction(self, iris_X, iris_y,
                             sample_fraction):  # and ci_group_size
        """Check the accepted ``sample_fraction`` range per ``ci_group_size``.

        With ``ci_group_size=1`` the fraction must lie in (0, 1]; with
        ``ci_group_size=2`` it must lie in (0, 0.5]. Outside those ranges
        ``fit`` is expected to raise ``ValueError``.
        """
        # (ci_group_size, largest sample_fraction accepted for that size)
        cases = ((1, 1), (2, 0.5))
        for group_size, upper_bound in cases:
            classifier = GRFForestClassifier(sample_fraction=sample_fraction,
                                             ci_group_size=group_size)
            if 0 < sample_fraction <= upper_bound:
                classifier.fit(iris_X, iris_y)
            else:
                with pytest.raises(ValueError):
                    classifier.fit(iris_X, iris_y)
Exemple #16
0
 def test_predict_log_proba(self, iris_X, iris_y):
     """Log-probabilities must have shape ``(n_samples, n_classes_)``."""
     classifier = GRFForestClassifier()
     classifier.fit(iris_X, iris_y)
     log_proba = classifier.predict_log_proba(iris_X)
     expected_shape = (iris_X.shape[0], classifier.n_classes_)
     assert log_proba.shape == expected_shape
Exemple #17
0
 def test_predict_oob(self, iris_X, iris_y):
     """Fitting with OOB predictions stores one prediction per sample.

     The stored ``"predictions"`` entry of ``grf_forest_`` is squeezed to
     at least one dimension before its length is compared.
     """
     classifier = GRFForestClassifier()
     classifier.fit(iris_X, iris_y, compute_oob_predictions=True)
     raw_predictions = np.array(classifier.grf_forest_["predictions"])
     oob_predictions = np.atleast_1d(np.squeeze(raw_predictions))
     assert len(oob_predictions) == iris_X.shape[0]
Exemple #18
0
 def test_honesty(self, iris_X, iris_y, honesty):
     """Fitting must not raise for the given ``honesty`` setting."""
     GRFForestClassifier(honesty=honesty).fit(iris_X, iris_y)
Exemple #19
0
 def test_predict(self, iris_X, iris_y):
     """``predict`` must return one prediction per input row."""
     classifier = GRFForestClassifier()
     classifier.fit(iris_X, iris_y)
     predictions = classifier.predict(iris_X)
     n_samples = iris_X.shape[0]
     assert len(predictions) == n_samples
Exemple #20
0
 def test_honesty_prune_leaves(self, iris_X, iris_y, honesty_prune_leaves):
     """Fitting an honest forest must not raise for either pruning setting."""
     options = {
         "honesty": True,
         "honesty_prune_leaves": honesty_prune_leaves,
     }
     GRFForestClassifier(**options).fit(iris_X, iris_y)
Exemple #21
0
 def test_init(self):
     """The classifier can be constructed with no arguments."""
     GRFForestClassifier()
Exemple #22
0
 def test_get_feature_importances(self, iris_X, iris_y):
     """Feature importances must contain one entry per input feature."""
     classifier = GRFForestClassifier()
     classifier.fit(iris_X, iris_y)
     importances = classifier.get_feature_importances()
     n_features = iris_X.shape[1]
     assert len(importances) == n_features
Exemple #23
0
 def test_from_forest(self, iris_X, iris_y):
     """Build a tree from a fitted forest via ``from_forest`` and predict."""
     fitted_forest = GRFForestClassifier()
     fitted_forest.fit(iris_X, iris_y)
     first_tree = GRFTreeClassifier.from_forest(forest=fitted_forest, idx=0)
     # Only checks that prediction runs; the output is not inspected.
     first_tree.predict(iris_X)
Exemple #24
0
 def test_get_estimator(self, iris_X, iris_y):
     """Check ``get_estimator`` across unfitted, fitted and detailed forests.

     Unfitted access raises ``NotFittedError``; fitted access without
     ``enable_tree_details`` raises ``ValueError``; with tree details the
     returned estimator is a fitted ``GRFTreeClassifier`` and index 20 of
     a 10-tree forest raises ``IndexError``.
     """
     plain = GRFForestClassifier(n_estimators=10)
     with pytest.raises(NotFittedError):
         _ = plain.get_estimator(idx=0)

     plain.fit(iris_X, iris_y)
     with pytest.raises(ValueError):
         _ = plain.get_estimator(idx=0)

     detailed = GRFForestClassifier(n_estimators=10, enable_tree_details=True)
     detailed.fit(iris_X, iris_y)
     tree = detailed.get_estimator(idx=0)
     check_is_fitted(tree)
     assert isinstance(tree, GRFTreeClassifier)

     with pytest.raises(IndexError):
         _ = detailed.get_estimator(idx=20)