def test_kfold(self):
        """Check fold counts and fold proportions of RandomStratifiedKFold.

        A splitter built only with ``valid_size=1 / 50`` must raise
        ``ValueError`` when asked for its number of splits on the module-level
        ``X``/``y`` fixture.  Then, for several values of ``ratio``, a splitter
        with ``n_splits = ratio + 1`` and ``n_repeats = ratio`` must:

        * report ``n_splits * n_repeats`` splits,
        * keep the ``verbose`` value it was given,
        * yield folds whose train/validation size ratio truncates to ``ratio``,
        * yield exactly as many folds as it reported.
        """
        invalid_cv = cross_validation.RandomStratifiedKFold(valid_size=1 / 50)
        with self.assertRaises(ValueError):
            invalid_cv.get_n_splits(X, y)

        for ratio in (1, 2, 5):
            # (ratio + 1) * ratio, i.e. n_splits * n_repeats.
            expected_folds = ratio * ratio + ratio

            kfold = cross_validation.RandomStratifiedKFold(
                n_splits=1 + ratio, n_repeats=ratio, verbose=ratio)
            assert kfold.get_n_splits(X, y) == expected_folds
            assert kfold.verbose == ratio

            for fold_idx, (train, valid) in enumerate(kfold.split(X, y)):
                assert int(train.size / valid.size) == ratio
                # Every validation fold must hold 5 distinct labels
                # (presumably the number of classes in the fixture).
                labels, _counts = np.unique(y[valid], return_counts=True)
                assert labels.size == 5

            # fold_idx is the last index produced by enumerate above.
            assert fold_idx + 1 == expected_folds
    def test_compare_loo_kf(self):
        """Check that RandomStratifiedKFold can reproduce LeaveOneOut.

        Both splitters share the same ``random_state`` and ``verbose`` values;
        the k-fold one is configured with ``n_splits=False`` and
        ``valid_size=1``.  Walking both in lockstep, every pair of folds must
        have identical train and validation indices, and each validation fold
        must contain ``n_class`` samples covering ``n_class`` distinct labels.
        """
        leave_one_out = cross_validation.LeaveOneOut(random_state=12,
                                                     verbose=2)
        kfold_equivalent = cross_validation.RandomStratifiedKFold(
            n_splits=False, valid_size=1, random_state=12, verbose=2)

        paired_folds = zip(leave_one_out.split(X, y),
                           kfold_equivalent.split(X, y))
        for loo_fold, kf_fold in paired_folds:
            loo_train, loo_valid = loo_fold[0], loo_fold[1]
            kf_train, kf_valid = kf_fold[0], kf_fold[1]

            assert np.all(loo_train == kf_train)
            assert np.all(loo_valid == kf_valid)
            assert len(kf_valid) == n_class
            assert np.unique(y[kf_valid]).size == n_class

        # Call the extension listing as well (the original note says it
        # prints the supported extensions).
        leave_one_out.get_supported_extensions()
Beispiel #3
0
# Load the low-resolution variant of the example dataset; the call returns a
# raster/vector pair.
raster, vector = datasets.load_historical_data(low_res=True)
# Names of the vector fields passed to extract_ROI below.
field = 'Class'
group = 'uniquefid'
# Presumably X holds the samples, y the 'Class' labels and g the 'uniquefid'
# group ids -- confirm against extract_ROI's documentation.
X, y, g = extract_ROI(raster, vector, field, group)
##############################################################################
# Initialize Random-Forest
# ---------------------------

# Fixed random_state so repeated runs give the same forest; single job.
classifier = RandomForestClassifier(random_state=12, n_jobs=1)

##############################################################################
# Create list of different CV
# ---------------------------

# Cross-validation strategies to compare: three from the project's
# cross_validation module and one from scikit-learn.
CVs = [
    cross_validation.RandomStratifiedKFold(n_splits=2),
    cross_validation.LeavePSubGroupOut(valid_size=0.5),
    cross_validation.LeaveOneSubGroupOut(),
    StratifiedKFold(n_splits=2, shuffle=True)  # from sklearn
]

# Accumulator for kappa scores; it is filled later in the script.
kappas = []

# Fit one SuperLearner per cross-validation strategy.
for cv in CVs:
    # A new learner for each strategy, built around the shared classifier,
    # with two candidate values for n_estimators.
    SL = SuperLearner(classifier=classifier,
                      param_grid=dict(n_estimators=[50, 100]),
                      n_jobs=1)
    # The group ids are always passed along with the strategy under test.
    SL.fit(X, y, group=g, cv=cv)
    # Label the output with the class name of the strategy.
    print('Kappa for ' + str(type(cv).__name__))
    # Per-strategy list, reset on every iteration; presumably collects the
    # kappa of each fold -- the code that fills it is not visible here.
    cvKappa = []