def test_LeaveOneSubGroupOut(self):
        """Exercise LeaveOneSubGroupOut: error case, fold consistency, export.

        Uses ``X``, ``y``, ``g`` and ``vector`` from the enclosing module.
        Three things are checked:

        1. ``get_n_splits`` raises when the samples labelled 5 all share a
           single subgroup.
        2. Within every fold the training and validation indices are
           disjoint, and over all folds each validation index occurs
           exactly once.
        3. ``save_to_vector`` returns as many entries as there are splits.
        """
        cv = cross_validation.LeaveOneSubGroupOut(verbose=2)

        # Error case: put every sample of label 5 in the same subgroup (1),
        # so no subgroup of that label can be left out.
        single_subgroup = np.copy(g)
        single_subgroup[np.where(y == 5)] = 1
        self.assertRaises(Exception, cv.get_n_splits, X, y, single_subgroup)

        # Nominal case, with a fresh splitter.
        cv = cross_validation.LeaveOneSubGroupOut(verbose=2)
        y_vl = np.array([])
        for tr, vl in cv.split(X, y, g):
            y_vl = np.concatenate((y_vl, vl))
            # Compare the actual fold indices: a sample must never be in both
            # the training and the validation part of the same fold.
            assert np.intersect1d(tr, vl).size == 0

        # Every validation index must have been produced exactly once.
        occurrences = np.unique(y_vl, return_counts=True)[1]
        assert np.all(occurrences == 1)

        list_files = cv.save_to_vector(vector,
                                       'Class',
                                       group='uniquefid',
                                       out_vector='/tmp/cv_g.gpkg')

        # Number of returned entries must equal the number of splits.
        assert len(list_files) == cv.get_n_splits(X, y, g)
Example #2
0
# Passed to extract_ROI as its fourth argument.
# NOTE(review): presumably the vector field holding the group ids — confirm.
group = 'uniquefid'
# raster, vector and field are defined outside this excerpt.
# NOTE(review): X, y, g look like samples, labels and per-sample groups
# (they are passed to SL.fit as X, y and group below) — confirm shapes.
X, y, g = extract_ROI(raster, vector, field, group)
##############################################################################
# Initialize Random-Forest
# ---------------------------
#
# The seed is fixed (``random_state=12``) and a single job is used
# (``n_jobs=1``).

classifier = RandomForestClassifier(random_state=12, n_jobs=1)

##############################################################################
# Create list of different CV
# ---------------------------
#
# Three splitters from ``cross_validation`` plus scikit-learn's
# ``StratifiedKFold``; each one is passed as ``cv`` to ``SL.fit`` in the
# loop below.

CVs = [
    cross_validation.RandomStratifiedKFold(n_splits=2),
    cross_validation.LeavePSubGroupOut(valid_size=0.5),
    cross_validation.LeaveOneSubGroupOut(),
    StratifiedKFold(n_splits=2, shuffle=True)  # from scikit-learn
]

# Starts empty; nothing in the visible part of the script appends to it.
# NOTE(review): presumably meant to collect the kappa values per CV — confirm.
kappas = []

for cv in CVs:
    SL = SuperLearner(classifier=classifier,
                      param_grid=dict(n_estimators=[50, 100]),
                      n_jobs=1)
    SL.fit(X, y, group=g, cv=cv)
    print('Kappa for ' + str(type(cv).__name__))
    cvKappa = []

    for stats in SL.get_stats_from_cv(confusion_matrix=False, kappa=True):
        print(stats['kappa'])