def test_LeaveOneSubGroupOut(self):
    """Exercise ``cross_validation.LeaveOneSubGroupOut`` end to end.

    Three things are checked, using the module-level fixtures ``X``, ``y``,
    ``g`` and ``vector``:

    1. ``get_n_splits`` raises when the groups are altered so that every
       sample labelled 5 belongs to group 1 (the "only one subgroup" case).
    2. With the unmodified groups, each fold's training and validation
       indices are disjoint, and no value is yielded for validation more
       than once over all folds.
    3. ``save_to_vector`` returns one entry per split.
    """
    cv = cross_validation.LeaveOneSubGroupOut(verbose=2)

    # If only one subgroup: collapse the groups of label 5 into group 1 and
    # expect the splitter to refuse the configuration.
    tempG = np.copy(g)
    tempG[np.where(y == 5)] = 1
    self.assertRaises(Exception, cv.get_n_splits, X, y, tempG)

    # If all is ok
    cv = cross_validation.LeaveOneSubGroupOut(verbose=2)
    y_vl = np.array([])
    for tr, vl in cv.split(X, y, g):
        y_vl = np.concatenate((y_vl, vl))
        # Train and validation of a fold must not share any element. This
        # has to be checked on the fold itself: comparing fixed constants
        # would always pass and verify nothing.
        self.assertEqual(np.intersect1d(tr, vl).size, 0)

    # Every value yielded for validation must appear exactly once overall.
    counts = np.unique(np.asarray(y_vl), return_counts=True)[1]
    self.assertTrue(np.all(counts == 1))

    # One saved entry is expected per split.
    list_files = cv.save_to_vector(
        vector, 'Class', group='uniquefid', out_vector='/tmp/cv_g.gpkg')
    self.assertEqual(len(list_files), cv.get_n_splits(X, y, g))
group = 'uniquefid'
X, y, g = extract_ROI(raster, vector, field, group)

##############################################################################
# Initialize Random-Forest
# ---------------------------
# The same classifier instance (``random_state=12``, one job) is handed to
# every SuperLearner created below.

classifier = RandomForestClassifier(random_state=12, n_jobs=1)

##############################################################################
# Create list of different CV
# ---------------------------
# Three cross-validation objects come from ``cross_validation``; the last one
# is scikit-learn's ``StratifiedKFold``.

CVs = [
    cross_validation.RandomStratifiedKFold(n_splits=2),
    cross_validation.LeavePSubGroupOut(valid_size=0.5),
    cross_validation.LeaveOneSubGroupOut(),
    StratifiedKFold(n_splits=2, shuffle=True),  # from sklearn
]

# NOTE(review): nothing is appended to ``kappas`` in this part of the
# script; presumably it is filled further down - confirm.
kappas = []

for cv in CVs:
    # Fit a new SuperLearner for each cross-validation strategy, searching
    # over two values of ``n_estimators``.
    SL = SuperLearner(
        classifier=classifier,
        param_grid={'n_estimators': [50, 100]},
        n_jobs=1,
    )
    SL.fit(X, y, group=g, cv=cv)

    print('Kappa for ' + str(type(cv).__name__))
    cvKappa = []

    # Print the kappa of each item returned by ``get_stats_from_cv``.
    for stats in SL.get_stats_from_cv(confusion_matrix=False, kappa=True):
        print(stats['kappa'])