##############################################################################
# Create CV
# -------------------------------------------
# Two-fold random stratified cross-validation; the fixed ``random_state``
# is passed so the splitter is seeded.
RSKF = RandomStratifiedKFold(
    n_splits=2,
    random_state=12,
    verbose=False,
)

##############################################################################
# Initialize Random-Forest
# ---------------------------
# The classifier is created with its default settings.
classifier = RandomForestClassifier()

##############################################################################
# Start learning
# ---------------------------
# The parameter grid proposes two candidate values for ``n_estimators``.
SL = SuperLearner(
    classifier=classifier,
    param_grid={'n_estimators': [10, 100]},
)
SL.fit(X, y, cv=RSKF)

##############################################################################
# Get kappa from each fold
# ---------------------------
# Only kappa is requested here; the confusion matrix is switched off.
for stats in SL.get_stats_from_cv(confusion_matrix=False, kappa=True):
    print(stats['kappa'])

##############################################################################
# Get each confusion matrix from folds
# -----------------------------------------------
# Collect one confusion matrix per fold into ``cms``.
cms = []
for stats in SL.get_stats_from_cv(confusion_matrix=True):
    cms.append(stats['confusion_matrix'])
# Initialize Random-Forest and metrics
# --------------------------------------
classifier = RandomForestClassifier(random_state=12, n_jobs=1)

# Scorers built from sklearn metric functions.
kappa = metrics.make_scorer(metrics.cohen_kappa_score)
f1_mean = metrics.make_scorer(metrics.f1_score, average='micro')
scoring = {
    'kappa': kappa,
    'f1_mean': f1_mean,
    'accuracy': 'accuracy',
}

##############################################################################
# Start learning
# ---------------------------
# sklearn will compute different metrics, but will keep best results from
# kappa (refit='kappa')
#
# NOTE(review): only the ``kappa`` scorer is passed to ``fit`` below; the
# ``scoring`` dict built above is not used in this section. Confirm whether
# multi-metric scoring was intended.
SL = SuperLearner(
    classifier=classifier,
    param_grid={'n_estimators': [10]},
    n_jobs=1,
    verbose=1,
)
SL.fit(X, y, cv=SKF, scoring=kappa)

# The section below is intentionally left commented out.
# =============================================================================
# ##############################################################################
# # Read the model
# # -------------------
# print(SL.model)
# print(SL.model.cv_results_)
# print(SL.model.best_score_)
#
# ##############################################################################
# # Get F1 for every class from best params
# # -----------------------------------------------
##############################################################################
# Create list of different CV
# ---------------------------
# Three splitters from ``cross_validation`` plus one from sklearn.
CVs = [
    cross_validation.RandomStratifiedKFold(n_splits=2),
    cross_validation.LeavePSubGroupOut(valid_size=0.5),
    cross_validation.LeaveOneSubGroupOut(),
    StratifiedKFold(n_splits=2, shuffle=True),  # from sklearn
]

# ``kappas`` ends up with one inner list per cross-validation strategy,
# each holding the kappa values reported for that strategy.
kappas = []

for cv in CVs:
    # A fresh learner is fitted for every strategy.
    SL = SuperLearner(
        classifier=classifier,
        param_grid={'n_estimators': [50, 100]},
        n_jobs=1,
    )
    SL.fit(X, y, group=g, cv=cv)

    print('Kappa for ' + type(cv).__name__)

    cvKappa = []
    for stats in SL.get_stats_from_cv(confusion_matrix=False, kappa=True):
        print(stats['kappa'])
        cvKappa.append(stats['kappa'])

    kappas.append(cvKappa)

    print('=' * 20)

##########################
# Plot example
##############################################################################
# Initialize Random-Forest and metrics
# --------------------------------------
classifier = RandomForestClassifier(random_state=12, n_jobs=1)

# Scorers built from sklearn metric functions.
kappa = metrics.make_scorer(metrics.cohen_kappa_score)
f1_mean = metrics.make_scorer(metrics.f1_score, average='micro')
scoring = dict(kappa=kappa, f1_mean=f1_mean, accuracy='accuracy')

##############################################################################
# Start learning
# ---------------------------
# sklearn will compute different metrics, but will keep best results from
# kappa (refit='kappa')
SL = SuperLearner(
    classifier=classifier,
    param_grid=dict(n_estimators=[10]),
    n_jobs=1,
    verbose=1,
)

##############################################################################
# Create or use custom function


def reduceBands(X, bandToKeep=None):
    """Return only the selected columns (bands) of ``X``.

    Parameters
    ----------
    X : array-like
        Indexed as ``X[:, bandToKeep]``, so it must have at least two
        dimensions (presumably samples x bands -- confirm against caller).
    bandToKeep : sequence of int, optional
        Column indices to keep. Defaults to ``[0, 2]``, i.e. the first and
        third band (the first and last band of a 3-band array).

    Returns
    -------
    The selected columns reshaped to ``(-1, len(bandToKeep))``.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across
    # calls.
    if bandToKeep is None:
        bandToKeep = [0, 2]

    X = X[:, bandToKeep].reshape(-1, len(bandToKeep))

    return X


# Register this function on the SuperLearner instance.
SL.customize_array(reduceBands)