def test_oob_calibration(self):
    """Cross-validate out-of-bag conformal predictors built on random forests.

    Evaluates an ``OobCpClassifier`` on the iris data and an
    ``OobCpRegressor`` on the diabetes data (5 iterations of 5 folds each)
    and prints, for every significance level, the mean of each scoring
    function.
    """
    significance = (0.05, 0.1, 0.2)

    def mean_per_significance(table):
        # "fold" and "iter" are dropped before grouping so that only the
        # scoring columns are averaged per significance level.
        # NOTE(review): `table` is presumably a pandas DataFrame - confirm.
        trimmed = table.drop(["fold", "iter"], axis=1)
        return trimmed.groupby(["significance"]).mean()

    # -----------------------------------------------------------------------------
    # Classification
    # -----------------------------------------------------------------------------
    iris = load_iris()
    # oob_score=True is set on the forest handed to the OOB adapter.
    forest_clf = RandomForestClassifier(n_estimators=100, oob_score=True)
    oob_classifier = OobCpClassifier(
        ClassifierNc(OobClassifierAdapter(forest_clf)))
    classifier_cv = ClassIcpCvHelper(oob_classifier)
    classifier_scores = cross_val_score(
        classifier_cv,
        iris.data,
        iris.target,
        iterations=5,
        folds=5,
        scoring_funcs=[class_mean_errors, class_avg_c],
        significance_levels=list(significance),
    )
    print("Classification: iris")
    print(mean_per_significance(classifier_scores))

    # -----------------------------------------------------------------------------
    # Regression, absolute error
    # -----------------------------------------------------------------------------
    diabetes = load_diabetes()
    forest_reg = RandomForestRegressor(n_estimators=100, oob_score=True)
    oob_regressor = OobCpRegressor(
        RegressorNc(OobRegressorAdapter(forest_reg)))
    regressor_cv = RegIcpCvHelper(oob_regressor)
    regressor_scores = cross_val_score(
        regressor_cv,
        diabetes.data,
        diabetes.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=list(significance),
    )
    print("Absolute error regression: diabetes")
    print(mean_per_significance(regressor_scores))
score_model( icp_cv, "IcpClassifier (OOB, normalized)", data, "iris", [class_mean_errors, class_avg_c], ) # ----------------------------------------------------------------------------- # Regression # ----------------------------------------------------------------------------- data = load_diabetes() nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100)) icp = IcpRegressor(nc) icp_cv = RegIcpCvHelper(icp) score_model(icp_cv, "IcpRegressor", data, "diabetes", [reg_mean_errors, reg_median_size]) # ----------------------------------------------------------------------------- # Regression (normalized) # ----------------------------------------------------------------------------- data = load_diabetes() nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100), normalizer_model=KNeighborsRegressor()) icp = IcpRegressor(nc) icp_cv = RegIcpCvHelper(icp) score_model(
def test_nc_factory(self):
    """Exercise ``NcFactory.create_nc`` across eight predictor configurations.

    For classification (iris) and regression (diabetes) the factory is
    called in four variants: plain, with a normalizer model, out-of-bag,
    and out-of-bag with a normalizer model. Each resulting predictor is
    cross-validated (10 iterations of 10 folds) and the per-significance
    mean of every scoring function is printed.
    """
    class_metrics = (class_mean_errors, class_avg_c)
    reg_metrics = (reg_mean_errors, reg_median_size)

    def run_and_report(predictor, predictor_label, dataset, dataset_label,
                       metrics):
        # Cross-validate one predictor and print its averaged scores.
        table = cross_val_score(
            predictor,
            dataset.data,
            dataset.target,
            iterations=10,
            folds=10,
            scoring_funcs=list(metrics),
            significance_levels=[0.05, 0.1, 0.2],
        )
        print("\n{}: {}".format(predictor_label, dataset_label))
        # Drop "fold" and "iter" so only scoring columns are averaged.
        table = table.drop(["fold", "iter"], axis=1)
        print(table.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Classification
    # -----------------------------------------------------------------------------
    iris = load_iris()
    nc_func = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
    predictor = ClassIcpCvHelper(IcpClassifier(nc_func))
    run_and_report(predictor, "IcpClassifier", iris, "iris", class_metrics)

    # -----------------------------------------------------------------------------
    # Classification (normalized)
    # -----------------------------------------------------------------------------
    iris = load_iris()
    nc_func = NcFactory.create_nc(
        RandomForestClassifier(n_estimators=100),
        normalizer_model=KNeighborsRegressor(),
    )
    predictor = ClassIcpCvHelper(IcpClassifier(nc_func))
    run_and_report(predictor, "IcpClassifier (normalized)", iris, "iris",
                   class_metrics)

    # -----------------------------------------------------------------------------
    # Classification OOB
    # -----------------------------------------------------------------------------
    iris = load_iris()
    nc_func = NcFactory.create_nc(
        RandomForestClassifier(n_estimators=100, oob_score=True),
        oob=True,
    )
    # The OOB predictor is handed to cross-validation directly, without a
    # ClassIcpCvHelper wrapper.
    predictor = OobCpClassifier(nc_func)
    run_and_report(predictor, "IcpClassifier (OOB)", iris, "iris",
                   class_metrics)

    # -----------------------------------------------------------------------------
    # Classification OOB normalized
    # -----------------------------------------------------------------------------
    iris = load_iris()
    nc_func = NcFactory.create_nc(
        RandomForestClassifier(n_estimators=100, oob_score=True),
        oob=True,
        normalizer_model=KNeighborsRegressor(),
    )
    predictor = OobCpClassifier(nc_func)
    run_and_report(predictor, "IcpClassifier (OOB, normalized)", iris, "iris",
                   class_metrics)

    # -----------------------------------------------------------------------------
    # Regression
    # -----------------------------------------------------------------------------
    diabetes = load_diabetes()
    nc_func = NcFactory.create_nc(RandomForestRegressor(n_estimators=100))
    predictor = RegIcpCvHelper(IcpRegressor(nc_func))
    run_and_report(predictor, "IcpRegressor", diabetes, "diabetes",
                   reg_metrics)

    # -----------------------------------------------------------------------------
    # Regression (normalized)
    # -----------------------------------------------------------------------------
    diabetes = load_diabetes()
    nc_func = NcFactory.create_nc(
        RandomForestRegressor(n_estimators=100),
        normalizer_model=KNeighborsRegressor(),
    )
    predictor = RegIcpCvHelper(IcpRegressor(nc_func))
    run_and_report(predictor, "IcpRegressor (normalized)", diabetes,
                   "diabetes", reg_metrics)

    # -----------------------------------------------------------------------------
    # Regression OOB
    # -----------------------------------------------------------------------------
    diabetes = load_diabetes()
    nc_func = NcFactory.create_nc(
        RandomForestRegressor(n_estimators=100, oob_score=True),
        oob=True,
    )
    # As with classification, the OOB regressor is used without a CV helper.
    predictor = OobCpRegressor(nc_func)
    run_and_report(predictor, "IcpRegressor (OOB)", diabetes, "diabetes",
                   reg_metrics)

    # -----------------------------------------------------------------------------
    # Regression OOB normalized
    # -----------------------------------------------------------------------------
    diabetes = load_diabetes()
    nc_func = NcFactory.create_nc(
        RandomForestRegressor(n_estimators=100, oob_score=True),
        oob=True,
        normalizer_model=KNeighborsRegressor(),
    )
    predictor = OobCpRegressor(nc_func)
    run_and_report(predictor, "IcpRegressor (OOB, normalized)", diabetes,
                   "diabetes", reg_metrics)
def test_cross_validation(self):
    """Cross-validate inductive conformal predictors with several error functions.

    Five configurations are evaluated, each with 5 iterations of 5 folds at
    significance levels 0.05, 0.1 and 0.2:

    1. classification on iris with ``MarginErrFunc``;
    2. regression on diabetes with ``AbsErrorErrFunc``;
    3. regression on diabetes with ``AbsErrorErrFunc`` and a normalizer;
    4. regression on diabetes with ``SignErrorErrFunc``;
    5. regression on diabetes with ``SignErrorErrFunc`` and a normalizer.

    For every configuration the mean of each scoring function per
    significance level is printed under a descriptive title.
    """

    def cross_validate_and_report(icp_cv, dataset, scoring_funcs, title):
        # Shared evaluation step: run cross-validation, print the title,
        # then print the per-significance mean of the scores.
        scores = cross_val_score(
            icp_cv,
            dataset.data,
            dataset.target,
            iterations=5,
            folds=5,
            scoring_funcs=scoring_funcs,
            significance_levels=[0.05, 0.1, 0.2],
        )
        print(title)
        # Drop "fold" and "iter" so only scoring columns are averaged.
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Classification
    # -----------------------------------------------------------------------------
    data = load_iris()
    icp = IcpClassifier(
        ClassifierNc(
            ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
            MarginErrFunc()))
    icp_cv = ClassIcpCvHelper(icp)
    cross_validate_and_report(
        icp_cv,
        data,
        [class_mean_errors, class_avg_c],
        "Classification: iris",
    )

    # -----------------------------------------------------------------------------
    # Regression, absolute error
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    icp = IcpRegressor(
        RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            AbsErrorErrFunc()))
    icp_cv = RegIcpCvHelper(icp)
    cross_validate_and_report(
        icp_cv,
        data,
        [reg_mean_errors, reg_median_size],
        "Absolute error regression: diabetes",
    )

    # -----------------------------------------------------------------------------
    # Regression, normalized absolute error
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    underlying_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    # The same underlying_model instance is passed to both the normalizer
    # and the nonconformity function.
    normalizer = RegressorNormalizer(underlying_model,
                                     normalizer_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)
    cross_validate_and_report(
        icp_cv,
        data,
        [reg_mean_errors, reg_median_size],
        "Normalized absolute error regression: diabetes",
    )

    # -----------------------------------------------------------------------------
    # Regression, signed error
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    icp = IcpRegressor(
        RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            SignErrorErrFunc()))
    icp_cv = RegIcpCvHelper(icp)
    cross_validate_and_report(
        icp_cv,
        data,
        [reg_mean_errors, reg_median_size],
        "Signed error regression: diabetes",
    )

    # -----------------------------------------------------------------------------
    # Regression, normalized signed error
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    underlying_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    # The normalization model can use a different error function than is
    # used to measure errors on the underlying model
    normalizer = RegressorNormalizer(underlying_model,
                                     normalizer_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, SignErrorErrFunc(), normalizer)
    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)
    cross_validate_and_report(
        icp_cv,
        data,
        [reg_mean_errors, reg_median_size],
        "Normalized signed error regression: diabetes",
    )