예제 #1
0
    def test_oob_calibration(self):
        """Run cross-validation on the out-of-bag conformal predictors.

        Two scenarios are evaluated back to back: an ``OobCpClassifier`` on
        the iris data and an ``OobCpRegressor`` on the diabetes data, each
        backed by a 100-tree random forest created with ``oob_score=True``.
        Both use 5 iterations of 5-fold cross-validation at significance
        levels 0.05, 0.1 and 0.2, and print the scores averaged per
        significance level.

        Nothing is asserted on the scores; the test only fails if one of the
        calls raises.
        """
        levels = (0.05, 0.1, 0.2)

        # ---------------------------------------------------------------------
        # Classification: iris
        # ---------------------------------------------------------------------
        iris = load_iris()

        clf_forest = RandomForestClassifier(n_estimators=100, oob_score=True)
        clf_nc = ClassifierNc(OobClassifierAdapter(clf_forest))
        clf_cv = ClassIcpCvHelper(OobCpClassifier(clf_nc))

        clf_scores = cross_val_score(
            clf_cv,
            iris.data,
            iris.target,
            iterations=5,
            folds=5,
            scoring_funcs=[class_mean_errors, class_avg_c],
            significance_levels=list(levels),
        )

        print("Classification: iris")
        # The fold/iteration indices are bookkeeping columns; remove them so
        # that only the score columns get averaged.
        clf_summary = clf_scores.drop(["fold", "iter"], axis=1)
        print(clf_summary.groupby(["significance"]).mean())

        # ---------------------------------------------------------------------
        # Regression (default error function): diabetes
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()

        reg_forest = RandomForestRegressor(n_estimators=100, oob_score=True)
        reg_nc = RegressorNc(OobRegressorAdapter(reg_forest))
        reg_cv = RegIcpCvHelper(OobCpRegressor(reg_nc))

        reg_scores = cross_val_score(
            reg_cv,
            diabetes.data,
            diabetes.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=list(levels),
        )

        print("Absolute error regression: diabetes")
        reg_summary = reg_scores.drop(["fold", "iter"], axis=1)
        print(reg_summary.groupby(["significance"]).mean())
예제 #2
0
score_model(
    icp_cv,
    "IcpClassifier (OOB, normalized)",
    data,
    "iris",
    [class_mean_errors, class_avg_c],
)

# -----------------------------------------------------------------------------
# Regression
# -----------------------------------------------------------------------------
data = load_diabetes()

nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100))
icp = IcpRegressor(nc)
icp_cv = RegIcpCvHelper(icp)

score_model(icp_cv, "IcpRegressor", data, "diabetes",
            [reg_mean_errors, reg_median_size])

# -----------------------------------------------------------------------------
# Regression (normalized)
# -----------------------------------------------------------------------------
data = load_diabetes()

nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100),
                         normalizer_model=KNeighborsRegressor())
icp = IcpRegressor(nc)
icp_cv = RegIcpCvHelper(icp)

score_model(
예제 #3
0
    def test_nc_factory(self):
        """Cross-validate predictors built on ``NcFactory`` nonconformity functions.

        Eight scenarios are run in sequence: plain, normalized, OOB and
        OOB-normalized variants, first for classification on the iris data and
        then for regression on the diabetes data. Each nonconformity function
        is obtained from ``NcFactory.create_nc`` around a 100-tree random
        forest; the normalized variants pass a ``KNeighborsRegressor`` as
        ``normalizer_model``.

        Nothing is asserted on the scores; the test only fails if one of the
        calls raises.
        """

        def evaluate(model, model_label, dataset, dataset_label, metrics):
            """Cross-validate ``model`` on ``dataset`` and print mean scores.

            Runs 10 iterations of 10-fold cross-validation at significance
            levels 0.05, 0.1 and 0.2, then prints a header line followed by
            the scores averaged per significance level.
            """
            raw = cross_val_score(
                model,
                dataset.data,
                dataset.target,
                iterations=10,
                folds=10,
                scoring_funcs=metrics,
                significance_levels=[0.05, 0.1, 0.2],
            )

            print("\n{}: {}".format(model_label, dataset_label))
            # Fold/iteration indices are bookkeeping only; drop them before
            # averaging.
            summary = raw.drop(["fold", "iter"], axis=1)
            print(summary.groupby(["significance"]).mean())

        # ---------------------------------------------------------------------
        # Classification
        # ---------------------------------------------------------------------
        iris = load_iris()

        forest = RandomForestClassifier(n_estimators=100)
        nc_func = NcFactory.create_nc(forest)
        helper = ClassIcpCvHelper(IcpClassifier(nc_func))
        evaluate(helper, "IcpClassifier", iris, "iris",
                 [class_mean_errors, class_avg_c])

        # ---------------------------------------------------------------------
        # Classification (normalized)
        # ---------------------------------------------------------------------
        iris = load_iris()

        forest = RandomForestClassifier(n_estimators=100)
        nc_func = NcFactory.create_nc(
            forest,
            normalizer_model=KNeighborsRegressor(),
        )
        helper = ClassIcpCvHelper(IcpClassifier(nc_func))
        evaluate(helper, "IcpClassifier (normalized)", iris, "iris",
                 [class_mean_errors, class_avg_c])

        # ---------------------------------------------------------------------
        # Classification OOB
        # ---------------------------------------------------------------------
        iris = load_iris()

        forest = RandomForestClassifier(n_estimators=100, oob_score=True)
        nc_func = NcFactory.create_nc(forest, oob=True)
        # The OOB predictor is handed to cross-validation directly, without a
        # CV helper wrapper.
        oob_model = OobCpClassifier(nc_func)
        evaluate(oob_model, "IcpClassifier (OOB)", iris, "iris",
                 [class_mean_errors, class_avg_c])

        # ---------------------------------------------------------------------
        # Classification OOB normalized
        # ---------------------------------------------------------------------
        iris = load_iris()

        forest = RandomForestClassifier(n_estimators=100, oob_score=True)
        nc_func = NcFactory.create_nc(
            forest,
            oob=True,
            normalizer_model=KNeighborsRegressor(),
        )
        oob_model = OobCpClassifier(nc_func)
        evaluate(oob_model, "IcpClassifier (OOB, normalized)", iris, "iris",
                 [class_mean_errors, class_avg_c])

        # ---------------------------------------------------------------------
        # Regression
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()

        forest = RandomForestRegressor(n_estimators=100)
        nc_func = NcFactory.create_nc(forest)
        helper = RegIcpCvHelper(IcpRegressor(nc_func))
        evaluate(helper, "IcpRegressor", diabetes, "diabetes",
                 [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression (normalized)
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()

        forest = RandomForestRegressor(n_estimators=100)
        nc_func = NcFactory.create_nc(
            forest,
            normalizer_model=KNeighborsRegressor(),
        )
        helper = RegIcpCvHelper(IcpRegressor(nc_func))
        evaluate(helper, "IcpRegressor (normalized)", diabetes, "diabetes",
                 [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression OOB
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()

        forest = RandomForestRegressor(n_estimators=100, oob_score=True)
        nc_func = NcFactory.create_nc(forest, oob=True)
        oob_model = OobCpRegressor(nc_func)
        evaluate(oob_model, "IcpRegressor (OOB)", diabetes, "diabetes",
                 [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression OOB normalized
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()

        forest = RandomForestRegressor(n_estimators=100, oob_score=True)
        nc_func = NcFactory.create_nc(
            forest,
            oob=True,
            normalizer_model=KNeighborsRegressor(),
        )
        oob_model = OobCpRegressor(nc_func)
        evaluate(oob_model, "IcpRegressor (OOB, normalized)", diabetes,
                 "diabetes", [reg_mean_errors, reg_median_size])
예제 #4
0
    def test_cross_validation(self):
        """Cross-validate inductive conformal predictors with several setups.

        Five scenarios are run in sequence, each on a 100-tree random forest:

        1. classification on iris with ``MarginErrFunc``;
        2. regression on diabetes with ``AbsErrorErrFunc``;
        3. regression on diabetes with ``AbsErrorErrFunc`` plus a
           ``RegressorNormalizer``;
        4. regression on diabetes with ``SignErrorErrFunc``;
        5. regression on diabetes with ``SignErrorErrFunc`` plus a
           ``RegressorNormalizer`` that itself uses ``AbsErrorErrFunc``.

        Nothing is asserted on the scores; the test only fails if one of the
        calls raises.
        """

        def score_model(icp_cv, title, ds, scoring_funcs):
            """Cross-validate ``icp_cv`` on ``ds`` and print mean scores.

            Parameters
            ----------
            icp_cv : model object passed as first argument to
                ``cross_val_score``.
            title : str
                Header line printed before the score table.
            ds : dataset object exposing ``data`` and ``target`` attributes.
            scoring_funcs : list
                Scoring callables forwarded to ``cross_val_score``.
            """
            scores = cross_val_score(
                icp_cv,
                ds.data,
                ds.target,
                iterations=5,
                folds=5,
                scoring_funcs=scoring_funcs,
                significance_levels=[0.05, 0.1, 0.2],
            )

            print(title)
            # Fold/iteration indices are bookkeeping only; drop them so the
            # per-significance mean covers just the score columns.
            scores = scores.drop(["fold", "iter"], axis=1)
            print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Classification
        # -----------------------------------------------------------------------------
        data = load_iris()

        icp = IcpClassifier(
            ClassifierNc(
                ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
                MarginErrFunc()))
        icp_cv = ClassIcpCvHelper(icp)

        score_model(icp_cv, "Classification: iris", data,
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Regression, absolute error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        icp = IcpRegressor(
            RegressorNc(
                RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                AbsErrorErrFunc()))
        icp_cv = RegIcpCvHelper(icp)

        score_model(icp_cv, "Absolute error regression: diabetes", data,
                    [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression, normalized absolute error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        underlying_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        score_model(icp_cv, "Normalized absolute error regression: diabetes",
                    data, [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression, signed error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        icp = IcpRegressor(
            RegressorNc(
                RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                SignErrorErrFunc()))
        icp_cv = RegIcpCvHelper(icp)

        score_model(icp_cv, "Signed error regression: diabetes", data,
                    [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression, normalized signed error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        underlying_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))

        # The normalization model can use a different error function than is
        # used to measure errors on the underlying model
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, SignErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        score_model(icp_cv, "Normalized signed error regression: diabetes",
                    data, [reg_mean_errors, reg_median_size])