def test_abs_error_normalized(self): tab = Table('housing') normalizer = Normalize(zero_based=True, norm_type=Normalize.NormalizeBySpan) tab = normalizer(tab) icr = InductiveRegressor(AbsError(LinearRegressionLearner())) icr_knn = InductiveRegressor(AbsError(KNNRegressionLearner(4))) icr_norm = InductiveRegressor( AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4, exp=False)) icr_norm_exp = InductiveRegressor( AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4, exp=True)) icr_norm_rf = InductiveRegressor( AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4, rf=RandomForestRegressor())) r, r_knn, r_norm, r_norm_exp, r_norm_rf = ResultsRegr(), ResultsRegr( ), ResultsRegr(), ResultsRegr(), ResultsRegr() eps = 0.05 for rep in range(10): for train, test in CrossSampler(tab, 10): train, calibrate = next( RandomSampler(train, len(train) - 100, 100)) r.concatenate(run_train_test(icr, eps, train, test, calibrate)) r_knn.concatenate( run_train_test(icr_knn, eps, train, test, calibrate)) r_norm.concatenate( run_train_test(icr_norm, eps, train, test, calibrate)) r_norm_exp.concatenate( run_train_test(icr_norm_exp, eps, train, test, calibrate)) r_norm_rf.concatenate( run_train_test(icr_norm_rf, eps, train, test, calibrate)) print(r.median_range(), r.interdecile_mean(), 1 - r.accuracy()) print(r_knn.median_range(), r_knn.interdecile_mean(), 1 - r_knn.accuracy()) print(r_norm.median_range(), r_norm.interdecile_mean(), 1 - r_norm.accuracy()) print(r_norm_exp.median_range(), r_norm_exp.interdecile_mean(), 1 - r_norm_exp.accuracy()) print(r_norm_rf.median_range(), r_norm_rf.interdecile_mean(), 1 - r_norm_rf.accuracy()) self.assertGreater(r.accuracy(), 1 - eps - 0.03) self.assertGreater(r_knn.accuracy(), 1 - eps - 0.03) self.assertGreater(r_norm.accuracy(), 1 - eps - 0.03) self.assertGreater(r_norm_exp.accuracy(), 1 - eps - 0.03) self.assertGreater(r_norm_rf.accuracy(), 1 - eps - 0.03) """
def test_nc_type(self):
    """Check which nonconformity objects the regressors accept at construction.

    An ``AbsError`` score must be accepted by ``InductiveRegressor`` and
    ``CrossRegressor``; an ``InverseProbability`` score must make both
    constructors fail with ``AssertionError``.
    """
    regression_score = AbsError(LinearRegressionLearner())
    classification_score = InverseProbability(LogisticRegressionLearner())

    # Inductive variant: accepted score first, then the rejected one.
    InductiveRegressor(regression_score)
    with self.assertRaises(AssertionError):
        InductiveRegressor(classification_score)

    # Cross variant with 5 as the second constructor argument.
    CrossRegressor(regression_score, 5)
    with self.assertRaises(AssertionError):
        CrossRegressor(classification_score, 5)
def test_error_model(self):
    """Exercise ``ErrorModelNC`` on the fixture split and via ``run`` on housing.

    First part: for ``loo`` off and on, the interval predicted for
    ``self.test`` at significance 0.1 must be narrower than 30.0.
    Second part: three nonconformity configurations are each evaluated with
    ``run`` over ``CrossSampler(Table('housing'), 20)`` and must reach an
    accuracy above 0.85; the summary statistics are printed afterwards.
    """
    for use_loo in (False, True):
        error_model = ErrorModelNC(LinearRegressionLearner(),
                                   LinearRegressionLearner(),
                                   loo=use_loo)
        regressor = InductiveRegressor(error_model, self.train, self.calibrate)
        lower, upper = regressor(self.test.x, 0.1)
        self.assertLess(upper - lower, 30.0)

    # Factories (not instances) so each score is built right before it is
    # used, in the same order as three sequential stanzas would build them.
    score_factories = (
        lambda: AbsError(RandomForestRegressionLearner()),
        lambda: ErrorModelNC(RandomForestRegressionLearner(),
                             LinearRegressionLearner()),
        lambda: ErrorModelNC(RandomForestRegressionLearner(),
                             LinearRegressionLearner(),
                             loo=True),
    )
    for make_score in score_factories:
        regressor = InductiveRegressor(make_score())
        results = run(regressor, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(results.accuracy(), 0.85)
        print(results.accuracy(), results.median_range(),
              results.interdecile_mean())
def test_run(self):
    """Check the number of predictions ``run`` collects for two samplers.

    With ``RandomSampler(tab, 4, 1)`` and ``rep=3`` on iris the result must
    hold ``3 * 1 / 5 * len(tab)`` predictions; with ``CrossSampler(tab, 4)``
    and ``rep=3`` on housing it must hold ``3 * len(tab)``.
    """
    iris = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    iris_results = run(classifier, 0.1, RandomSampler(iris, 4, 1), rep=3)
    # presumably 4:1 is the train/test ratio, i.e. 1/5 of the rows per
    # repetition are predicted - confirm against RandomSampler.
    self.assertEqual(len(iris_results.preds), 3 * 1 / 5 * len(iris))

    housing = Table('housing')
    regressor = InductiveRegressor(AbsError(LinearRegressionLearner()))
    housing_results = run(regressor, 0.1, CrossSampler(housing, 4), rep=3)
    self.assertEqual(len(housing_results.preds), 3 * len(housing))
def test_loo(self):
    """Smoke-test the leave-one-out classifier and regressor on instance 0.

    The classifier must predict exactly ``['Iris-setosa']`` for the iris
    instance at significance 0.1, and the regressor's interval for the
    housing instance must be narrower than 20.
    """
    significance = 0.1

    # Classification: instance 0 of iris is held out via get_instance.
    iris_train, iris_test = get_instance(Table('iris'), 0)
    loo_classifier = LOOClassifier(
        InverseProbability(LogisticRegressionLearner()), iris_train)
    predicted = loo_classifier(iris_test.x, significance)
    self.assertEqual(predicted, ['Iris-setosa'])

    # Regression: same procedure on housing, checking the interval width.
    housing_train, housing_test = get_instance(Table('housing'), 0)
    loo_regressor = LOORegressor(
        AbsError(LinearRegressionLearner()), housing_train)
    lower, upper = loo_regressor(housing_test.x, significance)
    self.assertLess(upper - lower, 20)
def test_validate_cross_regression(self):
    """Validate the empirical coverage of ``CrossRegressor`` on housing.

    Each instance of the shuffled table is held out in turn via
    ``get_instance``; a 5-fold ``CrossRegressor`` with an ``AbsError`` score
    is built on the (re-shuffled) remainder and asked for an interval at
    significance ``eps``. The fraction of held-out targets falling inside
    their interval must equal ``1 - eps`` within ``delta=0.02``.
    """
    tab = shuffle_data(Table('housing'))
    eps = 0.1
    total = len(tab)  # named `total` so the builtin `all` is not shadowed
    correct = 0
    for i in range(total):
        train, test = get_instance(tab, i)
        ccr = CrossRegressor(AbsError(LinearRegressionLearner()), 5,
                             shuffle_data(train))
        y_min, y_max = ccr(test.x, eps)
        # Count a hit when the true target lies inside the interval.
        if y_min <= test.y <= y_max:
            correct += 1
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.02)
def test_validate_regression(self):
    """Validate the empirical coverage of ``InductiveRegressor`` on housing.

    Each instance is held out in turn via ``get_instance``; the remainder is
    shuffled and divided by ``split_data(..., 2, 1)`` into a training and a
    calibration part, an ``InductiveRegressor`` with an ``AbsError`` score is
    built on them, and an interval is requested at significance ``eps``. The
    fraction of held-out targets falling inside their interval must equal
    ``1 - eps`` within ``delta=0.02``.
    """
    tab = Table('housing')
    eps = 0.1
    total = len(tab)  # named `total` so the builtin `all` is not shadowed
    correct = 0
    for i in range(total):
        train, test = get_instance(tab, i)
        # presumably a 2:1 train/calibrate split - confirm against split_data
        train, calibrate = split_data(shuffle_data(train), 2, 1)
        icr = InductiveRegressor(AbsError(LinearRegressionLearner()),
                                 train, calibrate)
        y_min, y_max = icr(test.x, eps)
        # Count a hit when the true target lies inside the interval.
        if y_min <= test.y <= y_max:
            correct += 1
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.02)