Code example #1
    def test_error_model(self):
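        # exercise ErrorModelNC both with and without the leave-one-out option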
        for loo in [False, True]:
            icr = InductiveRegressor(
                ErrorModelNC(LinearRegressionLearner(),
                             LinearRegressionLearner(),
                             loo=loo), self.train, self.calibrate)
            lo, hi = icr(self.test.x, 0.1)
            self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(AbsError(RandomForestRegressionLearner()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())

        icr = InductiveRegressor(
            ErrorModelNC(RandomForestRegressionLearner(),
                         LinearRegressionLearner()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())

        icr = InductiveRegressor(
            ErrorModelNC(RandomForestRegressionLearner(),
                         LinearRegressionLearner(),
                         loo=True))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())
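These excerpts come from a test suite for the orange3-conformal add-on and omit their imports. A sketch of the imports they appear to rely on follows; the module paths are assumptions based on the Orange and orangecontrib.conformal package layouts, not shown in the original:

import pickle

from sklearn.ensemble import RandomForestRegressor

import Orange.preprocess
from Orange.data import Table
from Orange.distance import Euclidean
from Orange.preprocess import Normalize
from Orange.classification import LogisticRegressionLearner, NaiveBayesLearner
from Orange.regression import (KNNRegressionLearner, LinearRegressionLearner,
                               RandomForestRegressionLearner)

# Conformal-prediction classes, assumed to live in orangecontrib.conformal:
from orangecontrib.conformal.nonconformity import (
    AbsError, AbsErrorNormalized, AbsErrorRF, ErrorModelNC, ExperimentalNC,
    InverseProbability, LOOClassNC, LOORegrNC)
from orangecontrib.conformal.classification import (CrossClassifier,
                                                    InductiveClassifier)
from orangecontrib.conformal.regression import InductiveRegressor
from orangecontrib.conformal.evaluation import (CrossSampler, RandomSampler,
                                                ResultsRegr, run,
                                                run_train_test)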
Code example #2
    def test_cross(self):
        folds = 7
        s = CrossSampler(self.data, k=folds)
        sizes = [(len(train), len(test)) for train, test in s]
        self.assertEqual(len(sizes), folds)
        self.assertTrue(all(a + b == len(self.data) for a, b in sizes))
        # fold sizes may differ by at most one instance
        test_sizes = [b for a, b in sizes]
        self.assertLessEqual(max(test_sizes) - min(test_sizes), 1)

        s = CrossSampler(self.data, k=folds)
        ids = frozenset(
            frozenset(inst.id for inst in test) for train, test in s)
        self.assertEqual(len(ids), folds)
Code example #3
def evaluate_ncs(tab, cp, nc_strs, id):
    res = {}
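    # each string in nc_strs is a nonconformity constructor expression,
    # instantiated with eval(), so the constructors must be in scope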
    for nc_str in nc_strs:
        nc = eval(nc_str)
        r = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        res[nc_str] = r
        print(id, nc_str)
    with open('results/%s.p' % id, 'wb') as f:
        pickle.dump(res, f)
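A hypothetical invocation of evaluate_ncs; the score strings and the id are made up for illustration, and the results land in 'results/<id>.p':

nc_strs = ["AbsError(LinearRegressionLearner())",
           "AbsErrorRF(RandomForestRegressionLearner(), RandomForestRegressor())"]
evaluate_ncs(Table('housing'), InductiveRegressor, nc_strs, 'housing-abs')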
Code example #4
    def test_abs_error_normalized(self):
        tab = Table('housing')
        normalizer = Normalize(zero_based=True,
                               norm_type=Normalize.NormalizeBySpan)
        tab = normalizer(tab)

        icr = InductiveRegressor(AbsError(LinearRegressionLearner()))
        icr_knn = InductiveRegressor(AbsError(KNNRegressionLearner(4)))
        icr_norm = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4),
                               Euclidean,
                               4,
                               exp=False))
        icr_norm_exp = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                               exp=True))
        icr_norm_rf = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4),
                               Euclidean,
                               4,
                               rf=RandomForestRegressor()))

        r, r_knn, r_norm, r_norm_exp, r_norm_rf = [
            ResultsRegr() for _ in range(5)]
        eps = 0.05
        for rep in range(10):
            for train, test in CrossSampler(tab, 10):
                train, calibrate = next(
                    RandomSampler(train, len(train) - 100, 100))
                r.concatenate(run_train_test(icr, eps, train, test, calibrate))
                r_knn.concatenate(
                    run_train_test(icr_knn, eps, train, test, calibrate))
                r_norm.concatenate(
                    run_train_test(icr_norm, eps, train, test, calibrate))
                r_norm_exp.concatenate(
                    run_train_test(icr_norm_exp, eps, train, test, calibrate))
                r_norm_rf.concatenate(
                    run_train_test(icr_norm_rf, eps, train, test, calibrate))

        print(r.median_range(), r.interdecile_mean(), 1 - r.accuracy())
        print(r_knn.median_range(), r_knn.interdecile_mean(),
              1 - r_knn.accuracy())
        print(r_norm.median_range(), r_norm.interdecile_mean(),
              1 - r_norm.accuracy())
        print(r_norm_exp.median_range(), r_norm_exp.interdecile_mean(),
              1 - r_norm_exp.accuracy())
        print(r_norm_rf.median_range(), r_norm_rf.interdecile_mean(),
              1 - r_norm_rf.accuracy())
        self.assertGreater(r.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_knn.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm_exp.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm_rf.accuracy(), 1 - eps - 0.03)
        """
Code example #5
    def test_run(self):
        tab = Table('iris')
        cp = CrossClassifier(InverseProbability(LogisticRegressionLearner()),
                             5)
        r = run(cp, 0.1, RandomSampler(tab, 4, 1), rep=3)
        # RandomSampler(tab, 4, 1) holds out 1/5 of iris for testing;
        # rep=3 repetitions give 3 * 150/5 = 90 predictions
        self.assertEqual(len(r.preds), 3 * len(tab) // 5)

        tab = Table('housing')
        cr = InductiveRegressor(AbsError(LinearRegressionLearner()))
        r = run(cr, 0.1, CrossSampler(tab, 4), rep=3)
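        # CrossSampler predicts every instance once per repetition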
        self.assertEqual(len(r.preds), 3 * len(tab))
Code example #6
    def test_repeat(self):
        rep = 5
        s = RandomSampler(self.data, 3, 2)
        ids = frozenset(
            frozenset(inst.id for inst in test)
            for train, test in s.repeat(rep))
        self.assertEqual(len(ids), rep)

        s = CrossSampler(self.data, 3)
        ids = frozenset(
            frozenset(inst.id for inst in test)
            for train, test in s.repeat(rep))
        # 3 folds per repetition: expect 3 * rep distinct test sets
        self.assertEqual(len(ids), 3 * rep)
Code example #7
def evaluate_datasets(datasets, cp, nc_str, id):
    res = {}
    for dataset in datasets:
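        # derive a short dataset id: base file name without extension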
        dataset_id = dataset.split('/')[-1].split('.')[0]
        imp = Orange.preprocess.Impute()
        rc = Orange.preprocess.preprocess.RemoveConstant()
        tab = rc(imp(Table(dataset)))
        nc = eval(nc_str)
        r = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        res[dataset_id] = r
        print(nc_str, dataset_id)
    print(nc_str.upper())
    with open('results/nc/%d.p' % id, 'wb') as f:
        pickle.dump(res, f)
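A hypothetical call of evaluate_datasets, with made-up dataset paths; note that id is formatted as an integer in the output path, and the pickled results can be read back afterwards:

datasets = ['datasets/housing.tab', 'datasets/servo.tab']
evaluate_datasets(datasets, InductiveRegressor,
                  "AbsError(LinearRegressionLearner())", 1)

with open('results/nc/1.p', 'rb') as f:
    res = pickle.load(f)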
Code example #8
    def test_abs_error_rf(self):
        icr = InductiveRegressor(
            AbsErrorRF(RandomForestRegressionLearner(),
                       RandomForestRegressor()), self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(
            AbsErrorRF(LinearRegressionLearner(), RandomForestRegressor()),
            self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(
            AbsErrorRF(RandomForestRegressionLearner(),
                       RandomForestRegressor()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.median_range(), r.interdecile_mean())
Code example #9
    def test_LOOClassNC(self):
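        # sweep the include/relative/neighbourhood options of LOOClassNC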
        for incl in [False, True]:
            for rel in [False, True]:
                for neigh in ['fixed', 'variable']:
                    nc = LOOClassNC(NaiveBayesLearner(),
                                    Euclidean,
                                    20,
                                    relative=rel,
                                    include=incl,
                                    neighbourhood=neigh)
                    icp = InductiveClassifier(nc, self.train, self.calibrate)
                    pred = icp(self.test.x, 0.1)
                    print(pred)
                    self.assertEqual(pred, ['Iris-setosa'])

        icp = InductiveClassifier(
            LOOClassNC(NaiveBayesLearner(), Euclidean, 20))
        r = run(icp, 0.1, CrossSampler(Table('iris'), 4))
        self.assertGreater(r.accuracy(), 0.85)
        self.assertGreater(r.singleton_criterion(), 0.8)
Code example #10
    def test_LOORegrNC(self):
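        # same option sweep for the regression variant, LOORegrNC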
        for incl in [False, True]:
            for rel in [False, True]:
                for neigh in ['fixed', 'variable']:
                    nc = LOORegrNC(LinearRegressionLearner,
                                   Euclidean,
                                   150,
                                   relative=rel,
                                   include=incl,
                                   neighbourhood=neigh)
                    icr = InductiveRegressor(nc, self.train, self.calibrate)
                    lo, hi = icr(self.test.x, 0.1)
                    print(lo, hi)
                    self.assertLess(hi - lo, 20.0)

        tab = Table('housing')
        icr = InductiveRegressor(
            LOORegrNC(LinearRegressionLearner, Euclidean, 150))
        r = run(icr, 0.1, CrossSampler(tab, 4))
        self.assertGreater(r.accuracy(), 0.85)
        self.assertLess(r.mean_range(), 15.0)
Code example #11
    def test_experimental(self):
        icr = InductiveRegressor(
            ExperimentalNC(RandomForestRegressor(n_estimators=20)), self.train,
            self.calibrate)
        r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
        print(r.accuracy(), r.median_range())