Example #1
    def test_error_model(self):
        for loo in [False, True]:
            icr = InductiveRegressor(
                ErrorModelNC(LinearRegressionLearner(),
                             LinearRegressionLearner(),
                             loo=loo), self.train, self.calibrate)
            lo, hi = icr(self.test.x, 0.1)
            self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(AbsError(RandomForestRegressionLearner()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())

        icr = InductiveRegressor(
            ErrorModelNC(RandomForestRegressionLearner(),
                         LinearRegressionLearner()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())

        icr = InductiveRegressor(
            ErrorModelNC(RandomForestRegressionLearner(),
                         LinearRegressionLearner(),
                         loo=True))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())
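
These tests rely on `self.train`, `self.calibrate` and `self.test` fixtures created outside the excerpt, plus imports (`Table`, `run`, the samplers and the nonconformity scores) from Orange and the conformal prediction add-on. `ErrorModelNC` pairs a learner for the target with a second learner that models its errors; `loo=True` appears to switch those error estimates to leave-one-out. A minimal fixture sketch, with a hypothetical class name and illustrative split points:

import unittest

from Orange.data import Table

class TestNC(unittest.TestCase):
    def setUp(self):
        # Hypothetical split of 'housing': training and calibration
        # tables plus a single test instance (sizes are illustrative).
        tab = Table('housing')
        self.train = tab[:300]
        self.calibrate = tab[300:400]
        self.test = tab[400]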
Example #2
    def test_results_regr(self):
        tab = Table('housing')
        cr = CrossRegressor(AbsErrorKNN(Euclidean, 10, average=True), 5)
        r1 = run(cr, 0.1, RandomSampler(tab, 2, 1))
        r5 = run(cr, 0.5, RandomSampler(tab, 2, 1))
        self.assertGreater(r1.median_range(), r5.median_range())
        self.assertGreater(r1.mean_range(), r5.mean_range())
        self.assertGreater(r1.interdecile_range(), r5.interdecile_range())
        self.assertGreater(r1.interdecile_mean(), r5.interdecile_mean())
        self.assertGreater(r1.std_dev(), r5.std_dev())
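
All five comparisons follow from one fact: dropping the significance level from 0.5 to 0.1 demands 90% rather than 50% coverage, so every interval gets wider on average. A quick sketch, reusing `cr` and `tab` from the test (the extra eps value is illustrative):

for eps in (0.05, 0.1, 0.5):
    r = run(cr, eps, RandomSampler(tab, 2, 1))
    # Coverage tracks 1 - eps, and mean_range() shrinks as eps grows.
    print(eps, r.accuracy(), r.mean_range())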
Example #3
    def test_run(self):
        tab = Table('iris')
        cp = CrossClassifier(InverseProbability(LogisticRegressionLearner()),
                             5)
        r = run(cp, 0.1, RandomSampler(tab, 4, 1), rep=3)
        self.assertEqual(len(r.preds), 3 * 1 / 5 * len(tab))

        tab = Table('housing')
        cr = InductiveRegressor(AbsError(LinearRegressionLearner()))
        r = run(cr, 0.1, CrossSampler(tab, 4), rep=3)
        self.assertEqual(len(r.preds), 3 * len(tab))
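
The expected counts follow from the samplers: `RandomSampler(tab, 4, 1)` holds out 1 part in 5 for testing, so three repetitions over the 150 iris instances give 3 * 150/5 = 90 predictions, while `CrossSampler(tab, 4)` predicts every instance exactly once per repetition, giving 3 * 506 = 1518 for housing.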
Example #4
    def test_results_class(self):
        tab = Table('iris')
        cp = CrossClassifier(InverseProbability(LogisticRegressionLearner()),
                             5)
        empty = run(cp, 0.5, RandomSampler(tab, 2, 1)).empty_criterion()
        self.assertGreater(empty, 0.0)
        single = run(cp, 0.1, RandomSampler(tab, 2, 1)).singleton_criterion()
        self.assertGreater(single, 0.8)
        multiple = run(cp, 0.01, RandomSampler(tab, 2, 1)).multiple_criterion()
        self.assertGreater(multiple, 0.1)

        results = run(cp, 0.1, RandomSampler(tab, 2, 1))
        self.assertGreater(results.singleton_correct(), 0.8)
        self.assertGreater(results.confidence(), 0.9)
        self.assertGreater(results.credibility(), 0.4)
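
The three criteria move with eps as expected: a large eps (0.5) lets the classifier reject so many labels that some prediction sets come out empty, a small eps (0.01) makes it cautious enough to return several labels at once, and eps = 0.1 mostly yields singletons. A sketch that shows all three at once, reusing `cp` and `tab`:

for eps in (0.01, 0.1, 0.5):
    r = run(cp, eps, RandomSampler(tab, 2, 1))
    # Low eps favours multi-label sets, high eps favours empty sets.
    print(eps, r.empty_criterion(), r.singleton_criterion(),
          r.multiple_criterion())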
Example #5
    def test_cross(self):
        # Mondrian (class-conditional) cross-conformal classifier on iris.
        cpm = CrossClassifier(
            InverseProbability(LogisticRegressionLearner()),
            4, mondrian=True)
        rm = run(cpm, 0.1, LOOSampler(Table('iris')))
        self.assertGreater(rm.accuracy(), 0.85)
        self.assertGreater(rm.singleton_criterion(), 0.85)
Example #6
def evaluate_nc_dataset_eps(nc_str, dataset, eps, id):
    nc = eval(nc_str)
    tab = Table(dataset)
    res = None
    for rep in range(100):
        if isinstance(nc, ClassNC):
            r = run(InductiveClassifier(nc), eps, RandomSampler(tab, 2, 1))
        else:
            r = run(InductiveRegressor(nc), eps, RandomSampler(tab, 2, 1))
        if res is None:
            res = r
        else:
            res.concatenate(r)
        print(rep + 1, nc_str, dataset, eps)
        with open('results/qsar/%d.p' % id, 'wb') as f:
            pickle.dump((res, rep + 1), f)
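
A hypothetical driver call, assuming `pickle` and the classes named in `nc_str` are imported in the script and that the output directory exists (all argument values are illustrative). Because the function dumps `(res, rep + 1)` after every repetition, a long run can be inspected or resumed midway:

import os

os.makedirs('results/qsar', exist_ok=True)
evaluate_nc_dataset_eps('AbsError(LinearRegressionLearner())',
                        'housing', 0.1, 0)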
Example #7
    def test_accuracy(self):
        tab = Table('iris')
        cp = CrossClassifier(
            InverseProbability(LogisticRegressionLearner()), 5)
        eps = 0.1
        r = run(cp, eps, LOOSampler(tab))
        acc = r.accuracy()
        self.assertAlmostEqual(acc, 1 - eps, delta=0.03)
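
This is the validity guarantee of conformal prediction: at significance level eps the predictor may err with probability at most eps, so empirical accuracy should settle near 1 - eps. The same check at other levels (a sketch reusing `cp` and `tab`; the eps values are illustrative):

for eps in (0.05, 0.2):
    r = run(cp, eps, LOOSampler(tab))
    # Accuracy should stay within a few percent of the nominal 1 - eps.
    print(eps, r.accuracy())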
Example #8
def evaluate_ncs(tab, cp, nc_strs, id):
    res = {}
    for nc_str in nc_strs:
        nc = eval(nc_str)
        r = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        res[nc_str] = r
        print(id, nc_str)
    with open('results/%s.p' % id, 'wb') as f:
        pickle.dump(res, f)
Example #9
def evaluate_datasets(datasets, cp, nc_str, id):
    res = {}
    for dataset in datasets:
        dataset_id = dataset.split('/')[-1].split('.')[0]
        imp = Orange.preprocess.Impute()
        rc = Orange.preprocess.preprocess.RemoveConstant()
        tab = rc(imp(Table(dataset)))
        nc = eval(nc_str)
        r = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        res[dataset_id] = r
        print(nc_str, dataset_id)
    print(nc_str.upper())
    with open('results/nc/%d.p' % id, 'wb') as f:
        pickle.dump(res, f)
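
A hypothetical invocation, assuming `.tab` files at the given paths and an existing `results/nc` directory (paths and the numeric id are illustrative); note that each dataset is imputed and stripped of constant columns before the nonconformity string is evaluated:

evaluate_datasets(['data/auto-mpg.tab', 'data/servo.tab'],
                  InductiveRegressor,
                  'AbsError(LinearRegressionLearner())', 0)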
Example #10
    def test_accuracy(self):
        # Truncated iris: the third class keeps only 20 of its 50 examples.
        tab = Table('iris')[:120]
        results = []
        for m in [False, True]:
            cp = CrossClassifier(
                InverseProbability(LogisticRegressionLearner()),
                2, mondrian=m)
            r = run(cp, 0.2, LOOSampler(tab), rep=10)
            res = [r.accuracy(y) for y in range(3)]
            results.append(res)
            print(r.accuracy(), res)
        # Mondrian calibration evens out per-class accuracy, so the span
        # between the best and worst class must shrink.
        span = max(results[0]) - min(results[0])
        span_mondrian = max(results[1]) - min(results[1])
        self.assertLess(span_mondrian, span)
Example #11
    def test_abs_error_rf(self):
        icr = InductiveRegressor(
            AbsErrorRF(RandomForestRegressionLearner(),
                       RandomForestRegressor()), self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(
            AbsErrorRF(LinearRegressionLearner(), RandomForestRegressor()),
            self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(
            AbsErrorRF(RandomForestRegressionLearner(),
                       RandomForestRegressor()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.median_range(), r.interdecile_mean())
Example #12
    def test_LOOClassNC(self):
        for incl in [False, True]:
            for rel in [False, True]:
                for neigh in ['fixed', 'variable']:
                    nc = LOOClassNC(NaiveBayesLearner(),
                                    Euclidean,
                                    20,
                                    relative=rel,
                                    include=incl,
                                    neighbourhood=neigh)
                    icp = InductiveClassifier(nc, self.train, self.calibrate)
                    pred = icp(self.test.x, 0.1)
                    print(pred)
                    self.assertEqual(pred, ['Iris-setosa'])

        icp = InductiveClassifier(
            LOOClassNC(NaiveBayesLearner(), Euclidean, 20))
        r = run(icp, 0.1, CrossSampler(Table('iris'), 4))
        self.assertGreater(r.accuracy(), 0.85)
        self.assertGreater(r.singleton_criterion(), 0.8)
Example #13
    def test_LOORegrNC(self):
        for incl in [False, True]:
            for rel in [False, True]:
                for neigh in ['fixed', 'variable']:
                    nc = LOORegrNC(LinearRegressionLearner,
                                   Euclidean,
                                   150,
                                   relative=rel,
                                   include=incl,
                                   neighbourhood=neigh)
                    icr = InductiveRegressor(nc, self.train, self.calibrate)
                    lo, hi = icr(self.test.x, 0.1)
                    print(lo, hi)
                    self.assertLess(hi - lo, 20.0)

        tab = Table('housing')
        icr = InductiveRegressor(
            LOORegrNC(LinearRegressionLearner, Euclidean, 150))
        r = run(icr, 0.1, CrossSampler(tab, 4))
        self.assertGreater(r.accuracy(), 0.85)
        self.assertLess(r.mean_range(), 15.0)
Example #14
    def test_avg_error_knn(self):
        ncm = AvgErrorKNN(Euclidean)
        self.assertEqual(ncm.avg_abs_inv(6 / 5, [1, 2, 3, 4, 5]), (3, 3))
        for odd in [0, 1]:
            ys = np.random.uniform(0, 1, 10 + odd)
            nc = 0.4
            lo, hi = ncm.avg_abs_inv(nc, ys)
            self.assertGreater(ncm.avg_abs(lo - 0.001, ys), nc)
            self.assertLess(ncm.avg_abs(lo + 0.001, ys), nc)
            self.assertLess(ncm.avg_abs(hi - 0.001, ys), nc)
            self.assertGreater(ncm.avg_abs(hi + 0.001, ys), nc)

        icr = InductiveRegressor(AvgErrorKNN(Euclidean, 10), self.train,
                                 self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        r = run(InductiveRegressor(AvgErrorKNN(Euclidean, 10)),
                0.1,
                RandomSampler(Table("housing"), 2, 1),
                rep=10)
        self.assertFalse(any([np.isnan(w) for w in r.widths()]))
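
The hand-computed case in the first assertion checks out: for y = 3 the average absolute error against [1, 2, 3, 4, 5] is (2 + 1 + 0 + 1 + 2)/5 = 6/5, and because the median 3 minimises that average, inverting at exactly 6/5 collapses the interval to the single point (3, 3). The forward direction is easy to verify:

ys = [1, 2, 3, 4, 5]
print(sum(abs(3 - y) for y in ys) / len(ys))  # 1.2 == 6/5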
Example #15
    def test_time(self):
        tab = Table('iris')
        cp = CrossClassifier(
            InverseProbability(LogisticRegressionLearner()), 5)
        r = run(cp, 0.1, LOOSampler(tab))
        # Smoke test: only checks that timing information is available.
        r.time()
Example #16
    def test_experimental(self):
        icr = InductiveRegressor(
            ExperimentalNC(RandomForestRegressor(n_estimators=20)),
            self.train, self.calibrate)
        r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
        # Smoke test: reports accuracy and median width without asserting.
        print(r.accuracy(), r.median_range())