예제 #1
0
 def test_results_regr(self):
     """Compare result statistics of one CrossRegressor at 0.1 and 0.5.

     The same regressor is evaluated twice on the housing table, once with
     ``run(..., 0.1, ...)`` and once with ``run(..., 0.5, ...)``. Every
     statistic listed below must be strictly greater for the 0.1 run.
     """
     housing = Table('housing')
     regressor = CrossRegressor(AbsErrorKNN(Euclidean, 10, average=True), 5)
     at_010 = run(regressor, 0.1, RandomSampler(housing, 2, 1))
     at_050 = run(regressor, 0.5, RandomSampler(housing, 2, 1))
     statistics = ('median_range', 'mean_range', 'interdecile_range',
                   'interdecile_mean', 'std_dev')
     # Checked in this fixed order; the first failing statistic aborts the test.
     for statistic in statistics:
         self.assertGreater(getattr(at_010, statistic)(),
                            getattr(at_050, statistic)())
예제 #2
0
    def test_repeat(self):
        """Count the distinct test sets produced by ``repeat`` for two samplers.

        Test sets are identified by the frozenset of their instance ids.
        ``RandomSampler(self.data, 3, 2).repeat(rep)`` must give ``rep``
        distinct test sets and ``CrossSampler(self.data, 3).repeat(rep)``
        must give ``3 * rep``.
        """
        rep = 5
        folds = 3

        def distinct_test_sets(sampler):
            # Collapse every test split to its set of instance ids so that
            # identical splits are counted only once.
            return frozenset(
                frozenset(inst.id for inst in test)
                for _, test in sampler.repeat(rep))

        s = RandomSampler(self.data, 3, 2)
        self.assertEqual(len(distinct_test_sets(s)), rep)

        s = CrossSampler(self.data, folds)
        self.assertEqual(len(distinct_test_sets(s)), folds * rep)
예제 #3
0
 def test_pickle(self):
     """Round-trip a fitted InductiveClassifier through a pickle file.

     The classifier is fitted on a train/calibration split of iris, its
     prediction for the first test instance is printed, and the same
     prediction is printed again from the unpickled copy.
     """
     import os
     import pickle
     import tempfile

     train, test = next(RandomSampler(Table('iris'), 2, 1))
     train, cal = next(RandomSampler(train, 2, 1))
     ic = InductiveClassifier(InverseProbability(NaiveBayesLearner()))
     ic.fit(train, cal)
     print(ic(test[0].x, 0.1))
     # Pickle inside a throwaway directory so the test does not leave a
     # stray 'temp.cp' behind in the current working directory.
     with tempfile.TemporaryDirectory() as tmp_dir:
         path = os.path.join(tmp_dir, 'temp.cp')
         with open(path, 'wb') as f:
             pickle.dump(ic, f)
         with open(path, 'rb') as f:
             ic2 = pickle.load(f)
     # NOTE(review): the two printed predictions are only compared by eye;
     # consider asserting equality once determinism of the predictor is
     # confirmed.
     print(ic2(test[0].x, 0.1))
예제 #4
0
    def test_results_class(self):
        """Check lower bounds on classification result criteria for iris.

        A single CrossClassifier is evaluated with ``run`` on fresh
        ``RandomSampler(tab, 2, 1)`` splits at 0.5, 0.1 and 0.01, and the
        empty, singleton and multiple criteria are checked respectively.
        A further run at 0.1 checks singleton correctness, confidence and
        credibility.
        """
        iris = Table('iris')
        measure = InverseProbability(LogisticRegressionLearner())
        classifier = CrossClassifier(measure, 5)

        def evaluate(eps):
            # Each call draws its own sampler, exactly one run per check.
            return run(classifier, eps, RandomSampler(iris, 2, 1))

        self.assertGreater(evaluate(0.5).empty_criterion(), 0.0)
        self.assertGreater(evaluate(0.1).singleton_criterion(), 0.8)
        self.assertGreater(evaluate(0.01).multiple_criterion(), 0.1)

        outcome = evaluate(0.1)
        self.assertGreater(outcome.singleton_correct(), 0.8)
        self.assertGreater(outcome.confidence(), 0.9)
        self.assertGreater(outcome.credibility(), 0.4)
예제 #5
0
def evaluate_nc_dataset_eps(nc_str, dataset, eps, id):
    """Evaluate one nonconformity measure 100 times and checkpoint the results.

    Each repetition calls ``run`` with a fresh ``RandomSampler(tab, 2, 1)``
    and concatenates the outcome onto the accumulated results. After every
    repetition the tuple ``(results, repetitions_done)`` is pickled to
    ``results/qsar/<id>.p``, overwriting the previous file.

    Args:
        nc_str: Python expression that is evaluated to obtain the
            nonconformity measure; also printed in the progress line.
        dataset: Argument passed to ``Table`` to load the data.
        eps: Value passed to ``run`` as its second argument.
        id: Integer used to build the output file name.
    """
    # SECURITY: eval executes arbitrary code. nc_str must only ever come
    # from a trusted, developer-controlled source.
    nc = eval(nc_str)
    tab = Table(dataset)
    # The predictor type depends only on nc, so pick it once instead of
    # re-testing on every repetition.
    predictor_cls = (InductiveClassifier if isinstance(nc, ClassNC)
                     else InductiveRegressor)
    path = 'results/qsar/%d.p' % id
    res = None
    for rep in range(100):
        r = run(predictor_cls(nc), eps, RandomSampler(tab, 2, 1))
        if res is None:
            res = r
        else:
            res.concatenate(r)
        print(rep + 1, nc_str, dataset, eps)
        # Rewritten after every repetition; presumably so partial results
        # survive an interrupted run.
        with open(path, 'wb') as f:
            pickle.dump((res, rep + 1), f)
예제 #6
0
    def test_abs_error_normalized(self):
        """Compare five InductiveRegressor set-ups on span-normalized housing.

        For ten repetitions, every ``CrossSampler(tab, 10)`` split has its
        training part divided again by
        ``RandomSampler(train, len(train) - 100, 100)`` into a training and
        a calibration set. All five predictors are evaluated on the same
        split with ``run_train_test``. Median range, interdecile mean and
        ``1 - accuracy`` are printed per predictor, and each accuracy must
        exceed ``1 - eps - 0.03``.
        """
        housing = Table('housing')
        scale = Normalize(zero_based=True,
                          norm_type=Normalize.NormalizeBySpan)
        housing = scale(housing)

        # Order matters: it fixes both the evaluation and the print order.
        predictors = [
            InductiveRegressor(AbsError(LinearRegressionLearner())),
            InductiveRegressor(AbsError(KNNRegressionLearner(4))),
            InductiveRegressor(
                AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                                   exp=False)),
            InductiveRegressor(
                AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                                   exp=True)),
            InductiveRegressor(
                AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                                   rf=RandomForestRegressor())),
        ]
        collected = [ResultsRegr() for _ in predictors]

        eps = 0.05
        for _ in range(10):
            for fold_train, test in CrossSampler(housing, 10):
                proper, calibrate = next(
                    RandomSampler(fold_train, len(fold_train) - 100, 100))
                for predictor, results in zip(predictors, collected):
                    results.concatenate(
                        run_train_test(predictor, eps, proper, test,
                                       calibrate))

        for results in collected:
            print(results.median_range(), results.interdecile_mean(),
                  1 - results.accuracy())
        for results in collected:
            self.assertGreater(results.accuracy(), 1 - eps - 0.03)
        """
예제 #7
0
    def test_run(self):
        """Check the number of predictions collected by ``run`` with rep=3.

        With ``RandomSampler(tab, 4, 1)`` on iris the expected count is
        ``3 * 1 / 5 * len(tab)``. With ``CrossSampler(tab, 4)`` on housing
        it is ``3 * len(tab)``.
        """
        iris = Table('iris')
        measure = InverseProbability(LogisticRegressionLearner())
        classifier = CrossClassifier(measure, 5)
        outcome = run(classifier, 0.1, RandomSampler(iris, 4, 1), rep=3)
        expected_random = 3 * 1 / 5 * len(iris)
        self.assertEqual(len(outcome.preds), expected_random)

        housing = Table('housing')
        regressor = InductiveRegressor(AbsError(LinearRegressionLearner()))
        outcome = run(regressor, 0.1, CrossSampler(housing, 4), rep=3)
        expected_cross = 3 * len(housing)
        self.assertEqual(len(outcome.preds), expected_cross)
예제 #8
0
 def test_SVM(self):
     """Check SVMDistance predictions on a two-class subset of iris.

     Rows 50 onward of iris are used with class values shifted down by one.
     Instances 30 and 85 must be predicted as exactly ``['v1']`` and
     ``['v2']`` at 0.1. Instance 27 must give an empty prediction at 0.2
     and both classes at 0.01.
     """
     iris = Table('iris')
     # versicolor, virginica
     two_class = Table(iris.X[50:], iris.Y[50:] - 1)

     def fitted_for(index):
         # Hold out one instance, then split the rest for fitting and
         # calibration of a fresh predictor.
         rest, held_out = get_instance(two_class, index)
         proper, calibration = next(RandomSampler(rest, 2, 1))
         predictor = InductiveClassifier(SVMDistance(skl_svm.SVC()),
                                         proper, calibration)
         return predictor, held_out

     # clear cases
     for index, expected in ((30, ['v1']), (85, ['v2'])):
         predictor, held_out = fitted_for(index)
         self.assertEqual(predictor(held_out.x, 0.1), expected)

     # border case
     predictor, held_out = fitted_for(27)
     self.assertEqual(predictor(held_out.x, 0.2), [])
     self.assertEqual(predictor(held_out.x, 0.01), ['v1', 'v2'])
예제 #9
0
    def test_avg_error_knn(self):
        """Exercise AvgErrorKNN: its inverse helper and use in a regressor.

        First checks ``avg_abs_inv`` on a fixed example. Then, for random
        samples of 10 and 11 values, checks that ``avg_abs`` crosses the
        chosen nonconformity value at both returned bounds. Finally checks
        the interval width of a fitted InductiveRegressor and that no width
        from a repeated ``run`` on housing is NaN.
        """
        measure = AvgErrorKNN(Euclidean)
        self.assertEqual(measure.avg_abs_inv(6 / 5, [1, 2, 3, 4, 5]), (3, 3))

        target = 0.4
        for size in (10, 11):  # one even-sized and one odd-sized sample
            sample = np.random.uniform(0, 1, size)
            lower, upper = measure.avg_abs_inv(target, sample)
            # avg_abs must be above target just outside [lower, upper] and
            # below it just inside.
            self.assertGreater(measure.avg_abs(lower - 0.001, sample), target)
            self.assertLess(measure.avg_abs(lower + 0.001, sample), target)
            self.assertLess(measure.avg_abs(upper - 0.001, sample), target)
            self.assertGreater(measure.avg_abs(upper + 0.001, sample), target)

        regressor = InductiveRegressor(AvgErrorKNN(Euclidean, 10),
                                       self.train, self.calibrate)
        lower, upper = regressor(self.test.x, 0.1)
        self.assertLess(upper - lower, 30.0)

        outcome = run(InductiveRegressor(AvgErrorKNN(Euclidean, 10)),
                      0.1,
                      RandomSampler(Table("housing"), 2, 1),
                      rep=10)
        self.assertFalse(any(np.isnan(width) for width in outcome.widths()))
예제 #10
0
import os

from Orange.classification import LogisticRegressionLearner
from Orange.data import Table

from cp.classification import TransductiveClassifier, CrossClassifier
from cp.evaluation import RandomSampler, LOOSampler, ResultsClass, run_train_test
from cp.nonconformity import InverseProbability

# Load the data set that sits next to this script.
tab = Table(
    os.path.join(os.path.dirname(__file__),
                 './dataSets/MitoToxStdzdRDkitDescForModelling.tab'))
lo, hi = 10, 40
trains, tests = [], []

# Draw 30 random splits up front so that every value of n below is evaluated
# on the same train/test pairs.
for rep in range(30):
    train, test = next(RandomSampler(tab, 100, len(tab) - 100))
    trains.append(train)
    tests.append(test)
    # Sanity check on the first `lo` training instances: the first part of
    # every LOOSampler split must contain more than one class.
    for a, b in LOOSampler(train[:lo]):
        assert len({inst.get_class() for inst in a}) > 1

# Compare the transductive and the cross classifier on growing prefixes of
# the training sets; n is also the second argument of CrossClassifier.
for n in range(lo, hi, 2):
    print(n)
    tcp = TransductiveClassifier(
        InverseProbability(LogisticRegressionLearner()))
    ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), n)
    tr, cr = ResultsClass(), ResultsClass()
    for train, test in zip(trains, tests):
        for predictor, results in ((tcp, tr), (ccp, cr)):
            results.concatenate(
                run_train_test(predictor, 0.1, train[:n], test))
    for results in (tr, cr):
        print(results.accuracy(), results.multiple_criterion(),
              results.time())
예제 #11
0
 def test_random(self):
     """Check the instance type and split ratio of one RandomSampler draw.

     The first training element must be a ``RowInstance``, and the ratio of
     training to test size must match ``a / b`` as compared by
     ``assertAlmostEqual``.
     """
     a, b = 3, 2
     s = RandomSampler(self.data, a, b)
     train, test = next(s)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)), which only says "False is not true".
     self.assertIsInstance(train[0], RowInstance)
     self.assertAlmostEqual(len(train) / len(test), a / b)
예제 #12
0
 def test_knn_speed(self):
     """Build a KNNDistance-based InductiveClassifier on the spambase table.

     NOTE(review): only the set-up is visible in this excerpt; the timing or
     assertion part implied by the name may follow elsewhere - confirm.
     """
     # The data file is resolved relative to this test module.
     tab = Table(
         os.path.join(os.path.dirname(__file__),
                      '../datasets-class/spambase.tab'))
     # One RandomSampler(tab, 2, 1) draw supplies the training and
     # calibration sets.
     train, calibrate = next(RandomSampler(tab, 2, 1))
     icp = InductiveClassifier(KNNDistance(Euclidean), train, calibrate)