コード例 #1
0
 def test_predict_different_domain_interpolation(self):
     """Compare AUC on a shifted, then progressively cut, test domain.

     The train set is wrapped in ``Interpolate`` so that test data defined
     on other points can be converted to its domain.  The AUC on a test
     set shifted by one unit must stay within 0.01 of the unshifted AUC,
     and each successive ``Cut`` of the test set must strictly lower the
     AUC.
     """
     def score(learn_set, eval_set):
         # every evaluation gets its own, freshly built learner
         return AUC(
             TestOnTestData(learn_set, eval_set,
                            [LogisticRegressionLearner()]))

     train, test = separate_learn_test(self.collagen)
     aucorig = score(train, test)

     # move the test set onto another domain (points shifted by 1)
     shifted_points = getx(test) - 1.
     test = Interpolate(points=shifted_points)(test)
     # interpolate train on its own points so it becomes interpolatable
     train = Interpolate(points=getx(train))(train)
     aucshift = score(train, test)
     # the shift is allowed to decrease AUC only slightly
     self.assertAlmostEqual(aucorig, aucshift, delta=0.01)

     # cuts are cumulative: each one is applied to the previous result
     cut_scores = []
     for low, high in ((1000, 1700), (1100, 1600), (1200, 1500)):
         test = Cut(low, high)(test)
         cut_scores.append(score(train, test))
     first_cut, second_cut, third_cut = cut_scores

     # the more of the test data is cut away, the lower the score
     self.assertTrue(aucorig > first_cut > second_cut > third_cut)
コード例 #2
0
    def test_slightly_different_domain(self):
        """Scores must stay similar when the test domain differs slightly.

        For every preprocessor in ``PREPROCESSORS`` (except those carrying a
        ``skip_add_zeros`` attribute) the AUC obtained on the untouched test
        set is compared with the AUC obtained after the test set has been
        perturbed (tolerance 0.03) and then additionally shifted by one unit
        (tolerance 0.05).
        """
        # Rows consisting only of unknowns leave LogisticRegression
        # undefined.  They can appear, for example, with EMSC when one of
        # the bad spectra is a spectrum from the data, hence the
        # _RemoveNaNRows preprocessor on the learner.
        learner = LogisticRegressionLearner(
            max_iter=1000, preprocessors=[_RemoveNaNRows()])

        def score(learn_set, eval_set):
            return AUC(TestOnTestData()(learn_set, eval_set, [learner]))

        for proc in PREPROCESSORS:
            if hasattr(proc, "skip_add_zeros"):
                continue

            # reference score: preprocessed train, unmodified test
            train, test = separate_learn_test(preprocessor_data(proc))
            aucorig = score(proc(train), test)

            test = odd_attr(slightly_change_wavenumbers(test, 0.00001))

            # Train on a subset of points so that every test point lies
            # within the train points; this yields no unknowns.
            inner_points = getx(train)[1:-3]
            train = proc(Interpolate(points=inner_points)(train))

            # Convert the domain explicitly: exceptions raised here would
            # otherwise be silently handled inside TestOnTestData.
            test.transform(train.domain)
            failure_msg = "Preprocessor " + str(proc)
            self.assertAlmostEqual(score(train, test), aucorig,
                                   delta=0.03, msg=failure_msg)

            # additionally shift the test points by 1
            test = Interpolate(points=getx(test) - 1.)(test)
            test.transform(train.domain)  # explicit conversion once more
            # only a slight difference is acceptable
            self.assertAlmostEqual(score(train, test), aucorig,
                                   delta=0.05, msg=failure_msg)
コード例 #3
0
 def test_predict_savgov_same_domain(self):
     """AUC must exceed 0.85 when train and test share the filtered domain.

     The Savitzky-Golay filter (window 9, polynomial order 2, second
     derivative) is applied to the whole data set before it is split.
     """
     savgol = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)
     train, test = separate_learn_test(savgol(self.collagen))
     results = TestOnTestData()(train, test, [LogisticRegressionLearner()])
     self.assertGreater(AUC(results), 0.85)
コード例 #4
0
 def test_predict_same_domain(self):
     """AUC must exceed 0.9 when train and test come from one split."""
     train, test = separate_learn_test(self.collagen)
     results = TestOnTestData()(train, test, [LogisticRegressionLearner()])
     # the data set is an easy one, so a high threshold is used
     self.assertGreater(AUC(results), 0.9)
コード例 #5
0
 def test_predict_different_domain(self):
     """AUC must land near 0.5 when only the test domain is shifted.

     Unlike the interpolation variant of this test, the train set is left
     as it is; only the test set is interpolated onto points shifted by 1.
     """
     train, test = separate_learn_test(self.collagen)
     shifted_points = getx(test) - 1
     test = Interpolate(points=shifted_points)(test)  # other test domain
     results = TestOnTestData(train, test, [LogisticRegressionLearner()])
     aucdestroyed = AUC(results)
     # strictly between 0.45 and 0.55
     self.assertTrue(0.45 < aucdestroyed < 0.55)
コード例 #6
0
 def test_predict_samename_domain(self):
     """AUC must land near 0.5 after ``destroy_atts_conversion`` on test.

     NOTE(review): judging by its name, the helper presumably keeps the
     attribute names but breaks their conversion to the train domain;
     confirm against its definition.
     """
     train, test = separate_learn_test(self.collagen)
     broken_test = destroy_atts_conversion(test)
     results = TestOnTestData(train, broken_test,
                              [LogisticRegressionLearner()])
     aucdestroyed = AUC(results)
     # strictly between 0.45 and 0.55
     self.assertTrue(0.45 < aucdestroyed < 0.55)
コード例 #7
0
""" Documentation script """
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation.testing import CrossValidation
from Orange.evaluation.scoring import AUC

from orangecontrib.bioinformatics.geo.dataset import GDS

# Load the GDS2960 data set and report how many samples and genes it has.
gds = GDS("GDS2960")
data = gds.get_data(
    sample_type="disease state",
    transpose=True,
    report_genes=True,
)
n_samples = len(data)
n_genes = len(data.domain.attributes)
print("Samples: %d, Genes: %d" % (n_samples, n_genes))

# Cross-validate a single logistic regression learner with k=10.
learners = [LogisticRegressionLearner()]
results = CrossValidation(data, learners, k=10)

# AUC yields one score per learner; print the score of the only learner.
first_auc = AUC(results)[0]
print("AUC = %.3f" % first_auc)