def test_predict_different_domain_interpolation(self):
    """A shifted test domain scored via interpolation keeps AUC close to
    the original; progressively cutting the wavenumber range degrades it.

    Fixed to use the ``TestOnTestData()(train, test, learners)`` call
    convention used by the other tests in this file instead of the
    legacy constructor-call form.
    """
    train, test = separate_learn_test(self.collagen)
    aucorig = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = Interpolate(points=getx(test) - 1.)(test)  # other test domain
    train = Interpolate(points=getx(train))(
        train)  # make train capable of interpolation
    aucshift = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    # shift can decrease AUC slightly
    self.assertAlmostEqual(aucorig, aucshift, delta=0.01)
    test = Cut(1000, 1700)(test)
    auccut1 = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = Cut(1100, 1600)(test)
    auccut2 = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = Cut(1200, 1500)(test)
    auccut3 = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    # the more we cut the lower precision we get
    self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
def test_slightly_different_domain(self):
    """ If test data has a slightly different domain then (with interpolation)
    we should obtain a similar classification score. """
    # rows full of unknowns make LogisticRegression undefined
    # we can obtain them, for example, with EMSC, if one of the badspectra
    # is a spectrum from the data
    learner = LogisticRegressionLearner(max_iter=1000,
                                        preprocessors=[_RemoveNaNRows()])
    for proc in PREPROCESSORS:
        # LR that can not handle unknown values
        if hasattr(proc, "skip_add_zeros"):
            continue
        train, test = separate_learn_test(preprocessor_data(proc))
        # baseline score: preprocess the train part, test untouched
        reference_train = proc(train)
        aucorig = AUC(TestOnTestData()(reference_train, test, [learner]))
        # perturb the test domain slightly
        test = slightly_change_wavenumbers(test, 0.00001)
        test = odd_attr(test)
        # a subset of points for training so that all test sets points
        # are within the train set points, which gives no unknowns
        train = Interpolate(points=getx(train)[1:-3])(
            train)  # interpolatable train
        train = proc(train)
        # explicit domain conversion test to catch exceptions that would
        # otherwise be silently handled in TestOnTestData
        _ = test.transform(train.domain)
        aucnow = AUC(TestOnTestData()(train, test, [learner]))
        self.assertAlmostEqual(aucnow, aucorig, delta=0.03,
                               msg="Preprocessor " + str(proc))
        test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
        _ = test.transform(train.domain)  # explicit call again
        aucnow = AUC(TestOnTestData()(train, test, [learner]))
        # the difference should be slight
        self.assertAlmostEqual(aucnow, aucorig, delta=0.05,
                               msg="Preprocessor " + str(proc))
def test_predict_savgov_same_domain(self):
    """Savitzky-Golay filtered data with identical train/test domains
    should still give a high classification score."""
    filtered = SavitzkyGolayFiltering(window=9, polyorder=2,
                                      deriv=2)(self.collagen)
    train, test = separate_learn_test(filtered)
    score = AUC(TestOnTestData()(train, test,
                                 [LogisticRegressionLearner()]))
    self.assertGreater(score, 0.85)
def test_predict_same_domain(self):
    """With matching train/test domains classification is easy."""
    train, test = separate_learn_test(self.collagen)
    score = AUC(TestOnTestData()(train, test,
                                 [LogisticRegressionLearner()]))
    self.assertGreater(score, 0.9)  # easy dataset
def test_predict_different_domain(self):
    """Without interpolation a shifted test domain destroys prediction:
    AUC collapses to chance (~0.5).

    Fixed to use the ``TestOnTestData()(train, test, learners)`` call
    convention used by the other tests in this file instead of the
    legacy constructor-call form.
    """
    train, test = separate_learn_test(self.collagen)
    test = Interpolate(points=getx(test) - 1)(test)  # other test domain
    aucdestroyed = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    self.assertTrue(0.45 < aucdestroyed < 0.55)
def test_predict_samename_domain(self):
    """Attributes with the same names but broken conversion links make
    prediction collapse to chance (~0.5).

    Fixed to use the ``TestOnTestData()(train, test, learners)`` call
    convention used by the other tests in this file instead of the
    legacy constructor-call form.
    """
    train, test = separate_learn_test(self.collagen)
    test = destroy_atts_conversion(test)
    aucdestroyed = AUC(
        TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    self.assertTrue(0.45 < aucdestroyed < 0.55)
""" Documentation script """
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation.testing import CrossValidation
from Orange.evaluation.scoring import AUC
from orangecontrib.bioinformatics.geo.dataset import GDS

# Fetch the GEO dataset and turn it into an Orange table with samples
# as rows and genes as attributes.
gds = GDS("GDS2960")
data = gds.get_data(sample_type="disease state", transpose=True,
                    report_genes=True)
print("Samples: %d, Genes: %d" % (len(data), len(data.domain.attributes)))

# Score a logistic-regression model with 10-fold cross validation.
learners = [LogisticRegressionLearner()]
results = CrossValidation(data, learners, k=10)
print("AUC = %.3f" % AUC(results)[0])