def test_run_train_test(self):
    """Smoke test: run_train_test accepts cross and inductive classifiers."""
    data = shuffle_data(Table('iris'))
    cross = CrossClassifier(InverseProbability(LogisticRegressionLearner()), 4)
    run_train_test(cross, 0.1, data[:100], data[100:])
    inductive = InductiveClassifier(
        InverseProbability(LogisticRegressionLearner()))
    # Inductive variant additionally takes a separate calibration set.
    run_train_test(inductive, 0.1, data[:50], data[100:], data[50:100])
def test_inverse_probability(self):
    """Inductive classifier with inverse-probability nonconformity on iris."""
    clf = InductiveClassifier(InverseProbability(NaiveBayesLearner()),
                              self.train, self.calibrate)
    # At a very strict significance level all three classes are predicted.
    self.assertEqual(clf(self.test.x, 0.01),
                     ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
    # Fitting after construction behaves the same as the constructor fit.
    clf = InductiveClassifier(InverseProbability(NaiveBayesLearner()))
    clf.fit(self.train, self.calibrate)
    self.assertEqual(clf(self.test.x, 0.1), ['Iris-setosa'])
def test_inverse_probability(self):
    """Transductive classifier with inverse-probability nonconformity."""
    tab = Table('iris')
    train, test = get_instance(tab, 0)
    tcp = TransductiveClassifier(InverseProbability(NaiveBayesLearner()),
                                 train)
    self.assertEqual(tcp(test.x, 0.1), ['Iris-setosa'])
    # Fitting after construction gives the same prediction.
    train, test = get_instance(tab, 0)
    tcp = TransductiveClassifier(InverseProbability(NaiveBayesLearner()))
    tcp.fit(train)
    self.assertEqual(tcp(test.x, 0.1), ['Iris-setosa'])
def test_cross_classification(self):
    """Cross-conformal classification: constructor fit and explicit fit agree."""
    tab = Table('iris')
    train, test = get_instance(tab, 0)
    train = shuffle_data(train)
    ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()),
                          3, train)
    self.assertEqual(ccp(test.x, 0.1), ['Iris-setosa'])
    # Same result when fitting after construction.
    ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), 3)
    ccp.fit(train)
    self.assertEqual(ccp(test.x, 0.1), ['Iris-setosa'])
def test_cross(self):
    """Mondrian cross-conformal prediction performs well on iris."""
    mondrian_cp = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 4, mondrian=True)
    results = run(mondrian_cp, 0.1, LOOSampler(Table('iris')))
    self.assertGreater(results.accuracy(), 0.85)
    self.assertGreater(results.singleton_criterion(), 0.85)
def test_nc_type(self):
    """Regressors must reject classification nonconformity measures."""
    regression_nc = AbsError(LinearRegressionLearner())
    classification_nc = InverseProbability(LogisticRegressionLearner())
    InductiveRegressor(regression_nc)  # matching type is accepted
    self.assertRaises(AssertionError, InductiveRegressor, classification_nc)
    CrossRegressor(regression_nc, 5)  # matching type is accepted
    self.assertRaises(AssertionError, CrossRegressor, classification_nc, 5)
def test_accuracy(self):
    """Empirical accuracy stays close to the nominal confidence level."""
    tab = Table('iris')
    predictor = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    eps = 0.1
    results = run(predictor, eps, LOOSampler(tab))
    self.assertAlmostEqual(results.accuracy(), 1 - eps, delta=0.03)
def test_nonexchangeability(self):
    """Validity degrades on non-exchangeable (ordered) USPS data."""
    tab = Table(os.path.join(os.path.dirname(__file__), '../data/usps.tab'))
    train, test = split_data(tab, 7291, 2007)
    test = test[:200]
    train, calibrate = split_data(train, 3, 1)
    icp = InductiveClassifier(
        InverseProbability(LogisticRegressionLearner()), train, calibrate)
    errors = sum(inst.get_class() not in icp(inst.x, 0.1) for inst in test)
    # Error rate clearly exceeds the nominal 10% significance level.
    self.assertGreater(errors / len(test), 0.13)
def test_run(self):
    """run() yields the expected number of predictions per sampler."""
    iris = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    res = run(classifier, 0.1, RandomSampler(iris, 4, 1), rep=3)
    # 3 repetitions, each predicting the 1/5 test fraction.
    self.assertEqual(len(res.preds), 3 * 1 / 5 * len(iris))
    housing = Table('housing')
    regressor = InductiveRegressor(AbsError(LinearRegressionLearner()))
    res = run(regressor, 0.1, CrossSampler(housing, 4), rep=3)
    # Cross sampling predicts every instance once per repetition.
    self.assertEqual(len(res.preds), 3 * len(housing))
def test_loo(self):
    """Leave-one-out conformal prediction for classification and regression."""
    train, test = get_instance(Table('iris'), 0)
    classifier = LOOClassifier(
        InverseProbability(LogisticRegressionLearner()), train)
    self.assertEqual(classifier(test.x, 0.1), ['Iris-setosa'])
    train, test = get_instance(Table('housing'), 0)
    regressor = LOORegressor(AbsError(LinearRegressionLearner()), train)
    lo, hi = regressor(test.x, 0.1)
    # Predicted interval should be reasonably tight.
    self.assertLess(hi - lo, 20)
def test_pickle(self):
    """A fitted inductive classifier survives a pickle round-trip.

    Fixed two defects of the original test: it left a stray 'temp.cp'
    file in the working directory (never cleaned up), and it only
    printed the predictions instead of asserting they match.
    """
    import pickle
    train, test = next(RandomSampler(Table('iris'), 2, 1))
    train, cal = next(RandomSampler(train, 2, 1))
    ic = InductiveClassifier(InverseProbability(NaiveBayesLearner()))
    ic.fit(train, cal)
    pred_before = ic(test[0].x, 0.1)
    # Round-trip in memory — no temp file to leak.
    ic2 = pickle.loads(pickle.dumps(ic))
    self.assertEqual(pred_before, ic2(test[0].x, 0.1))
def test_validate_cross_classification(self):
    """Empirical validity of cross-conformal classification is ~1 - eps.

    Fixes from the original: the counter named ``all`` shadowed the
    builtin, and the ``num`` accumulator of prediction-set sizes was
    never used in any assertion (dead code) — both removed/renamed.
    """
    tab = shuffle_data(Table('iris'))
    eps = 0.1
    correct, total = 0, len(tab)
    for i in range(total):
        train, test = get_instance(tab, i)
        ccp = CrossClassifier(InverseProbability(NaiveBayesLearner()),
                              5, train)
        if test.get_class() in ccp(test.x, eps):
            correct += 1
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.02)
def test_model_based(self):
    """SVM-based nonconformity measures yield the expected singleton."""
    for measure in (InverseProbability, ProbabilityMargin):
        clf = InductiveClassifier(measure(SVMLearner(probability=True)),
                                  self.train, self.calibrate)
        self.assertEqual(clf(self.test.x, 0.1), ['Iris-setosa'])
def test_validate_transductive(self):
    """Empirical validity of transductive conformal prediction is ~1 - eps.

    Fixes from the original: the counter named ``all`` shadowed the
    builtin, and the unused ``num`` accumulator was dead code —
    both removed/renamed.
    """
    tab = Table('iris')
    eps = 0.1
    correct, total = 0, len(tab)
    for i in range(total):
        train, test = get_instance(tab, i)
        tcp = TransductiveClassifier(
            InverseProbability(LogisticRegressionLearner()), train)
        if test.get_class() in tcp(test.x, eps):
            correct += 1
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.01)
def test_validate_inductive(self):
    """Empirical validity of inductive conformal prediction is ~1 - eps.

    Fixes from the original: the counter named ``all`` shadowed the
    builtin, and the unused ``num`` accumulator was dead code —
    both removed/renamed.
    """
    eps = 0.1
    correct, total = 0, len(self.tab)
    for i in range(total):
        train, test = get_instance(self.tab, i)
        # 2:1 split of the shuffled training data into proper
        # training and calibration sets.
        train, calibrate = split_data(shuffle_data(train), 2, 1)
        icp = InductiveClassifier(
            InverseProbability(LogisticRegressionLearner()),
            train, calibrate)
        if test.get_class() in icp(test.x, eps):
            correct += 1
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.01)
def test_individual_classification(self):
    """Credibility/confidence thresholds delimit the prediction region."""
    train, test = get_instance(Table('iris'), 123)  # borderline case
    train = shuffle_data(train)
    ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()),
                          3, train)
    prediction = ccp.predict(test.x)
    cred = prediction.credibility()
    conf = prediction.confidence()
    self.assertLess(cred, 0.5)
    eps = 1e-6
    # Just below 1 - confidence the region is a singleton; just above, it grows.
    self.assertEqual(len(ccp(test.x, 1 - (conf - eps))), 1)
    self.assertGreater(len(ccp(test.x, 1 - (conf + eps))), 1)
    # Just above the credibility the region is empty; just below, it is not.
    self.assertEqual(len(ccp(test.x, cred + eps)), 0)
    self.assertGreater(len(ccp(test.x, cred - eps)), 0)
def test_results_class(self):
    """Criteria and summary statistics of classification results."""
    tab = Table('iris')
    cp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), 5)
    # Loose eps produces empty sets, moderate eps singletons,
    # strict eps multiple-label sets.
    self.assertGreater(
        run(cp, 0.5, RandomSampler(tab, 2, 1)).empty_criterion(), 0.0)
    self.assertGreater(
        run(cp, 0.1, RandomSampler(tab, 2, 1)).singleton_criterion(), 0.8)
    self.assertGreater(
        run(cp, 0.01, RandomSampler(tab, 2, 1)).multiple_criterion(), 0.1)
    results = run(cp, 0.1, RandomSampler(tab, 2, 1))
    self.assertGreater(results.singleton_correct(), 0.8)
    self.assertGreater(results.confidence(), 0.9)
    self.assertGreater(results.credibility(), 0.4)
def test_accuracy(self):
    """Mondrian (label-conditional) version balances per-class accuracies."""
    tab = Table('iris')[:120]
    per_class = []
    for mondrian in (False, True):
        cp = CrossClassifier(
            InverseProbability(LogisticRegressionLearner()),
            2, mondrian=mondrian)
        r = run(cp, 0.2, LOOSampler(tab), rep=10)
        accuracies = [r.accuracy(label) for label in range(3)]
        per_class.append(accuracies)
        print(r.accuracy(), accuracies)
    # Spread of per-class accuracies must shrink with mondrian taxonomy.
    plain_span = max(per_class[0]) - min(per_class[0])
    mondrian_span = max(per_class[1]) - min(per_class[1])
    self.assertLess(mondrian_span, plain_span)
import os

from Orange.classification import LogisticRegressionLearner
from Orange.data import Table

from cp.classification import TransductiveClassifier, CrossClassifier
from cp.evaluation import (RandomSampler, LOOSampler, ResultsClass,
                           run_train_test)
from cp.nonconformity import InverseProbability

# Compare transductive vs. cross conformal classification for a range of
# training-set sizes on the mitochondrial toxicity data set.
tab = Table(os.path.join(os.path.dirname(__file__),
                         './dataSets/MitoToxStdzdRDkitDescForModelling.tab'))

trains, tests = [], []
lo, hi = 10, 40
for rep in range(30):
    train, test = next(RandomSampler(tab, 100, len(tab) - 100))
    trains.append(train)
    tests.append(test)
    # Sanity check: every leave-one-out split of the smallest training
    # prefix must still contain more than one class.
    for a, b in LOOSampler(train[:lo]):
        assert(len(set(inst.get_class() for inst in a)) > 1)

for n in range(lo, hi, 2):
    print(n)
    tcp = TransductiveClassifier(
        InverseProbability(LogisticRegressionLearner()))
    ccp = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), n)
    tr, cr = ResultsClass(), ResultsClass()
    # Accumulate results over all 30 sampled train/test repetitions.
    for train, test in zip(trains, tests):
        tr.concatenate(run_train_test(tcp, 0.1, train[:n], test))
        cr.concatenate(run_train_test(ccp, 0.1, train[:n], test))
    print(tr.accuracy(), tr.multiple_criterion(), tr.time())
    print(cr.accuracy(), cr.multiple_criterion(), cr.time())
def test_time(self):
    """Smoke test: timing statistics are available after a run."""
    data = Table('iris')
    predictor = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    results = run(predictor, 0.1, LOOSampler(data))
    results.time()