def test_results_regr(self):
    """Compare interval-width statistics of a CrossRegressor at two levels.

    The same regressor is run on the 'housing' table once with 0.1 and
    once with 0.5 as the second argument of ``run``; every width statistic
    of the 0.1 run must be strictly greater than that of the 0.5 run.
    """
    housing = Table('housing')
    regressor = CrossRegressor(AbsErrorKNN(Euclidean, 10, average=True), 5)
    at_01 = run(regressor, 0.1, RandomSampler(housing, 2, 1))
    at_05 = run(regressor, 0.5, RandomSampler(housing, 2, 1))

    # Each name is a zero-argument method on the results object.
    width_stats = (
        'median_range',
        'mean_range',
        'interdecile_range',
        'interdecile_mean',
        'std_dev',
    )
    for stat in width_stats:
        self.assertGreater(getattr(at_01, stat)(), getattr(at_05, stat)())
def test_repeat(self):
    """Check how many distinct test sets ``repeat`` produces.

    A RandomSampler repeated ``rep`` times must give ``rep`` distinct test
    sets; a CrossSampler built with 3 must give ``3 * rep`` distinct test
    sets (15 for ``rep = 5``).
    """
    rep = 5
    folds = 3

    def distinct_test_sets(sampler):
        # Identify each test set by the ids of its instances so that two
        # identical splits collapse into a single entry.
        return frozenset(
            frozenset(inst.id for inst in test)
            for _, test in sampler.repeat(rep))

    ids = distinct_test_sets(RandomSampler(self.data, 3, 2))
    self.assertEqual(len(ids), rep)

    ids = distinct_test_sets(CrossSampler(self.data, folds))
    self.assertEqual(len(ids), folds * rep)
def test_pickle(self):
    """Check that a fitted InductiveClassifier survives a pickle round trip.

    The classifier is fitted on a train/calibration split of 'iris',
    pickled to a file, loaded back, and both copies are asked to predict
    the first test instance at 0.1. The two predictions are printed and
    must be equal.
    """
    import os
    import pickle
    import tempfile

    train, test = next(RandomSampler(Table('iris'), 2, 1))
    train, cal = next(RandomSampler(train, 2, 1))
    ic = InductiveClassifier(InverseProbability(NaiveBayesLearner()))
    ic.fit(train, cal)
    before = ic(test[0].x, 0.1)
    print(before)

    # Use a throwaway directory so the test does not leave 'temp.cp'
    # behind in the current working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, 'temp.cp')
        with open(path, 'wb') as f:
            pickle.dump(ic, f)
        with open(path, 'rb') as f:
            ic2 = pickle.load(f)

    after = ic2(test[0].x, 0.1)
    print(after)
    # NOTE(review): assumes the prediction is deterministic for a fitted
    # classifier and compares with == (e.g. a list of class names).
    self.assertEqual(before, after)
def test_results_class(self):
    """Sanity-check classification result statistics on 'iris'.

    A CrossClassifier is evaluated on fresh random splits at several
    values of the second ``run`` argument and the resulting criteria are
    compared against fixed lower bounds.
    """
    iris = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)

    def evaluate(eps):
        # Every evaluation draws its own RandomSampler(iris, 2, 1) split.
        return run(classifier, eps, RandomSampler(iris, 2, 1))

    self.assertGreater(evaluate(0.5).empty_criterion(), 0.0)
    self.assertGreater(evaluate(0.1).singleton_criterion(), 0.8)
    self.assertGreater(evaluate(0.01).multiple_criterion(), 0.1)

    outcome = evaluate(0.1)
    self.assertGreater(outcome.singleton_correct(), 0.8)
    self.assertGreater(outcome.confidence(), 0.9)
    self.assertGreater(outcome.credibility(), 0.4)
def evaluate_nc_dataset_eps(nc_str, dataset, eps, id):
    """Run 100 random-split evaluations of one nonconformity measure.

    Args:
        nc_str: Python expression that evaluates to a nonconformity
            measure object.
        dataset: Argument passed to ``Table`` to load the data.
        eps: Passed to ``run`` as its second argument.
        id: Integer used to name the output file
            ``results/qsar/<id>.p``. (The name shadows the builtin but is
            kept for backward compatibility with callers.)

    Side effects:
        Prints one progress line per repetition and overwrites the output
        file with a pickled ``(accumulated_results, repetitions_done)``
        tuple.
    """
    # SECURITY: eval() executes arbitrary code. nc_str must come from a
    # trusted source (e.g. a hard-coded experiment list), never from
    # external input.
    nc = eval(nc_str)
    tab = Table(dataset)

    # The predictor type depends only on nc, so decide once outside the
    # loop; a fresh predictor is still constructed for every repetition.
    if isinstance(nc, ClassNC):
        make_predictor = InductiveClassifier
    else:
        make_predictor = InductiveRegressor

    res = None
    for rep in range(100):
        r = run(make_predictor(nc), eps, RandomSampler(tab, 2, 1))
        if res is None:
            res = r
        else:
            res.concatenate(r)
        print(rep + 1, nc_str, dataset, eps)
        # NOTE(review): the dump is assumed to be a per-repetition
        # checkpoint (it stores rep + 1); confirm against the original
        # layout.
        with open('results/qsar/%d.p' % id, 'wb') as f:
            pickle.dump((res, rep + 1), f)
def test_abs_error_normalized(self):
    """Compare AbsError and AbsErrorNormalized regressors on 'housing'.

    The table is normalized by span, then five InductiveRegressor
    variants are evaluated over 10 repetitions of CrossSampler(tab, 10).
    In every fold 100 instances of the training part are set aside for
    calibration. For each variant the median range, interdecile mean and
    error rate are printed, and the accuracy must exceed
    ``1 - eps - 0.03``.
    """
    tab = Table('housing')
    normalizer = Normalize(zero_based=True,
                           norm_type=Normalize.NormalizeBySpan)
    tab = normalizer(tab)

    # (label, regressor) pairs in evaluation order; the label is only used
    # to identify the variant when an assertion fails.
    variants = [
        ('AbsError(LinearRegression)',
         InductiveRegressor(AbsError(LinearRegressionLearner()))),
        ('AbsError(KNN)',
         InductiveRegressor(AbsError(KNNRegressionLearner(4)))),
        ('AbsErrorNormalized(exp=False)',
         InductiveRegressor(
             AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                                exp=False))),
        ('AbsErrorNormalized(exp=True)',
         InductiveRegressor(
             AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                                exp=True))),
        ('AbsErrorNormalized(rf)',
         InductiveRegressor(
             AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                                rf=RandomForestRegressor()))),
    ]
    collected = [ResultsRegr() for _ in variants]

    eps = 0.05
    for _ in range(10):
        for train, test in CrossSampler(tab, 10):
            # Hold out 100 training instances as the calibration set; all
            # variants share the same train/calibrate/test split.
            train, calibrate = next(
                RandomSampler(train, len(train) - 100, 100))
            for (_, regressor), res in zip(variants, collected):
                res.concatenate(
                    run_train_test(regressor, eps, train, test, calibrate))

    for res in collected:
        print(res.median_range(), res.interdecile_mean(),
              1 - res.accuracy())

    for (label, _), res in zip(variants, collected):
        self.assertGreater(res.accuracy(), 1 - eps - 0.03, msg=label)
def test_run(self):
    """Check the number of predictions ``run`` returns for ``rep=3``.

    With RandomSampler(tab, 4, 1) the expected count is three fifths of
    the table size; with CrossSampler(tab, 4) it is three times the table
    size.
    """
    reps = 3

    tab = Table('iris')
    cp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), 5)
    r = run(cp, 0.1, RandomSampler(tab, 4, 1), rep=reps)
    # Integer arithmetic: the expected value is a count, so avoid building
    # it through a float (3 * 1 / 5 * n) and relying on exact float
    # equality.
    self.assertEqual(len(r.preds), reps * len(tab) // 5)

    tab = Table('housing')
    cr = InductiveRegressor(AbsError(LinearRegressionLearner()))
    r = run(cr, 0.1, CrossSampler(tab, 4), rep=reps)
    self.assertEqual(len(r.preds), reps * len(tab))
def test_SVM(self):
    """Check SVMDistance predictions on a two-class subset of 'iris'.

    Rows 50 onward are kept with class values shifted down by one. Three
    instances are tested: two where a single class is expected at 0.1,
    and one where the prediction is empty at 0.2 and contains both
    classes at 0.01.
    """
    iris = Table('iris')
    tab = Table(iris.X[50:], iris.Y[50:] - 1)  # versicolor, virginica

    def fit_for(index):
        # Take instance `index` via get_instance, split the remaining
        # training data 2:1 into train/calibrate and fit a classifier.
        remaining, held_out = get_instance(tab, index)
        proper, calibration = next(RandomSampler(remaining, 2, 1))
        fitted = InductiveClassifier(
            SVMDistance(skl_svm.SVC()), proper, calibration)
        return fitted, held_out

    # clear cases
    icp, test = fit_for(30)
    self.assertEqual(icp(test.x, 0.1), ['v1'])

    icp, test = fit_for(85)
    self.assertEqual(icp(test.x, 0.1), ['v2'])

    # border case
    icp, test = fit_for(27)
    self.assertEqual(icp(test.x, 0.2), [])
    self.assertEqual(icp(test.x, 0.01), ['v1', 'v2'])
def test_avg_error_knn(self):
    """Exercise AvgErrorKNN: inverse of avg_abs and use in a regressor.

    First checks ``avg_abs_inv`` on a fixed example, then on random
    samples of even and odd length verifies that ``avg_abs`` crosses the
    requested value at the returned bounds. Finally checks the interval
    width on the fixture instance and that ``run`` on 'housing' yields no
    NaN widths.
    """
    measure = AvgErrorKNN(Euclidean)
    self.assertEqual(measure.avg_abs_inv(6 / 5, [1, 2, 3, 4, 5]), (3, 3))

    step = 0.001
    for extra in (0, 1):
        ys = np.random.uniform(0, 1, 10 + extra)
        nc = 0.4
        lower, upper = measure.avg_abs_inv(nc, ys)
        # avg_abs is above nc just outside [lower, upper] and below nc
        # just inside it.
        self.assertGreater(measure.avg_abs(lower - step, ys), nc)
        self.assertLess(measure.avg_abs(lower + step, ys), nc)
        self.assertLess(measure.avg_abs(upper - step, ys), nc)
        self.assertGreater(measure.avg_abs(upper + step, ys), nc)

    regressor = InductiveRegressor(
        AvgErrorKNN(Euclidean, 10), self.train, self.calibrate)
    lower, upper = regressor(self.test.x, 0.1)
    self.assertLess(upper - lower, 30.0)

    outcome = run(InductiveRegressor(AvgErrorKNN(Euclidean, 10)), 0.1,
                  RandomSampler(Table("housing"), 2, 1), rep=10)
    self.assertFalse(any(np.isnan(width) for width in outcome.widths()))
import os

from Orange.classification import LogisticRegressionLearner
from Orange.data import Table

from cp.classification import TransductiveClassifier, CrossClassifier
from cp.evaluation import RandomSampler, LOOSampler, ResultsClass, run_train_test
from cp.nonconformity import InverseProbability

# Experiment script: compares TransductiveClassifier and CrossClassifier on
# growing prefixes of 30 fixed random training sets, printing accuracy,
# multiple_criterion and time for each prefix size.

tab = Table(
    os.path.join(
        os.path.dirname(__file__),
        './dataSets/MitoToxStdzdRDkitDescForModelling.tab',
    )
)

trains, tests = [], []
lo, hi = 10, 40

# Draw the 30 train/test splits once so that every prefix size is evaluated
# on the same data.
for rep in range(30):
    train, test = next(RandomSampler(tab, 100, len(tab) - 100))
    trains.append(train)
    tests.append(test)
    # NOTE(review): assumed to run for every split (nesting reconstructed
    # from the use of `train`). Each leave-one-out training part of the
    # smallest prefix must contain more than one class.
    for loo_train, _loo_test in LOOSampler(train[:lo]):
        assert len({inst.get_class() for inst in loo_train}) > 1

for n in range(lo, hi, 2):
    print(n)
    tcp = TransductiveClassifier(InverseProbability(LogisticRegressionLearner()))
    ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), n)
    tr, cr = ResultsClass(), ResultsClass()
    for train, test in zip(trains, tests):
        prefix = train[:n]
        tr.concatenate(run_train_test(tcp, 0.1, prefix, test))
        cr.concatenate(run_train_test(ccp, 0.1, train[:n], test))
    print(tr.accuracy(), tr.multiple_criterion(), tr.time())
    print(cr.accuracy(), cr.multiple_criterion(), cr.time())
def test_random(self):
    """Check that RandomSampler(data, a, b) splits the data in ratio a:b.

    The first training element must be a RowInstance and the ratio of
    train size to test size must (almost) equal ``a / b``.
    """
    a, b = 3, 2
    s = RandomSampler(self.data, a, b)
    train, test = next(s)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(train[0], RowInstance)
    self.assertAlmostEqual(len(train) / len(test), a / b)
def test_knn_speed(self):
    """Fit a KNNDistance InductiveClassifier on the spambase table.

    Loads '../datasets-class/spambase.tab' relative to this file, splits
    it 2:1 into train and calibration parts and constructs the classifier
    from them.
    """
    here = os.path.dirname(__file__)
    spambase_path = os.path.join(here, '../datasets-class/spambase.tab')
    tab = Table(spambase_path)
    split = RandomSampler(tab, 2, 1)
    train, calibrate = next(split)
    icp = InductiveClassifier(KNNDistance(Euclidean), train, calibrate)