def test_error_model(self):
    """ErrorModelNC should yield tight intervals and valid coverage."""
    # Inductive regressor with an error-model nonconformity, both LOO variants.
    for use_loo in (False, True):
        regressor = InductiveRegressor(
            ErrorModelNC(LinearRegressionLearner(),
                         LinearRegressionLearner(), loo=use_loo),
            self.train, self.calibrate)
        low, high = regressor(self.test.x, 0.1)
        self.assertLess(high - low, 30.0)

    # Baseline: plain absolute-error nonconformity with a random forest.
    regressor = InductiveRegressor(AbsError(RandomForestRegressionLearner()))
    res = run(regressor, 0.1, CrossSampler(Table('housing'), 20))
    self.assertGreater(res.accuracy(), 0.85)
    print(res.accuracy(), res.median_range(), res.interdecile_mean())

    # Error model on top of a random forest.
    regressor = InductiveRegressor(
        ErrorModelNC(RandomForestRegressionLearner(),
                     LinearRegressionLearner()))
    res = run(regressor, 0.1, CrossSampler(Table('housing'), 20))
    self.assertGreater(res.accuracy(), 0.85)
    print(res.accuracy(), res.median_range(), res.interdecile_mean())

    # Same again, with leave-one-out error estimates.
    regressor = InductiveRegressor(
        ErrorModelNC(RandomForestRegressionLearner(),
                     LinearRegressionLearner(), loo=True))
    res = run(regressor, 0.1, CrossSampler(Table('housing'), 20))
    self.assertGreater(res.accuracy(), 0.85)
    print(res.accuracy(), res.median_range(), res.interdecile_mean())
def test_results_regr(self):
    """A stricter significance level must widen every interval statistic."""
    housing = Table('housing')
    regressor = CrossRegressor(AbsErrorKNN(Euclidean, 10, average=True), 5)
    strict = run(regressor, 0.1, RandomSampler(housing, 2, 1))
    loose = run(regressor, 0.5, RandomSampler(housing, 2, 1))
    # eps=0.1 intervals must dominate eps=0.5 intervals on each statistic.
    for stat in ('median_range', 'mean_range', 'interdecile_range',
                 'interdecile_mean', 'std_dev'):
        self.assertGreater(getattr(strict, stat)(), getattr(loose, stat)())
def test_run(self):
    """run() should produce rep x (evaluated fraction) predictions."""
    iris = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    res = run(classifier, 0.1, RandomSampler(iris, 4, 1), rep=3)
    # RandomSampler(4, 1) holds out one fifth of the data, repeated 3 times.
    self.assertEqual(len(res.preds), 3 * 1 / 5 * len(iris))

    housing = Table('housing')
    regressor = InductiveRegressor(AbsError(LinearRegressionLearner()))
    res = run(regressor, 0.1, CrossSampler(housing, 4), rep=3)
    # Cross-sampling predicts every instance once per repetition.
    self.assertEqual(len(res.preds), 3 * len(housing))
def test_results_class(self):
    """Sanity checks for classification criteria at several eps levels."""
    iris = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)

    # Loose eps produces some empty prediction sets.
    empty = run(classifier, 0.5, RandomSampler(iris, 2, 1)).empty_criterion()
    self.assertGreater(empty, 0.0)
    # Moderate eps produces mostly singletons.
    single = run(classifier, 0.1,
                 RandomSampler(iris, 2, 1)).singleton_criterion()
    self.assertGreater(single, 0.8)
    # Very strict eps forces multi-label prediction sets.
    multiple = run(classifier, 0.01,
                   RandomSampler(iris, 2, 1)).multiple_criterion()
    self.assertGreater(multiple, 0.1)

    outcome = run(classifier, 0.1, RandomSampler(iris, 2, 1))
    self.assertGreater(outcome.singleton_correct(), 0.8)
    self.assertGreater(outcome.confidence(), 0.9)
    self.assertGreater(outcome.credibility(), 0.4)
def test_cross(self):
    """Mondrian cross-conformal classification stays accurate on iris."""
    mondrian_cp = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 4, mondrian=True)
    outcome = run(mondrian_cp, 0.1, LOOSampler(Table('iris')))
    self.assertGreater(outcome.accuracy(), 0.85)
    self.assertGreater(outcome.singleton_criterion(), 0.85)
def evaluate_nc_dataset_eps(nc_str, dataset, eps, id):
    """Repeatedly evaluate a nonconformity measure on one dataset at one eps,
    accumulating results over 100 random-split repetitions and pickling them.

    nc_str: Python expression for the nonconformity measure (evaluated below).
    dataset: dataset name/path loadable by Table.
    eps: significance level passed to run().
    id: integer used to name the output pickle file.
    """
    # SECURITY NOTE(review): eval() executes arbitrary code -- call only with
    # trusted nc_str values (this is an internal experiment driver).
    nc = eval(nc_str)
    tab = Table(dataset)
    res = None
    for rep in range(100):
        # Classification vs. regression is dispatched on the measure's type.
        if isinstance(nc, ClassNC):
            r = run(InductiveClassifier(nc), eps, RandomSampler(tab, 2, 1))
        else:
            r = run(InductiveRegressor(nc), eps, RandomSampler(tab, 2, 1))
        if res is None:
            res = r
        else:
            res.concatenate(r)
        print(rep + 1, nc_str, dataset, eps)
        # Checkpoint the cumulative results together with the number of
        # completed repetitions so partial runs survive interruption.
        # NOTE(review): original indentation was lost; the dump presumably sits
        # inside the loop (the stored rep + 1 tracks progress) -- confirm.
        with open('results/qsar/%d.p' % id, 'wb') as f:
            pickle.dump((res, rep + 1), f)
def test_accuracy(self):
    """Empirical accuracy should track the nominal 1 - eps guarantee."""
    iris = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    significance = 0.1
    result = run(classifier, significance, LOOSampler(iris))
    # Conformal validity: accuracy close to the nominal confidence level.
    self.assertAlmostEqual(result.accuracy(), 1 - significance, delta=0.03)
def evaluate_ncs(tab, cp, nc_strs, id):
    """Evaluate several nonconformity measures on one table with a given
    conformal-predictor factory, then pickle the results keyed by measure.

    tab: data table to evaluate on.
    cp: callable building a conformal predictor from a nonconformity measure.
    nc_strs: Python expressions for the measures (evaluated below).
    id: identifier used to name the output pickle file.
        NOTE(review): shadows the builtin `id`; kept for interface stability.
    """
    res = {}
    for nc_str in nc_strs:
        # SECURITY NOTE(review): eval() executes arbitrary code -- trusted
        # measure expressions only.
        nc = eval(nc_str)
        r = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        res[nc_str] = r
        print(id, nc_str)  # progress report per measure
    with open('results/%s.p' % id, 'wb') as f:
        pickle.dump(res, f)
def evaluate_datasets(datasets, cp, nc_str, id):
    """Evaluate one nonconformity measure across several datasets and pickle
    the results keyed by dataset id.

    datasets: iterable of dataset paths; the id is the file stem.
    cp: callable building a conformal predictor from a nonconformity measure.
    nc_str: Python expression for the measure (evaluated below).
    id: integer used to name the output pickle file (shadows builtin `id`).
    """
    res = {}
    for dataset in datasets:
        # Dataset id = file name without directories or extension.
        dataset_id = dataset.split('/')[-1].split('.')[0]
        # Impute missing values and drop constant attributes before use.
        imp = Orange.preprocess.Impute()
        rc = Orange.preprocess.preprocess.RemoveConstant()
        tab = rc(imp(Table(dataset)))
        # SECURITY NOTE(review): eval() executes arbitrary code -- trusted
        # measure expressions only. Re-evaluated per dataset for a fresh measure.
        nc = eval(nc_str)
        r = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        res[dataset_id] = r
        print(nc_str, dataset_id)  # progress report per dataset
    print(nc_str.upper())  # marks completion of this measure
    with open('results/nc/%d.p' % id, 'wb') as f:
        pickle.dump(res, f)
def test_accuracy(self):
    """Mondrian conditioning should even out per-class accuracies."""
    tab = Table('iris')[:120]  # truncated set: last class under-represented
    per_class = []
    for mondrian in (False, True):
        classifier = CrossClassifier(
            InverseProbability(LogisticRegressionLearner()), 2,
            mondrian=mondrian)
        result = run(classifier, 0.2, LOOSampler(tab), rep=10)
        accuracies = [result.accuracy(label) for label in range(3)]
        per_class.append(accuracies)
        print(result.accuracy(), accuracies)
    # Class-conditional validity: the accuracy spread shrinks under Mondrian.
    plain_span = max(per_class[0]) - min(per_class[0])
    mondrian_span = max(per_class[1]) - min(per_class[1])
    self.assertLess(mondrian_span, plain_span)
def test_abs_error_rf(self):
    """AbsErrorRF should give tight intervals with either base learner."""
    # Inductive intervals stay narrow for both underlying learners.
    for base in (RandomForestRegressionLearner(), LinearRegressionLearner()):
        regressor = InductiveRegressor(
            AbsErrorRF(base, RandomForestRegressor()),
            self.train, self.calibrate)
        low, high = regressor(self.test.x, 0.1)
        self.assertLess(high - low, 30.0)

    # Cross-sampled evaluation on housing remains valid.
    regressor = InductiveRegressor(
        AbsErrorRF(RandomForestRegressionLearner(), RandomForestRegressor()))
    result = run(regressor, 0.1, CrossSampler(Table('housing'), 10))
    self.assertGreater(result.accuracy(), 0.85)
    print(result.median_range(), result.interdecile_mean())
def test_LOOClassNC(self):
    """LOOClassNC should predict the correct singleton for every option mix."""
    for include in (False, True):
        for relative in (False, True):
            for neighbourhood in ('fixed', 'variable'):
                measure = LOOClassNC(NaiveBayesLearner(), Euclidean, 20,
                                     relative=relative, include=include,
                                     neighbourhood=neighbourhood)
                icp = InductiveClassifier(measure, self.train, self.calibrate)
                prediction = icp(self.test.x, 0.1)
                print(prediction)
                self.assertEqual(prediction, ['Iris-setosa'])

    # Default configuration remains accurate under cross-sampling.
    icp = InductiveClassifier(LOOClassNC(NaiveBayesLearner(), Euclidean, 20))
    result = run(icp, 0.1, CrossSampler(Table('iris'), 4))
    self.assertGreater(result.accuracy(), 0.85)
    self.assertGreater(result.singleton_criterion(), 0.8)
def test_LOORegrNC(self):
    """LOORegrNC should yield bounded intervals for every option mix."""
    for include in (False, True):
        for relative in (False, True):
            for neighbourhood in ('fixed', 'variable'):
                # Note: the learner CLASS (not an instance) is passed here.
                measure = LOORegrNC(LinearRegressionLearner, Euclidean, 150,
                                    relative=relative, include=include,
                                    neighbourhood=neighbourhood)
                regressor = InductiveRegressor(measure,
                                               self.train, self.calibrate)
                low, high = regressor(self.test.x, 0.1)
                print(low, high)
                self.assertLess(high - low, 20.0)

    # Default configuration remains valid and narrow under cross-sampling.
    housing = Table('housing')
    regressor = InductiveRegressor(
        LOORegrNC(LinearRegressionLearner, Euclidean, 150))
    result = run(regressor, 0.1, CrossSampler(housing, 4))
    self.assertGreater(result.accuracy(), 0.85)
    self.assertLess(result.mean_range(), 15.0)
def test_avg_error_knn(self):
    """avg_abs_inv must invert avg_abs, and AvgErrorKNN must produce
    finite (non-NaN) interval widths end to end."""
    ncm = AvgErrorKNN(Euclidean)
    # Analytic case: mean |3 - y| over [1..5] is 6/5; both bounds invert to 3.
    self.assertEqual(ncm.avg_abs_inv(6 / 5, [1, 2, 3, 4, 5]), (3, 3))
    for odd in [0, 1]:  # exercise both even- and odd-sized samples
        ys = np.random.uniform(0, 1, 10 + odd)
        nc = 0.4
        lo, hi = ncm.avg_abs_inv(nc, ys)
        # Just outside [lo, hi] the mean error exceeds nc; just inside it is below.
        self.assertGreater(ncm.avg_abs(lo - 0.001, ys), nc)
        self.assertLess(ncm.avg_abs(lo + 0.001, ys), nc)
        self.assertLess(ncm.avg_abs(hi - 0.001, ys), nc)
        self.assertGreater(ncm.avg_abs(hi + 0.001, ys), nc)

    icr = InductiveRegressor(AvgErrorKNN(Euclidean, 10),
                             self.train, self.calibrate)
    lo, hi = icr(self.test.x, 0.1)
    self.assertLess(hi - lo, 30.0)

    r = run(InductiveRegressor(AvgErrorKNN(Euclidean, 10)), 0.1,
            RandomSampler(Table("housing"), 2, 1), rep=10)
    # Generator instead of a materialized list: any() short-circuits on the
    # first NaN and no throwaway list is built.
    self.assertFalse(any(np.isnan(w) for w in r.widths()))
def test_time(self):
    """Smoke test: results must expose a timing statistic without raising."""
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    result = run(classifier, 0.1, LOOSampler(Table('iris')))
    result.time()  # only checks that the call succeeds
def test_experimental(self):
    """Smoke test for ExperimentalNC on the housing data."""
    regressor = InductiveRegressor(
        ExperimentalNC(RandomForestRegressor(n_estimators=20)),
        self.train, self.calibrate)
    result = run(regressor, 0.1, CrossSampler(Table('housing'), 10))
    print(result.accuracy(), result.median_range())