def test_multiclass_auc_multi_learners(self):
    """AUC on iris: logistic regression clears 0.6, majority stays near chance."""
    learners = [LogisticRegressionLearner(), MajorityLearner()]
    results = CrossValidation(self.iris, learners, k=10)
    scores = AUC(results)
    # Logistic regression should be clearly informative ...
    self.assertGreater(scores[0], 0.6)
    # ... while the majority baseline hovers in the chance band (0.4, 0.6).
    self.assertLess(scores[1], 0.6)
    self.assertGreater(scores[1], 0.4)
def test_multiclass_auc_multi_learners(self):
    """Logistic regression clearly beats the majority baseline on iris AUC.

    The majority classifier carries no information, so its AUC should sit
    in the chance band (0.4, 0.6); logistic regression should exceed 0.6.
    """
    data = Orange.data.Table('iris')
    learners = [
        Orange.classification.LogisticRegressionLearner(),
        Orange.classification.MajorityLearner(),
    ]
    res = Orange.evaluation.testing.CrossValidation(data, learners, k=10)
    auc = AUC(res)  # score once instead of re-evaluating per assertion
    # assertGreater/assertLess report the offending value on failure,
    # unlike assertTrue on a chained comparison.
    self.assertGreater(auc[0], 0.6)
    self.assertLess(auc[1], 0.6)
    self.assertGreater(auc[1], 0.4)
def test_multiclass_auc_multi_learners(self):
    """Cross-validated AUC: logistic regression > 0.6, majority within (0.4, 0.6)."""
    learners = [
        Orange.classification.LogisticRegressionLearner(),
        Orange.classification.MajorityLearner(),
    ]
    res = Orange.evaluation.testing.CrossValidation(self.iris, learners, k=10)
    auc = AUC(res)
    self.assertGreater(auc[0], 0.6)
    self.assertLess(auc[1], 0.6)
    self.assertGreater(auc[1], 0.4)
def test_auc_on_multiclass_data_returns_1d_array(self):
    """AUC of a single learner on multiclass data must be a flat 1-D array."""
    titanic = Table("titanic")[:100]
    lenses = Table("lenses")[:100]
    majority = MajorityLearner()
    # Same check on both datasets, in the original evaluation order.
    for data in (lenses, titanic):
        results = TestOnTrainingData(data, [majority])
        self.assertEqual(AUC(results).ndim, 1)
def test_auc_on_multiclass_data_returns_1d_array(self):
    """Multiclass AUC must come back as a 1-D array, never 2-D."""
    titanic = Table('titanic')[:100]
    lenses = Table(test_filename('datasets/lenses.tab'))[:100]
    majority = MajorityLearner()
    for data in (lenses, titanic):
        results = TestOnTrainingData(data, [majority])
        auc = AUC(results)
        self.assertEqual(auc.ndim, 1)
def compute_auc(self, actual, predicted):
    """Score one method's 0/1 predictions against *actual* labels with AUC.

    AUC is a probability-based score, so the hard predictions are also
    expanded into degenerate 0/1 probability columns; without setting
    ``results.probabilities`` the Results object carries no probabilities
    for the scorer to use.
    """
    predicted = np.array(predicted).reshape(1, -1)
    # Column 1 is P(class=1), column 0 its complement.
    probabilities = np.zeros((1, predicted.shape[1], 2))
    probabilities[0, :, 1] = predicted[0]
    probabilities[0, :, 0] = 1 - predicted[0]
    results = Results(
        nmethods=1,
        domain=Domain([], [DiscreteVariable(values='01')]),
        actual=actual, predicted=predicted)
    results.probabilities = probabilities
    return AUC(results)[0]
def compute_auc(self, actual, predicted):
    """Return the AUC of one method's binary predictions against *actual*."""
    predicted = np.array(predicted).reshape(1, -1)
    # Derive degenerate class probabilities from the hard 0/1 predictions:
    # last axis holds [P(class=0), P(class=1)].
    pos = predicted[0].astype(float)
    probabilities = np.stack([1.0 - pos, pos], axis=-1)[np.newaxis, ...]
    results = Results(
        nmethods=1,
        domain=Domain([], [DiscreteVariable(values='01')]),
        actual=actual, predicted=predicted)
    results.probabilities = probabilities
    return AUC(results)[0]
def test_orange_models(self):
    """Permutation importance works with Orange learners on all task types.

    Covers a classifier scored with CA (heart), a classifier scored with
    AUC (iris) and a regressor scored with MSE (housing).
    """
    n_repeats = self.n_repeats

    def check(data, model, score):
        # One row of importances per attribute, one column per repeat,
        # and attribute names returned in domain order.
        res = permutation_feature_importance(model, data, score, n_repeats)
        self.assertEqual(res[0].shape,
                         (len(data.domain.attributes), n_repeats))
        self.assertEqual(res[1], [a.name for a in data.domain.attributes])

    check(self.heart, NaiveBayesLearner()(self.heart), CA())
    check(self.iris, TreeLearner()(self.iris), AUC())
    check(self.housing, TreeRegressionLearner()(self.housing), MSE())
def test_tree(self):
    """sklearn tree on iris: informative (AUC > 0.8) but not a perfect 1.0."""
    res = CrossValidation(self.iris, [SklTreeLearner()], k=2)
    auc = AUC(res)[0]
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
def test_constant_prob(self):
    """A constant (majority) classifier must score exactly chance-level AUC."""
    data = Orange.data.Table('iris')
    majority = Orange.classification.MajorityLearner()
    res = Orange.evaluation.TestOnTrainingData(data, [majority])
    self.assertEqual(AUC(res)[0], 0.5)
def test_tree(self):
    """Tree on iris via 2-fold CV: AUC above 0.8 yet strictly below 1."""
    data = Orange.data.Table('iris')
    tree = Orange.classification.TreeLearner()
    res = Orange.evaluation.CrossValidation(data, [tree], k=2)
    auc = AUC(res)[0]
    # Split the chained assertTrue comparison into two assertions so a
    # failure reports the actual AUC and the violated bound.
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
def test_SGDClassification(self):
    """SGD classifier reaches a clearly above-chance AUC on iris."""
    learner = SGDClassificationLearner()
    res = CrossValidation(k=3)(self.iris, [learner])
    self.assertGreater(AUC(res)[0], 0.8)
def test_auc_missing_values(self):
    """AUC-based permutation importance on data with missing values (heart)."""
    data = self.heart
    model = RandomForestLearner(random_state=0)(data)
    res = permutation_feature_importance(model, data, AUC(), self.n_repeats)
    # Pinned regression value for the fixed random seed.
    self.assertAlmostEqual(res[0].mean(), 0.013, 3)
def test_constant_prob(self):
    """Majority classifier's AUC is exactly 0.5 (chance)."""
    res = TestOnTrainingData(self.iris, [MajorityLearner()])
    self.assertEqual(AUC(res)[0], 0.5)
def test_auc_orange_model(self):
    """AUC-scored permutation importance of naive Bayes on titanic."""
    data = self.titanic
    model = NaiveBayesLearner()(data)
    res = permutation_feature_importance(model, data, AUC(), self.n_repeats)
    # Pinned regression value for this model/data combination.
    self.assertAlmostEqual(res[0].mean(), 0.044, 3)
def test_tree(self):
    """Tree AUC on iris lies strictly between 0.8 and 1."""
    tree = Orange.classification.TreeLearner()
    res = Orange.evaluation.CrossValidation(self.iris, [tree], k=2)
    auc = AUC(res)[0]
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
print(classifier)
# Persist the textual form of the learned model for this cycle.
# NOTE(review): the original called "model.txt".format(cycle) — no placeholder,
# so the cycle argument was dead and every cycle overwrote the same file;
# the placeholder restores the evident intent of one file per cycle.
with open("model{}.txt".format(cycle), "wt") as fmodel:
    fmodel.write(str(classifier))

# test model + other methods
testdata = Orange.data.Table('testdata')
bayes = Orange.classification.NaiveBayesLearner()
logistic = Orange.classification.LogisticRegressionLearner()
random_forest = Orange.classification.RandomForestLearner()
svm = Orange.classification.SVMLearner()
tree = Orange.classification.TreeLearner()
cn2 = Orange.classification.rules.CN2UnorderedLearner()
learners = [learner, logistic, tree, bayes, cn2, random_forest, svm]
res = TestOnTestData(data, testdata, learners)
ca = CA(res)
auc = AUC(res)
ll = LogLoss(res)
names = [
    'logrules', 'logistic', 'tree', 'naive-bayes', 'cn2',
    'random-forest', 'svm'
]
# Build a tab-separated score table, one row per method.
scores = ""
scores += "CA\tAUC\tLogLoss\tMethod\n"
for ni, n in enumerate(names):
    scores += "{}\t{}\t{}\t{}\n".format(ca[ni], auc[ni], ll[ni], n)
print(scores)
# Context manager guarantees the score file is flushed and closed
# (the original left both file handles open).
with open("scores.txt", "wt") as fscores:
    fscores.write(scores)
# find critical examples
from Orange.classification import LogisticRegressionLearner, NaiveBayesLearner, \
    RandomForestLearner
import orangecontrib.evcrules.rules as rules
from orangecontrib.evcrules.logistic import LRRulesLearner

# Benchmark rule-augmented logistic regression against standard learners
# with 5-fold cross-validation on a mix of UCI datasets.
datasets = [
    'ionosphere', 'adult_sample', 'iris', 'breast-cancer', 'bupa', 'titanic'
]
for d in datasets:
    data = Table(d)
    evc_rules = rules.RulesStar(evc=True, add_sub_rules=True,
                                parent_alpha=0.5)
    m_rules = rules.RulesStar(evc=False, m=22, add_sub_rules=True,
                              parent_alpha=0.5)
    # compare lr with rules, lr without rules and sklearn's lr
    learners = [
        LRRulesLearner(opt_penalty=True, rule_learner=evc_rules),
        LRRulesLearner(opt_penalty=True, rule_learner=m_rules),
        LRRulesLearner(opt_penalty=True),
        LogisticRegressionLearner(C=1),
        NaiveBayesLearner(),
        RandomForestLearner(),
    ]
    res = CrossValidation(data, learners, k=5)
    print("Dataset: ", d)
    for l, ca, auc, ll in zip(learners, CA(res), AUC(res), LogLoss(res)):
        print("learner: {}\nCA: {}\nAUC: {}\n LL: {}".format(l, ca, auc, ll))