Example #1
def tune_penalty(self, data):
    # grid-search the regularization penalty, scoring each candidate
    # by 5-fold cross-validated log loss
    learner = LRRulesLearner(fit_intercept=self.fit_intercept,
                             intercept_scaling=self.intercept_scaling)
    penalties = [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10., 100.]
    scores = []
    for pen in penalties:
        learner.penalty = pen
        res = CrossValidation(data, [learner], k=5, random_state=1111)
        ll = LogLoss(res)
        scores.append(ll[0])  # LogLoss returns one score per learner
    # pick the penalty with the lowest cross-validated log loss
    return penalties[scores.index(min(scores))]
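
A minimal standalone sketch of the same penalty-tuning pattern with a stock learner (the dataset name and the C grid are illustrative assumptions, not from the original):

from Orange.data import Table
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation import CrossValidation, LogLoss

data = Table('titanic')
best_c, best_ll = None, float('inf')
for c in [0.01, 0.1, 1.0, 10.0]:  # illustrative penalty grid
    res = CrossValidation(data, [LogisticRegressionLearner(C=c)], k=5)
    ll = LogLoss(res)[0]  # one score for the single learner
    if ll < best_ll:
        best_c, best_ll = c, ll
print("best C:", best_c, "log loss:", best_ll)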
Example #2
    def test_log_loss_calc(self):
        data = Table('titanic')
        learner = LogisticRegressionLearner()
        results = TestOnTrainingData(data, [learner])

        # expand the 0/1 class vector into two-column one-hot form
        # so it lines up with the predicted probability matrix
        actual = np.copy(results.actual)
        actual = actual.reshape(actual.shape[0], 1)
        actual = np.hstack((1 - actual, actual))
        probab = results.probabilities[0]

        # the manual computation must agree with Orange's scorer
        ll_calc = self._log_loss(actual, probab)
        ll_orange = LogLoss(results)
        self.assertAlmostEqual(ll_calc, ll_orange[0])
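
The _log_loss helper is defined elsewhere in the test class; a plausible sketch of a manual implementation (the signature is assumed from the call above):

import numpy as np

def _log_loss(self, actual, probab, eps=1e-15):
    # clip probabilities away from 0 and 1 so the logarithm stays finite,
    # then average the negative log-likelihood of the true classes
    probab = np.clip(probab, eps, 1 - eps)
    return -np.mean(np.sum(actual * np.log(probab), axis=1))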
Example #3
def test_log_loss(self):
    data = Table('iris')
    majority = MajorityLearner()
    results = TestOnTrainingData(data, [majority])
    ll = LogLoss(results)
    self.assertAlmostEqual(ll[0], -np.log(1 / 3))
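
The assertion holds because iris has three equally represented classes (50 instances each): MajorityLearner predicts the training class distribution, assigning probability 1/3 to every class, so the log loss evaluates exactly to -np.log(1/3) ≈ 1.0986.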
Example #4
# save the learned model to a text file
# ('classifier' and 'cycle' come from the surrounding script; the original
# called "model.txt".format(cycle), a no-op since the string has no placeholder)
with open("model.txt", "wt") as fmodel:
    fmodel.write(str(classifier))

# evaluate the model alongside other methods on a separate test set
testdata = Orange.data.Table('testdata')
bayes = Orange.classification.NaiveBayesLearner()
logistic = Orange.classification.LogisticRegressionLearner()
random_forest = Orange.classification.RandomForestLearner()
svm = Orange.classification.SVMLearner()
tree = Orange.classification.TreeLearner()
cn2 = Orange.classification.rules.CN2UnorderedLearner()
# 'learner' (the logrules model) and 'data' come from the surrounding script
learners = [learner, logistic, tree, bayes, cn2, random_forest, svm]
res = TestOnTestData(data, testdata, learners)
ca = CA(res)
auc = AUC(res)
ll = LogLoss(res)

names = [
    'logrules', 'logistic', 'tree', 'naive-bayes', 'cn2', 'random-forest',
    'svm'
]
scores = ""
scores += "CA\tAUC\tLogLoss\tMethod\n"
for ni, n in enumerate(names):
    scores += "{}\t{}\t{}\t{}\n".format(ca[ni], auc[ni], ll[ni], n)
print(scores)
fscores = open("scores.txt", "wt")
fscores.write(scores)
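
Each Orange scorer returns one value per learner, in the same order as learners, so the table rows can also be ranked directly; a small follow-on sketch:

# rank the methods from best (lowest) to worst log loss
for name, loss in sorted(zip(names, ll), key=lambda t: t[1]):
    print("{:<15}{:.4f}".format(name, loss))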

# find critical examples ('arg' refers to a module from the surrounding project)
indices, criticality, rules = arg.find_critical(learner, data)
Example #5
from Orange.data import Table
from Orange.evaluation import CrossValidation, CA, AUC, LogLoss
from Orange.classification import LogisticRegressionLearner, NaiveBayesLearner, \
                                  RandomForestLearner
import orangecontrib.evcrules.rules as rules
from orangecontrib.evcrules.logistic import LRRulesLearner

datasets = [
    'ionosphere', 'adult_sample', 'iris', 'breast-cancer', 'bupa', 'titanic'
]
for d in datasets:
    data = Table(d)
    rule_learner = rules.RulesStar(evc=True,
                                   add_sub_rules=True,
                                   parent_alpha=0.5)
    rule_learner_m = rules.RulesStar(evc=False,
                                     m=22,
                                     add_sub_rules=True,
                                     parent_alpha=0.5)
    # compare LR with rules (EVC and m-estimate variants), plain LR without
    # rules, and three baselines: sklearn's LR, naive Bayes, random forest
    learners = [
        LRRulesLearner(opt_penalty=True, rule_learner=rule_learner),
        LRRulesLearner(opt_penalty=True, rule_learner=rule_learner_m),
        LRRulesLearner(opt_penalty=True),
        LogisticRegressionLearner(C=1),
        NaiveBayesLearner(),
        RandomForestLearner()
    ]
    res = CrossValidation(data, learners, k=5)
    print("Dataset: ", d)
    for l, ca, auc, ll in zip(learners, CA(res), AUC(res), LogLoss(res)):
        print("learner: {}\nCA: {}\nAUC: {}\n LL: {}".format(l, ca, auc, ll))