Example #1
0
def computeDists(data, weight=0, targetClass=0, N=100, learner=None):
    """Compute distributions of likelihood ratio statistics of extreme (best) rules.

    The learner's rule finder is temporarily given an
    ``orange.RuleEvaluator_LRS`` evaluator (with ``storeRules`` enabled) and
    an ``orange.RuleValidator_LRS(alpha=1.0)`` stopping validator whose
    ``max_rule_complexity`` is set to 0.  For each of ``N`` repetitions a
    data set is produced by ``createRandomDataSet(data)``, the rule finder is
    run on it for ``targetClass`` and the quality of the returned rule is
    recorded.  The recorded qualities are passed to
    ``compParameters(maxVals, 1.0, 1.0)``.

    If ``learner`` is not given, one is obtained from ``createLearner()``.

    The original evaluator and stopping validator are put back even when one
    of the sampling repetitions raises, so the caller's learner is never left
    in the temporary configuration.

    Returns a one-element list holding the result of ``compParameters``.
    """
    if not learner:
        learner = createLearner()

    #########################
    ## Learner preparation ##
    #########################
    oldStopper = learner.ruleFinder.ruleStoppingValidator
    evaluator = learner.ruleFinder.evaluator
    try:
        learner.ruleFinder.evaluator = orange.RuleEvaluator_LRS()
        learner.ruleFinder.evaluator.storeRules = True
        learner.ruleFinder.ruleStoppingValidator = orange.RuleValidator_LRS(
            alpha=1.0)
        learner.ruleFinder.ruleStoppingValidator.max_rule_complexity = 0

        # loop through N (sampling repetitions)
        maxVals = []
        for _ in range(N):
            # create data set (remove and randomize)
            tempData = createRandomDataSet(data)
            # start every repetition with an empty list of stored rules
            learner.ruleFinder.evaluator.rules = orange.RuleList()
            # Next, learn a rule
            bestRule = learner.ruleFinder(tempData, weight, targetClass,
                                          orange.RuleList())
            maxVals.append(bestRule.quality)
        extremeDists = [compParameters(maxVals, 1.0, 1.0)]
    finally:
        #####################
        ## Restore learner ##
        #####################
        learner.ruleFinder.evaluator = evaluator
        learner.ruleFinder.ruleStoppingValidator = oldStopper
    return extremeDists
Example #2
0
def add_sub_rules(rules, examples, weight, learner, dists):
    """Return ``rules`` extended with sub-rules and per-class default rules.

    For every rule in ``rules`` candidate sub-rules are generated, starting
    from a clone with no conditions and growing by appending conditions taken
    from the original rule.  Each candidate is scored with
    ``learner.ruleFinder.evaluator`` and added to the result when it is not
    ``rules_equal`` to a rule already in the result, has at least one
    condition, and its quality exceeds the apriori relative frequency of the
    rule's class.  Finally one rule with an empty filter is appended for each
    value of the class variable.

    ``dists`` is indexed by ``int(r.classifier.defaultVal)``; the selected
    entry is passed through ``createEVDistList`` before evaluation.

    Returns a new ``orange.RuleList``; nothing is appended to ``rules``.
    """
    apriori = orange.Distribution(examples.domain.classVar, examples, weight)
    # start the result with all of the given rules
    newRules = orange.RuleList()
    for r in rules:
        newRules.append(r)

    # loop through rules
    for r in rules:
        # tmpList holds the candidates of the current level; the first level
        # is a single clone of r with all conditions removed
        tmpList = orange.RuleList()
        tmpRle = r.clone()
        tmpRle.filter.conditions = []
        tmpRle.parentRule = None
        tmpRle.filterAndStore(examples, weight, r.classifier.defaultVal)
        tmpList.append(tmpRle)
        # continue while there are candidates and the first one has no more
        # conditions than the original rule
        while tmpList and len(tmpList[0].filter.conditions) <= len(
                r.filter.conditions):
            tmpList2 = orange.RuleList()
            for tmpRule in tmpList:
                # evaluate tmpRule
                # (returnExpectedProb is switched off for this evaluation and
                # restored right after it)
                oldREP = learner.ruleFinder.evaluator.returnExpectedProb
                learner.ruleFinder.evaluator.returnExpectedProb = False
                learner.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(
                    dists[int(r.classifier.defaultVal)])
                tmpRule.quality = learner.ruleFinder.evaluator(
                    tmpRule, examples, weight, r.classifier.defaultVal,
                    apriori)
                learner.ruleFinder.evaluator.returnExpectedProb = oldREP
                # if rule not in rules already, add it to the list
                if not True in [rules_equal(ri, tmpRule)
                                for ri in newRules] and len(
                                    tmpRule.filter.conditions
                                ) > 0 and tmpRule.quality > apriori[
                                    r.classifier.defaultVal] / apriori.abs:
                    newRules.append(tmpRule)
                # create new tmpRules, set parent Rule, append them to tmpList2
                # NOTE(review): this test runs after the append above, so a
                # candidate that was just added is presumably matched by
                # rules_equal against itself and is then not expanded -
                # confirm that this is intended.
                if not True in [rules_equal(ri, tmpRule) for ri in newRules]:
                    for c in r.filter.conditions:
                        tmpRule2 = tmpRule.clone()
                        tmpRule2.parentRule = tmpRule
                        tmpRule2.filter.conditions.append(c)
                        tmpRule2.filterAndStore(examples, weight,
                                                r.classifier.defaultVal)
                        # keep the extension only when its class distribution
                        # total is strictly smaller than its parent's
                        if tmpRule2.classDistribution.abs < tmpRule.classDistribution.abs:
                            tmpList2.append(tmpRule2)
            tmpList = tmpList2
    # append one rule with an empty filter per class value; its quality is
    # the relative frequency of that class in the rule's class distribution
    for cl in examples.domain.classVar:
        tmpRle = orange.Rule()
        tmpRle.filter = orange.Filter_values(domain=examples.domain)
        tmpRle.parentRule = None
        tmpRle.filterAndStore(examples, weight, int(cl))
        tmpRle.quality = tmpRle.classDistribution[int(
            cl)] / tmpRle.classDistribution.abs
        newRules.append(tmpRle)
    return newRules
Example #3
0
 def sortRules(self, rules):
     """Return a new ``orange.RuleList`` with the rules in selection order.

     Rules are picked one at a time: on every pass the rule with the fewest
     filter conditions is chosen, and among rules with the same number of
     conditions the one with the larger ``beta`` wins.  Rules whose ``beta``
     lies strictly between -0.01 and 0.01 are never picked, and a rule that
     is already in the result is not picked again.
     """
     ordered = orange.RuleList()
     while True:
         picked = False
         best = None
         for candidate in rules:
             # skip rules already placed and rules with a near-zero beta
             if candidate in ordered or -0.01 < candidate.beta < 0.01:
                 continue
             if best:
                 shorter = len(candidate.filter.conditions) < len(
                     best.filter.conditions)
                 if not shorter:
                     same_length = len(candidate.filter.conditions) == len(
                         best.filter.conditions)
                     if not (same_length and candidate.beta > best.beta):
                         continue
             # candidate is the first eligible rule or beats the current best
             best = candidate
             picked = True
         if best:
             ordered.append(best)
         if not picked:
             break
     return ordered
Example #4
0
    def __call__(self, example, result_type=orange.GetValue, retRules=False):
        """Classify ``example`` from the class distributions of covering rules.

        The class distributions of all rules in ``self.rules`` that cover
        ``example`` (and have a non-empty ``classDistribution``) are summed
        and the sum is divided by its total mass.  When no rule contributes,
        a copy of ``self.prior`` divided by ``self.prior.abs`` is used
        instead; ``self.prior`` itself is left untouched.

        Depending on ``result_type`` the result is the most probable value
        (``orange.GetValue``), the distribution
        (``orange.GetProbabilities``), or a tuple of both for any other
        value.  When ``retRules`` is true, the list of covering rules is
        appended as the last element of the returned tuple.
        """
        def add(disc1, disc2, sumd):
            # Return a copy of disc1 with disc2 added element-wise, together
            # with sumd increased by the total of disc2's elements.
            disc = orange.DiscDistribution(disc1)
            sumdisc = sumd
            for i, _ in enumerate(disc):
                disc[i] += disc2[i]
                sumdisc += disc2[i]
            return disc, sumdisc

        # create empty distribution
        retDist = orange.DiscDistribution(self.examples.domain.classVar)
        covRules = orange.RuleList()
        # iterate through rules - add distributions of those covering example
        sumdisc = 0.
        for r in self.rules:
            if r(example) and r.classDistribution:
                retDist, sumdisc = add(retDist, r.classDistribution, sumdisc)
                covRules.append(r)
        if not sumdisc:
            # Work on a copy: the in-place division below must not rescale
            # the prior stored on the classifier.
            retDist = orange.DiscDistribution(self.prior)
            sumdisc = self.prior.abs
        for c in self.examples.domain.classVar:
            retDist[c] /= sumdisc
        if retRules:
            if result_type == orange.GetValue:
                return (retDist.modus(), covRules)
            if result_type == orange.GetProbabilities:
                return (retDist, covRules)
            return (retDist.modus(), retDist, covRules)
        if result_type == orange.GetValue:
            return retDist.modus()
        if result_type == orange.GetProbabilities:
            return retDist
        return (retDist.modus(), retDist)
Example #5
0
 def __init__(self,
              alpha=.05,
              min_coverage=0,
              max_rule_length=0,
              rules=None):
     """Store the rule list and build an LRS rule validator.

     ``alpha``, ``min_coverage`` and ``max_rule_length`` are passed
     unchanged to ``orange.RuleValidator_LRS``.  ``rules`` is stored as
     ``self.rules``; when it is omitted (or ``None``) a fresh
     ``orange.RuleList`` is created for this instance, so instances never
     share one default list.
     """
     if rules is None:
         rules = orange.RuleList()
     self.rules = rules
     self.validator = orange.RuleValidator_LRS(
         alpha=alpha,
         min_coverage=min_coverage,
         max_rule_length=max_rule_length)
Example #6
0
 def getBestRules(self, currentRules, examples, weightID):
     """Collect the rules to keep into a new ``orange.RuleList``.

     First every rule in ``currentRules`` whose learner has an
     ``argumentRule`` attribute is taken, skipping rules already collected.
     Then ``self.bestRule`` is walked in parallel with ``examples``: a
     non-empty entry is taken when it is not yet collected and the class of
     the example at the same index equals the rule's default value.
     ``weightID`` is not used here.
     """
     selected = orange.RuleList()
     for rule in currentRules:
         if not hasattr(rule.learner, "argumentRule"):
             continue
         if orngCN2.rule_in_set(rule, selected):
             continue
         selected.append(rule)
     for index, rule in enumerate(self.bestRule):
         if not rule:
             continue
         if rule_in_set(rule, selected):
             continue
         if examples[index].getclass() == rule.classifier.defaultValue:
             selected.append(rule)
     return selected
Example #7
0
    def __call__(self, examples, weight=0):
        """Learn rules for each class value and return an unordered classifier.

        ``orange.RuleLearner.__call__`` is run once per value of the class
        variable, with ``self.targetClass`` set to that value, and the rules
        of all runs are gathered into one list that is handed to
        ``CN2UnorderedClassifier`` together with ``examples`` and ``weight``.
        If a ``progressCallback`` attribute is present, its ``start``/``end``
        bounds are advanced by the normalized class frequency before each
        run and it is called with ``1.0`` at the end.
        """
        supervisedClassCheck(examples)

        self.ruleStopping.apriori = orange.Distribution(
            examples.domain.classVar, examples)
        allRules = orange.RuleList()

        callback = getattr(self, "progressCallback", None)
        if callback:
            callback.start = 0.0
            callback.end = 0.0
            # normalized class frequencies give each class its progress share
            classShare = orange.Distribution(examples.domain.classVar,
                                             examples, weight)
            classShare.normalize()

        for value in examples.domain.classVar:
            if callback:
                callback.start = callback.end
                callback.end += classShare[value]
            self.targetClass = value
            learned = orange.RuleLearner.__call__(self, examples, weight)
            for rule in learned.rules:
                allRules.append(rule)

        if callback:
            callback(1.0, None)
        return CN2UnorderedClassifier(allRules, examples, weight)
Example #8
0
 def learnRule(self, examples, weightID, targetClass):
     """Run the rule finder once and return the evaluator's best rule.

     The evaluator's ``bestRule`` is cleared before the search; the value
     returned by the rule finder itself is discarded and whatever the
     evaluator stored in ``bestRule`` during the search is returned.
     """
     self.ruleFinder.evaluator.bestRule = None
     # the search starts from an empty list of base rules
     self.ruleFinder(examples, weightID, targetClass, orange.RuleList())
     return self.ruleFinder.evaluator.bestRule
Example #9
0
    def __call__(self, examples, weight=0):
        supervisedClassCheck(examples)
        apriori = orange.Distribution(examples.domain.classVar, examples,
                                      weight)
        ruleSet = orange.RuleList()  # resulting set of rules

        # Progress bar in widgets
        progress = getattr(self, "progressCallback", None)
        if progress:
            self.progressCallback = progress
            progress.start = 0.0
            progress.end = 0.0
            distrib = orange.Distribution(examples.domain.classVar, examples,
                                          weightID)
            distrib.normalize()

        # Main Loop
        temp_dists = []
        for cl_i, cl in enumerate(examples.domain.classVar):
            # rulesForClass ... rules for this class only
            rulesForClass = orange.RuleList()
            if progress:
                progress.start = progress.end
                progress.end += distrib[cl]

            # Compute EVD distribution if not set
            if getattr(self, "dists", None):
                self.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(
                    self.dists[cl_i])
                temp_dists.append(self.dists[cl_i])
            else:
                ds = computeDists(examples,
                                  weight=weight,
                                  targetClass=cl_i,
                                  N=self.N,
                                  learner=self)
                self.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(
                    ds)
                temp_dists.append(ds)
            examples = self.coverAndRemove.initialize(examples, weight, cl,
                                                      apriori)
            self.ruleFinder.evaluator.probVar = examples.domain.getmeta(
                self.coverAndRemove.probAttribute)
            self.targetClass = cl
            # Learn rules
            while not self.dataStopping(examples, weight, cl):
                # Learn rule
                rule = self.learnRule(examples, weight, cl)
                if not rule or len(
                        rule.filter.conditions
                ) == 0:  # stop learning if no rule has been learned
                    break
                (examples,
                 weight) = self.coverAndRemove(rule, examples, weight, cl)
                # add rule to rule set
                if not rule_in_set(rule, rulesForClass):
                    rulesForClass.append(rule)
                if progress:
                    progress(
                        self.coverAndRemove.remainingExamplesP(examples, cl),
                        None)
                else:
                    print "%4.2f," % self.coverAndRemove.remainingExamplesP(
                        examples, cl),
            if not progress:
                print
            ruleSet.extend(
                self.coverAndRemove.getBestRules(rulesForClass, examples,
                                                 weight))
            if progress:
                progress(1.0, None)
            self.ruleFinder.evaluator.probVar = None
        if self.add_sub_rules:
            ruleSet = add_sub_rules(ruleSet, examples, weight, self,
                                    temp_dists)
        return self.LCR(ruleSet, examples, weight)