Exemple #1
0
 def test_normalize(self):
     """normalize() rescales each count by the total (iris has 150 rows)."""
     data = orange.ExampleTable("iris")
     normalized = orange.Distribution(0, data)
     reference = orange.Distribution(normalized)
     normalized.normalize()
     pairs = zip(normalized.values(), reference.values())
     self.assertTrue(all(a == b / 150 for a, b in pairs))
Exemple #2
0
    def test_discrete(self):
        """RandomClassifier with a discrete class: construction variants,
        deterministic per-example predictions, and rejected argument
        combinations."""
        d = orange.ExampleTable("iris")
        dist = orange.Distribution(d.domain.class_var, d)

        # Built from a variable: the probability table is bound to that variable.
        cc = orange.RandomClassifier(d.domain.class_var)
        self.assertEqual(cc.probabilities.variable, cc.class_var)

        # Built from a distribution alone: class_var is taken from the
        # distribution and the distribution object itself is kept (not copied).
        cc2 = orange.RandomClassifier(None, dist)
        self.assertEqual(cc2.class_var, d.domain.class_var)
        self.assertEqual(cc2.probabilities.variable, cc2.class_var)
        self.assertEqual(id(cc2.probabilities), id(dist))
        # Iris: 50 examples of each of the three classes.
        self.assertTrue(all(x==50 for x in cc2.probabilities))

        # Repeated classification of the same example is deterministic...
        for cl in [cc, cc2]:
            for e in d[0:150:20]:
                anss = set()
                for i in range(5):
                    anss.add(cl(e))
                self.assertEqual(len(anss), 1)

            # ...but over the whole table all three classes appear.
            anss = set()
            for e in d:
                anss.add(cl(e))
            self.assertEqual(len(anss), 3)

        for e in d[0:150:20]:
            self.assertTrue(all(x==50 for x in cc2(e, orange.Classifier.GetProbabilities)))

        # Invalid constructions: a distribution as the first argument, an
        # empty distribution, and a distribution built for another variable.
        self.assertRaises(TypeError, orange.RandomClassifier, dist)
        self.assertRaises(ValueError, orange.RandomClassifier, None, orange.DiscDistribution())
        self.assertRaises(ValueError, orange.RandomClassifier, d.domain[1], orange.Distribution(d.domain[0]))
Exemple #3
0
    def test_pickle(self):
        """ConstantClassifier survives a pickle round trip, for both an
        ordinary attribute and the class variable, before and after the
        default value is changed."""
        import pickle

        def roundtrip(classifier):
            return pickle.loads(pickle.dumps(classifier))

        d = orange.ExampleTable("iris")
        for variable, forced_val in [(d.domain[0], 42), (d.domain.class_var, 1)]:
            dist = orange.Distribution(variable, d)
            cc = orange.ConstantClassifier(dist)
            cc2 = roundtrip(cc)
            self.assertEqual(cc.class_var, cc2.class_var)
            self.assertEqual(cc.default_val, cc2.default_val)
            self.assertEqual(cc.default_distribution, cc2.default_distribution)

            # An explicitly set default value must also survive pickling.
            cc.default_val = forced_val
            cc2 = roundtrip(cc)
            self.assertEqual(cc.default_val, cc2.default_val)
Exemple #4
0
    def test_hash(self):
        """Equal discrete distributions hash equally; mutation changes the hash."""
        data = orange.ExampleTable("zoo")
        by_name = orange.Distribution("type", data)
        by_var = orange.Distribution(data.domain.classVar, data)
        self.assertEqual(hash(by_name), hash(by_var))

        # Bumping a single count must break hash equality.
        by_var[0] += 1
        self.assertNotEqual(hash(by_name), hash(by_var))
Exemple #5
0
    def t2est_discrete(self):
        """ConstantClassifier with a discrete class (disabled via the 't2est'
        name). Exercises all constructor forms, deterministic predictions,
        returned probabilities and error cases."""
        d = orange.ExampleTable("iris")
        dist = orange.Distribution(d.domain.class_var, d)

        # From a variable only.
        cc = orange.ConstantClassifier(d.domain.class_var)
        self.assertEqual(cc.class_var, d.domain.class_var)
        self.assertEqual(cc.default_distribution.variable, cc.class_var)

        # From a distribution only: the object is stored by reference.
        cc2 = orange.ConstantClassifier(dist)
        self.assertEqual(cc2.class_var, d.domain.class_var)
        self.assertEqual(cc2.default_distribution.variable, cc2.class_var)
        self.assertEqual(id(cc2.default_distribution), id(dist))
        self.assertTrue(all(x==50 for x in cc2.default_distribution))

        # From variable + distribution, no default value.
        cc3 = orange.ConstantClassifier(d.domain.class_var, None, dist)
        self.assertEqual(cc3.class_var, d.domain.class_var)
        self.assertEqual(cc3.default_distribution.variable, cc3.class_var)
        self.assertEqual(id(cc3.default_distribution), id(dist))
        self.assertTrue(all(x==50 for x in cc3.default_distribution))

        # From variable + fixed default value + distribution.
        cc4 = orange.ConstantClassifier(d.domain.class_var, "Iris-setosa", dist)
        self.assertEqual(cc4.class_var, d.domain.class_var)
        # Fixed copy-paste bug: this assertion previously compared against
        # cc3.class_var instead of cc4's own class variable.
        self.assertEqual(cc4.default_distribution.variable, cc4.class_var)
        self.assertEqual(id(cc4.default_distribution), id(dist))
        self.assertTrue(all(x==50 for x in cc4.default_distribution))

        for cl in [cc, cc2, cc3]:
            # Repeated predictions for one example are deterministic...
            for e in d[0:150:20]:
                anss = set()
                for i in range(5):
                    anss.add(cl(e))
                self.assertEqual(len(anss), 1)

            # ...and over the whole table all three classes are predicted.
            anss = set()
            for e in d:
                anss.add(cl(e))
            self.assertEqual(len(anss), 3)

        # A classifier with a default value always returns it.
        # (Removed an unused `anss = set()` that was dead code here.)
        for e in d[0:150:20]:
            for i in range(5):
                self.assertEqual(cc4(e), "Iris-setosa")

        for cl in [cc2, cc3, cc4]:
            for e in d[0:150:20]:
                self.assertTrue(all(x==50 for x in cl(e, orange.Classifier.GetProbabilities)))

        # Invalid constructions.
        self.assertRaises(TypeError, orange.ConstantClassifier, d.domain.class_var, dist)
        self.assertRaises(ValueError, orange.ConstantClassifier, None, "?", orange.DiscDistribution())
        self.assertRaises(ValueError, orange.ConstantClassifier, d.domain[1], "?", orange.Distribution(d.domain[0]))

        # The distribution can be replaced by (or built from) a plain list.
        cc4.default_distribution = [50, 50, 50]
        self.assertEqual(list(cc4.default_distribution), [50, 50, 50])

        cc5 = orange.ConstantClassifier(d.domain.class_var, "Iris-setosa", [50, 50, 50])
        self.assertEqual(list(cc5.default_distribution), [50, 50, 50])
Exemple #6
0
def test():
    """Smoke-test the entropy helpers on a module-level `data` table (Python 2).

    Prints single, joint and mutual information values plus InfoGain.
    """
    # NOTE(review): relies on a module-global `data` not visible in this chunk.
    x = data.domain.attributes[1]
    y = data.domain.attributes[2]
    c = data.domain.classVar
    print "H(%s) = %5.5f" % (x.name, _entropy(p2f(orange.Distribution(x, data))))
    print "H(%s) = %5.5f" % (y.name, _entropy(p2f(orange.Distribution(y, data))))
    print "H(%s,%s)= %5.5f" % (x.name, y.name, joint_entropy(x, y, data))
    print "I(%s;%s)= %5.5f" % (x.name, y.name, mutual_information(x, y, data))
    # NOTE(review): the label reads H(x|c) but the value printed is
    # mutual_information(x, c, data) -- label or call looks wrong; confirm.
    print "H(%s|%s)= %5.5f" % (x.name, c.name, mutual_information(x, c, data))
    print "InfoGain = %5.5f" % orange.MeasureAttribute_info(x, data)
Exemple #7
0
    def test_construction(self):
        """DiscDistribution construction paths and equality with raw frequencies."""
        data = orange.ExampleTable("zoo")

        # A sequence of (value, frequency) pairs is not accepted.
        self.assertRaises(TypeError, orange.DiscDistribution,
                          zip(data.domain["type"].values, self.freqs))

        from_data = orange.Distribution("type", data)
        from_freqs = orange.DiscDistribution(self.freqs)
        self.assertEqual(from_data, from_freqs)

        # A bare discrete variable yields an (empty) DiscDistribution.
        empty = orange.Distribution(data.domain.classVar)
        self.assertTrue(isinstance(empty, orange.DiscDistribution))
Exemple #8
0
    def test_hash(self):
        """Hash equality tracks value equality for continuous distributions."""
        data = orange.ExampleTable("iris")
        original = orange.Distribution("sepal length", data)
        copy = orange.Distribution(original)
        self.assertEqual(hash(original), hash(copy))

        # Bumping one count breaks hash equality...
        copy[4.4] += 1
        self.assertNotEqual(hash(original), hash(copy))

        # ...undoing the change restores it...
        copy[4.4] -= 1
        self.assertEqual(hash(original), hash(copy))

        # ...and adding an entirely new key changes the hash again.
        copy[42] = 2011
        self.assertNotEqual(hash(original), hash(copy))
Exemple #9
0
    def test_fromExamples(self):
        """Every way of naming the class column yields the same distribution."""
        data = orange.ExampleTable("zoo")
        reference = orange.Distribution("type", data)

        # By variable object, by positional index, by negative index,
        # and via the dedicated helper.
        equivalents = [
            orange.Distribution(data.domain.classVar, data),
            orange.Distribution(len(data.domain.attributes), data),
            orange.Distribution(-1, data),
            orange.get_class_distribution(data),
        ]
        for other in equivalents:
            self.assertEqual(reference, other)
Exemple #10
0
    def test_classAttr_cont(self):
        """ContingencyClassAttr with a continuous attribute: distributions,
        pickling, normalization and element access (Python 2: indexes
        dict.keys())."""
        d = orange.ExampleTable("iris")
        cd = orange.get_class_distribution(d)
        ad = orange.Distribution(0, d)
        cont = orange.ContingencyClassAttr(0, d)
        # First attribute value recorded for the first class (Python 2 keys()).
        fv = cont[0].keys()[0]
        # Outer variable is the class; inner is the attribute.
        self.assertEqual(cont.inner_distribution, ad)
        self.assertEqual(cont.outer_distribution, cd)
        self.assertEqual(len(cont), len(cd))

        # Pickling preserves both distributions, both variables and contents.
        s = pickle.dumps(cont)
        cont2 = pickle.loads(s)
        self.assertEqual(cont.innerDistribution, cont2.innerDistribution)
        self.assertEqual(cont.innerVariable, cont2.innerVariable)
        self.assertEqual(cont.outerDistribution, cont2.outerDistribution)
        self.assertEqual(cont.outerVariable, cont2.outerVariable)
        self.assertEqual(cont[0], cont2[0])

        # After normalization the per-class attribute distribution sums to 1,
        # and both p_attr calling conventions agree.
        cont.normalize()
        self.assertAlmostEqual(sum(cont.p_attr(0).values()), 1.0)
        self.assertEqual(cont.p_attr(0)[fv], cont.p_attr(fv, 0))
        self.assertEqual(cont.p_attr(0)[fv], cont2.p_attr(fv, 0)/cont2.p_attr(0).abs)

        # add_var_class increments a single (value, class) cell by the weight.
        x = cont[0][0]
        cont.add_var_class(0, 0, 0.5)
        self.assertEqual(x+0.5, cont[0][0])

        # Indexing by value then class equals tuple indexing.
        self.assertEqual(cont[fv][0], cont[fv,0])

        # Unknown values cannot be used as an index.
        with self.assertRaises(IndexError):
            cont["?"]
Exemple #11
0
 def test_random(self):
     """random() eventually samples every class value of the zoo data."""
     data = orange.ExampleTable("zoo")
     dist = orange.Distribution("type", data)
     seen = set(int(dist.random()) for _ in range(1000))
     self.assertEqual(seen, set(range(len(data.domain.classVar.values))))
Exemple #12
0
    def test_continuous(self):
        """RandomClassifier with a continuous class variable."""
        d = orange.ExampleTable("iris")
        # Drop the original class: the last attribute becomes the
        # (continuous) class of the new domain.
        dom2 = orange.Domain(d.domain.attributes)
        d = orange.ExampleTable(dom2, d)
        self.assertEqual(d.domain.class_var.var_type, orange.Variable.Type.Continuous)

        dist = orange.Distribution(d.domain.class_var, d)

        # Without a distribution the classifier cannot produce a value.
        cc = orange.RandomClassifier(d.domain.class_var)
        self.assertEqual(cc.class_var, d.domain.class_var)
        self.assertEqual(cc.probabilities.variable, cc.class_var)
        self.assertRaises(ValueError, cc, d[0])

        # With a distribution: class_var inferred, object kept by reference.
        cc2 = orange.RandomClassifier(None, dist)
        self.assertEqual(cc2.class_var, d.domain.class_var)
        self.assertEqual(cc2.probabilities.variable, cc2.class_var)
        self.assertEqual(id(cc2.probabilities), id(dist))

        # Same example -> same prediction on repeated calls...
        for e in d[0:150:20]:
            anss = set()
            for i in range(5):
                anss.add(cc2(e))
            self.assertEqual(len(anss), 1)

        # ...while over the whole table many distinct values appear.
        anss = set()
        for e in d:
            anss.add(cc2(e))
        self.assertGreater(len(anss), 10)
Exemple #13
0
 def filterAndStore(self):
     """Apply the rule's filter to self.data and cache derived statistics.

     Sets examples, classifier, complexity, classDistribution, the TP/FP
     example sets, quality (generalization quotient), support and
     confidence. Python 2 code: map() returns a list here.
     """
     self.examples = self.filter(self.data)  # set examples
     self.classifier = self.learner(self.examples)  # set classifier
     distribution = [0.0] * len(self.data.domain.classVar.values)
     self.complexity = len(self.filter.conditions)  # set rule complexity
     if len(self.examples) > 0:
         # Relative class frequencies among the covered examples.
         for d in self.examples:
             distribution[int(d.getclass())] += 1
         distribution = map(lambda d: d / len(self.examples), distribution)
         self.classDistribution = orange.Distribution(
             distribution)  # set distribution
         self.TP = self.examples.filter(
             {self.examples.domain.classVar: self.targetClass})
         self.FP = self.examples.filter(
             {self.examples.domain.classVar: self.targetClass}, negate=1)
         # self.TP = filter(lambda e: e.getclass()==self.targetClass, self.examples)   # True positives
         # self.FP = filter(lambda e: e.getclass()!=self.targetClass, self.examples)   # false positives
         TPlen = len(self.TP) * 1.0
         self.quality = TPlen / (len(
             self.FP) + self.g)  # set rule quality: generalization quocient
         self.support = 1.0 * len(self.examples) / len(
             self.data)  # set rule support
         self.confidence = TPlen / len(self.examples)
     else:
         # No covered examples: stores the plain zero list, not an
         # orange.Distribution.
         # NOTE(review): type differs from the non-empty branch -- confirm
         # downstream consumers cope with a bare list.
         self.classDistribution = distribution
         self.TP = []
         self.FP = []
         self.quality = 0  # set rule quality: generalization kvocient
         self.support = 0  # set rule support
 def err(condDist, att, value, targetClass, priorError, data):
     """Error estimate for the target-class probability given a conditional
     distribution, an attribute value and the prior error."""
     total = sum(condDist)
     ratio = condDist[targetClass] / total
     attr_dist = orange.Distribution(att, data)
     # Clamp the information term away from zero to keep 1/inf finite.
     information = max(attr_dist[value] * ratio * (1 - ratio), aproxZero)
     variance = max(1 / information - priorError * priorError, 0)
     return math.sqrt(variance)
Exemple #15
0
    def test_random(self):
        """Sampling a continuous distribution stays within its keys and varies."""
        data = orange.ExampleTable("iris")
        dist = orange.Distribution(0, data)
        self.assertTrue(dist.random() in dist.keys())

        # 100 draws should produce more than one distinct value.
        samples = set(float(dist.random()) for _ in range(100))
        self.assertTrue(len(samples) > 1)
Exemple #16
0
    def __call__(self, data, weight=0):
        """Fit anchored logistic regression: estimate betas on the data plus
        the anchor examples, then gauge each attribute group's contribution
        by re-fitting with that group removed.

        Python 2 code (print statements, builtin reduce). Returns
        (self.OK, beta, beta_se, 0); standard errors are reported as 0.0.
        """
        (X, y) = self.createArrayData(data)

        # Collect the anchor examples, converted into the data's domain.
        exTable = orange.ExampleTable(data.domain)
        for id, ex in self.anch_examples:
            exTable.extend(orange.ExampleTable(ex, data.domain))
        (X_anch, y_anch) = self.createArrayData(exTable)

        # One coefficient per attribute plus the intercept, all starting at 0.
        betas = array([0.0] * (len(data.domain.attributes) + 1))

        likelihood, betas = self.estimateBeta(X, y, betas, [0] * (len(betas)),
                                              X_anch, y_anch)

        # get attribute groups atGroup = [(startIndex, number of values), ...)
        ats = data.domain.attributes
        # Consecutive attributes derived from the same source variable
        # (via getValueFrom) are treated as one group.
        atVec = reduce(
            lambda x, y: x + [(y, not y == x[-1][0])],
            [a.getValueFrom and a.getValueFrom.whichVar or a for a in ats],
            [(ats[0].getValueFrom and ats[0].getValueFrom.whichVar
              or ats[0], 0)])[1:]
        atGroup = [[0, 0]]
        for v_i, v in enumerate(atVec):
            if v[1] == 0: atGroup[-1][1] += 1
            else: atGroup.append([v_i, 1])

        # compute zero values for attributes
        sumB = 0.
        for ag in atGroup:
            # Re-fit with this attribute group's columns removed and compare
            # intercepts against the full model.
            X_temp = concatenate((X[:, :ag[0] + 1], X[:, ag[0] + 1 + ag[1]:]),
                                 1)
            if X_anch:
                X_anch_temp = concatenate(
                    (X_anch[:, :ag[0] + 1], X_anch[:, ag[0] + 1 + ag[1]:]), 1)
            else:
                X_anch_temp = X_anch
            ##            print "1", concatenate((betas[:i+1],betas[i+2:]))
            ##            print "2", betas
            likelihood_temp, betas_temp = self.estimateBeta(
                X_temp, y,
                concatenate((betas[:ag[0] + 1], betas[ag[0] + ag[1] + 1:])),
                [0] + [1] * (len(betas) - 1 - ag[1]), X_anch_temp, y_anch)
            print "finBetas", betas, betas_temp
            print "betas", betas[0], betas_temp[0]
            sumB += betas[0] - betas_temp[0]
        # Prior probability of the first class value.
        apriori = orange.Distribution(data.domain.classVar, data)
        aprioriProb = apriori[0] / apriori.abs

        print "koncni rezultat", sumB, math.log(
            (1 - aprioriProb) / aprioriProb), betas[0]

        # Standard errors are not computed here; every beta_se entry is 0.0.
        beta = []
        beta_se = []
        print "likelihood2", likelihood
        for i in range(len(betas)):
            beta.append(betas[i])
            beta_se.append(0.0)
        return (self.OK, beta, beta_se, 0)
Exemple #17
0
 def __init__(self, rules=None, examples=None, weightID=0, **argkw):
     """Store the rule list and training data; when examples are supplied,
     also record the class variable and its prior distribution."""
     self.rules = rules
     self.examples = examples
     self.weightID = weightID
     if examples is None:
         self.classVar = None
     else:
         self.classVar = examples.domain.classVar
     # Extra keyword arguments become attributes directly.
     self.__dict__.update(argkw)
     if examples is not None:
         self.prior = orange.Distribution(examples.domain.classVar, examples)
Exemple #18
0
def entropy(x, data):
    """Entropy of a discrete attribute, or joint entropy of a pair.

    x may be a single EnumVariable or a list of two attributes; joint
    entropy for sets of more than two attributes is not implemented
    (falls through and returns None).
    """
    if isinstance(x, orange.EnumVariable):
        return _entropy(p2f(orange.Distribution(x, data)))
    if isinstance(x, list):
        if len(x) == 2:  # joint entropy of a pair of attributes
            # Bug fix: the original referenced an undefined global `y`;
            # the pair to tabulate is the two attributes in the list.
            c = orange.ContingencyAttrAttr(x[0], x[1], data)
            return _entropy(p2f(flatten(c)))
        else:  # joint entropy for a set of attributes: not implemented
            pass
Exemple #19
0
def add_sub_rules(rules, examples, weight, learner, dists):
    """Extend a rule list with sub-rules (rules built from subsets of each
    rule's conditions) whose quality beats the apriori class probability,
    plus one unconditional rule per class value.

    Returns a new orange.RuleList containing the originals and additions.
    """
    apriori = orange.Distribution(examples.domain.classVar, examples, weight)
    newRules = orange.RuleList()
    for r in rules:
        newRules.append(r)

    # loop through rules
    for r in rules:
        # Start from an empty-condition clone of the rule and grow it back
        # one condition at a time (breadth-first over condition subsets).
        tmpList = orange.RuleList()
        tmpRle = r.clone()
        tmpRle.filter.conditions = []
        tmpRle.parentRule = None
        tmpRle.filterAndStore(examples, weight, r.classifier.defaultVal)
        tmpList.append(tmpRle)
        while tmpList and len(tmpList[0].filter.conditions) <= len(
                r.filter.conditions):
            tmpList2 = orange.RuleList()
            for tmpRule in tmpList:
                # evaluate tmpRule
                # Temporarily switch the evaluator to direct-probability mode
                # with the EV distribution of this rule's target class.
                oldREP = learner.ruleFinder.evaluator.returnExpectedProb
                learner.ruleFinder.evaluator.returnExpectedProb = False
                learner.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(
                    dists[int(r.classifier.defaultVal)])
                tmpRule.quality = learner.ruleFinder.evaluator(
                    tmpRule, examples, weight, r.classifier.defaultVal,
                    apriori)
                learner.ruleFinder.evaluator.returnExpectedProb = oldREP
                # if rule not in rules already, add it to the list
                if not True in [rules_equal(ri, tmpRule)
                                for ri in newRules] and len(
                                    tmpRule.filter.conditions
                                ) > 0 and tmpRule.quality > apriori[
                                    r.classifier.defaultVal] / apriori.abs:
                    newRules.append(tmpRule)
                # create new tmpRules, set parent Rule, append them to tmpList2
                if not True in [rules_equal(ri, tmpRule) for ri in newRules]:
                    for c in r.filter.conditions:
                        tmpRule2 = tmpRule.clone()
                        tmpRule2.parentRule = tmpRule
                        tmpRule2.filter.conditions.append(c)
                        tmpRule2.filterAndStore(examples, weight,
                                                r.classifier.defaultVal)
                        # Only keep extensions that actually narrow coverage.
                        if tmpRule2.classDistribution.abs < tmpRule.classDistribution.abs:
                            tmpList2.append(tmpRule2)
            tmpList = tmpList2
    # Add one condition-free rule per class value, scored by class purity.
    for cl in examples.domain.classVar:
        tmpRle = orange.Rule()
        tmpRle.filter = orange.Filter_values(domain=examples.domain)
        tmpRle.parentRule = None
        tmpRle.filterAndStore(examples, weight, int(cl))
        tmpRle.quality = tmpRle.classDistribution[int(
            cl)] / tmpRle.classDistribution.abs
        newRules.append(tmpRle)
    return newRules
Exemple #20
0
def data_center(data):
    """Return the central ("average") point of the data set.

    Continuous attributes contribute their mean, discrete attributes the
    index of their most frequent value, other types None; a dummy 0 is
    appended for the class variable when the domain has one.
    """
    stats = orange.DomainBasicAttrStat(data)
    center = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            center.append(stats[attr].avg)
        elif attr.varType == orange.VarTypes.Discrete:
            # Index of the first maximal frequency (the modus).
            frequencies = orange.Distribution(attr, data)
            center.append(max(enumerate(frequencies), key=lambda pair: pair[1])[0])
        else:
            center.append(None)
    if data.domain.classVar:
        center.append(0)
    return orange.Example(data.domain, center)
Exemple #21
0
    def test_construction(self):
        """A Gaussian fitted from a sample distribution matches its moments."""
        data = orange.ExampleTable("iris")
        sample = orange.Distribution("sepal length", data)

        fitted = orange.GaussianDistribution(sample)
        self.assertAlmostEqual(fitted.average(), sample.average())
        # For a Gaussian the modus coincides with the mean.
        self.assertAlmostEqual(fitted.modus(), sample.average())
        self.assertAlmostEqual(fitted.var(), sample.var())
        self.assertAlmostEqual(fitted.dev(), sample.dev())

        # Constructing from explicit (mean, deviation) gives an equal object.
        explicit = orange.GaussianDistribution(sample.average(), sample.dev())
        self.assertEqual(fitted, explicit)
Exemple #22
0
    def test_add(self):
        """add() accepts Value, int and float class indices and accumulates weights."""
        data = orange.ExampleTable("zoo")
        reference = orange.Distribution("type", data)

        # The class value can be added as a Value, as an int, or as a float;
        # all three rebuilds must equal the reference distribution.
        rebuilt = None
        for convert in (lambda v: v, int, float):
            rebuilt = orange.Distribution(data.domain.classVar)
            for example in data:
                rebuilt.add(convert(example[-1]), 1.0)
            self.assertEqual(reference, rebuilt)

        # Even a tiny extra weight makes a bin differ.
        rebuilt.add(0, 1e-8)
        self.assertNotEqual(rebuilt[0], reference[0])
Exemple #23
0
    def test_stat(self):
        """Summary statistics of sepal length on iris."""
        data = orange.ExampleTable("iris")
        dist = orange.Distribution(0, data)

        self.assertAlmostEqual(dist.average(), 5.843333333)
        self.assertAlmostEqual(dist.dev(), 0.8253012, 4)

        self.assertEqual(dist.percentile(50), 5.8)
        # Percentiles outside [0, 100] are rejected.
        for bad_percentile in (-5, 105):
            self.assertRaises(ValueError, dist.percentile, bad_percentile)

        self.assertEqual(dist.density(5.0), 10)
        self.assertEqual(dist.density(4.95), 8)
Exemple #24
0
    def test_add(self):
        """Continuous distributions can be rebuilt value by value."""
        data = orange.ExampleTable("iris")
        reference = orange.Distribution(0, data)

        # Rebuild from the typed variable, from a bare ContDistribution,
        # and with an explicit unit weight.
        rebuilt_typed = orange.Distribution(data.domain[0])
        rebuilt_bare = orange.ContDistribution()
        rebuilt_weighted = orange.ContDistribution()
        for example in data:
            rebuilt_typed.add(example[0])
            rebuilt_bare.add(example[0])
            rebuilt_weighted.add(example[0], 1.0)
        self.assertEqual(reference, rebuilt_typed)
        self.assertEqual(reference, rebuilt_bare)
        self.assertEqual(reference, rebuilt_weighted)

        # A tiny additional weight is enough to break equality of a bin.
        rebuilt_weighted.add(0, 1e-8)
        self.assertNotEqual(rebuilt_weighted[0], reference[0])
Exemple #25
0
    def t2est_continuous(self):
        """ConstantClassifier with a continuous class (disabled via the
        't2est' name)."""
        d = orange.ExampleTable("iris")
        # Reinterpret the data so the last attribute is a continuous class.
        dom2 = orange.Domain(d.domain.attributes)
        d = orange.ExampleTable(dom2, d)
        self.assertEqual(d.domain.class_var.var_type, orange.Variable.Continuous)

        dist = orange.Distribution(d.domain.class_var, d)

        cc = orange.ConstantClassifier(d.domain.class_var)
        self.assertEqual(cc.class_var, d.domain.class_var)
        self.assertEqual(cc.default_distribution.variable, cc.class_var)

        # Distribution-only constructor stores the object by reference.
        cc2 = orange.ConstantClassifier(dist)
        self.assertEqual(cc2.class_var, d.domain.class_var)
        self.assertEqual(cc2.default_distribution.variable, cc2.class_var)
        self.assertEqual(id(cc2.default_distribution), id(dist))

        cc3 = orange.ConstantClassifier(d.domain.class_var, None, dist)
        self.assertEqual(cc3.class_var, d.domain.class_var)
        self.assertEqual(cc3.default_distribution.variable, cc3.class_var)
        self.assertEqual(id(cc3.default_distribution), id(dist))

        cc4 = orange.ConstantClassifier(d.domain.class_var, 5, dist)
        self.assertEqual(cc4.class_var, d.domain.class_var)
        # Fixed copy-paste bug: this assertion previously compared against
        # cc3.class_var instead of cc4's own class variable.
        self.assertEqual(cc4.default_distribution.variable, cc4.class_var)
        self.assertEqual(id(cc4.default_distribution), id(dist))

        # Without a default value the prediction is the distribution average...
        for cl in [cc2, cc3]:
            for e in d:
                self.assertEqual(cl(e), dist.average())

        # ...with one, the default value is always returned.
        for e in d:
            self.assertEqual(cc4(e), 5)

        # Invalid constructions.
        self.assertRaises(TypeError, orange.ConstantClassifier, d.domain.class_var, dist)
        self.assertRaises(ValueError, orange.ConstantClassifier, None, "?", orange.DiscDistribution())
        self.assertRaises(ValueError, orange.ConstantClassifier, d.domain[1], "?", orange.Distribution(d.domain[0]))

        # The distribution can be replaced by a plain list.
        cc4.default_distribution = [50, 50, 50]
        self.assertEqual(list(cc4.default_distribution), [50, 50, 50])
Exemple #26
0
    def test_construction(self):
        """A ContDistribution equals (and can be built from) a value->count mapping."""
        data = orange.ExampleTable("iris")
        dist = orange.Distribution("sepal length", data)

        from collections import defaultdict
        counts = defaultdict(float)
        for example in data:
            counts[float(example["sepal length"])] += 1

        # Distributions compare equal to mappings with the same content...
        self.assertEqual(dist, counts)

        # ...and can be constructed from such a mapping.
        self.assertEqual(dist, orange.ContDistribution(counts))
Exemple #27
0
    def __call__(self, examples, weight=0, fulldata=0):
        """Train a robust logistic model; multi-class problems are decomposed
        into binary (most-frequent-class vs. alternative) sub-models.

        Python 2 code (xrange, string exception). Requires a discrete class.
        """
        if examples.domain.classVar.varType != 1:
            # NOTE(review): raising a plain string is Python 2 legacy.
            raise "Logistic learner only works with discrete class."
        # Translate the domain into an array representation; analyse either
        # the full data set (if given) or just the training examples.
        translate = orng2Array.DomainTranslation(self.translation_mode_d,
                                                 self.translation_mode_c)
        if fulldata != 0:
            translate.analyse(fulldata, weight, warning=0)
        else:
            translate.analyse(examples, weight, warning=0)
        translate.prepareLR()
        mdata = translate.transform(examples)

        # get the attribute importances
        t = examples
        importance = []
        for i in xrange(len(t.domain.attributes)):
            qi = orange.MeasureAttribute_relief(t.domain.attributes[i], t)
            importance.append((qi, i))
        importance.sort()
        # Relative class frequencies, normalized to sum to 1.
        freqs = list(orange.Distribution(examples.domain.classVar, examples))
        s = 1.0 / sum(freqs)
        freqs = [x * s for x in freqs]  # normalize

        rl = RobustBLogisticLearner(regularization=self.regularization)
        if len(examples.domain.classVar.values) > 2:
            ## form several experiments:
            # identify the most frequent class value
            tfreqs = [(freqs[i], i) for i in xrange(len(freqs))]
            tfreqs.sort()
            base = tfreqs[-1][1]  # the most frequent class
            classifiers = []
            # One binary (base vs. alternative) model per remaining class.
            for i in xrange(len(tfreqs) - 1):
                # edit the translation
                alter = tfreqs[i][1]
                cfreqs = [tfreqs[-1][0], tfreqs[i][0]]  # 0=base,1=alternative
                # edit all the examples
                # Relabel: 1 for the alternative class, 0 for everything else.
                for j in xrange(len(mdata)):
                    c = int(examples[j].getclass())
                    if c == alter:
                        mdata[j][-1] = 1
                    else:
                        mdata[j][-1] = 0
                r = rl(mdata, translate, importance, cfreqs)
                classifiers.append(r)
            return ArrayLogisticClassifier(classifiers, translate,
                                           tfreqs, examples.domain.classVar,
                                           len(mdata))
        else:
            # Binary class: fit a single model directly.
            r = rl(mdata, translate, importance, freqs)
            return BasicLogisticClassifier(r, translate)
Exemple #28
0
def maxNu(examples):
    """Return the largest feasible nu parameter for Nu_SVC on this data.

    The bound is min over all pairs of non-empty classes of
    2*min(n1, n2)/(n1 + n2), capped at 1.0.
    """
    class_counts = list(orange.Distribution(examples.domain.classVar, examples))

    def unordered_pairs(counts):
        # Yield every unordered pair of class counts.
        for idx, first in enumerate(counts):
            for second in counts[idx + 1:]:
                yield first, second

    bounds = [1.0]
    for first, second in unordered_pairs(class_counts):
        if first != 0 and second != 0:
            bounds.append(2.0 * min(first, second) / (first + second))
    return min(bounds)
Exemple #29
0
    def __call__(self, examples, weight=0):
        """Learn an unordered CN2 rule set: rules are induced separately for
        every class value and combined into a CN2UnorderedClassifier."""
        supervisedClassCheck(examples)

        rules = orange.RuleList()
        # The stopping criterion compares rule distributions against the
        # apriori class distribution.
        self.ruleStopping.apriori = orange.Distribution(
            examples.domain.classVar, examples)
        progress = getattr(self, "progressCallback", None)
        if progress:
            progress.start = 0.0
            progress.end = 0.0
            # Normalized class distribution splits the progress range
            # proportionally to class frequency.
            distrib = orange.Distribution(examples.domain.classVar, examples,
                                          weight)
            distrib.normalize()
        # Induce rules for each class value in turn.
        for targetClass in examples.domain.classVar:
            if progress:
                progress.start = progress.end
                progress.end += distrib[targetClass]
            self.targetClass = targetClass
            cl = orange.RuleLearner.__call__(self, examples, weight)
            for r in cl.rules:
                rules.append(r)
        if progress:
            progress(1.0, None)
        return CN2UnorderedClassifier(rules, examples, weight)
Exemple #30
0
def data_center(data):
    """
    Returns a center of the instances in the data set (average across data
    instances for continuous attributes, most frequent value for discrete
    attributes).
    """
    atts = data.domain.attributes
    astats = orange.DomainBasicAttrStat(data)
    # Continuous -> mean, discrete -> modus, other types -> None.
    # (Removed dead commented-out code and a redundant backslash that relied
    # on implicit line continuation inside the brackets.)
    center = [astats[a].avg if a.varType == orange.VarTypes.Continuous
              else _modus(orange.Distribution(a, data)) if a.varType == orange.VarTypes.Discrete
              else None
              for a in atts]
    if data.domain.classVar:
        # Dummy class value (index 0) so the Example can be constructed.
        center.append(0)
    return orange.Example(data.domain, center)