Ejemplo n.º 1
0
 def add(disc1, disc2, sumd):
     """Element-wise add `disc2` into a copy of `disc1`.

     Returns the combined distribution together with the running total
     `sumd` increased by the mass contributed by `disc2`.
     """
     combined = orange.DiscDistribution(disc1)
     total = sumd
     for idx, _ in enumerate(combined):
         combined[idx] += disc2[idx]
         total += disc2[idx]
     return combined, total
Ejemplo n.º 2
0
    def __call__(self, example, result_type=orange.GetValue, retRules=False):
        """Classify by averaging the class distributions of all covering rules.

        Falls back to the stored prior distribution when no rule fires.
        When `retRules` is true, the list of covering rules is returned too.
        """
        def accumulate(base, extra, running_total):
            # Copy `base`, add `extra` element-wise, extend the running sum.
            merged = orange.DiscDistribution(base)
            for idx, _ in enumerate(merged):
                merged[idx] += extra[idx]
                running_total += extra[idx]
            return merged, running_total

        # Start from an empty distribution over the class variable.
        retDist = orange.DiscDistribution(self.examples.domain.classVar)
        covRules = orange.RuleList()
        sumdisc = 0.
        # Sum the class distributions of every rule that covers the example.
        for rule in self.rules:
            if rule(example) and rule.classDistribution:
                retDist, sumdisc = accumulate(retDist, rule.classDistribution, sumdisc)
                covRules.append(rule)
        # Nothing covered the example: use the prior instead.
        if not sumdisc:
            retDist = self.prior
            sumdisc = self.prior.abs
        # Normalize into a probability distribution.
        for cls in self.examples.domain.classVar:
            retDist[cls] /= sumdisc
        if retRules:
            if result_type == orange.GetValue:
                return (retDist.modus(), covRules)
            if result_type == orange.GetProbabilities:
                return (retDist, covRules)
            return (retDist.modus(), retDist, covRules)
        if result_type == orange.GetValue:
            return retDist.modus()
        if result_type == orange.GetProbabilities:
            return retDist
        return (retDist.modus(), retDist)
Ejemplo n.º 3
0
    def test_discrete(self):
        """RandomClassifier construction and behaviour on a discrete class."""
        data = orange.ExampleTable("iris")
        class_dist = orange.Distribution(data.domain.class_var, data)

        cc = orange.RandomClassifier(data.domain.class_var)
        self.assertEqual(cc.probabilities.variable, cc.class_var)

        # Built from a distribution: it must be stored by reference.
        cc2 = orange.RandomClassifier(None, class_dist)
        self.assertEqual(cc2.class_var, data.domain.class_var)
        self.assertEqual(cc2.probabilities.variable, cc2.class_var)
        self.assertEqual(id(cc2.probabilities), id(class_dist))
        self.assertTrue(all(p == 50 for p in cc2.probabilities))

        for classifier in (cc, cc2):
            # Repeated calls on the same example must be deterministic ...
            for ex in data[0:150:20]:
                answers = set(classifier(ex) for _ in range(5))
                self.assertEqual(len(answers), 1)
            # ... yet over all examples every class must appear.
            answers = set(classifier(ex) for ex in data)
            self.assertEqual(len(answers), 3)

        for ex in data[0:150:20]:
            probs = cc2(ex, orange.Classifier.GetProbabilities)
            self.assertTrue(all(p == 50 for p in probs))

        # Invalid constructions must raise.
        self.assertRaises(TypeError, orange.RandomClassifier, class_dist)
        self.assertRaises(ValueError, orange.RandomClassifier, None, orange.DiscDistribution())
        self.assertRaises(ValueError, orange.RandomClassifier, data.domain[1], orange.Distribution(data.domain[0]))
Ejemplo n.º 4
0
 def _generateProbabilities(self, prediction):
     """Build a degenerate class distribution with all mass on `prediction`.

     An unknown prediction ("?") yields the empty (all-zero) distribution.
     """
     probs = orange.DiscDistribution(self.classVar)
     if prediction != "?":
         probs[prediction] = 1
     return probs
Ejemplo n.º 5
0
    def __call__(self, example, resultType=orange.GetValue):
        """Average the probability estimates of all rules covering `example`.

        Defers to the majority classifier when no rule covers the example.
        """
        n_classes = len(self.data.domain.classVar.values)
        totals = [0] * n_classes
        num_cover = 0.0
        # 1. Sum the distributions predicted by every covering rule.
        for rsc in self.rulesClass:
            for rule in rsc.rules.rules:
                if rule.covers(example):
                    num_cover += 1
                    rule_dist = rule(example, orange.GetProbabilities)
                    for i in range(n_classes):
                        totals[i] += rule_dist[i]
        if num_cover:
            # 2. Average the sums and pick the first most-probable class.
            averaged = [t / num_cover for t in totals]
            max_index = averaged.index(max(averaged))
            dist = orange.DiscDistribution(averaged)
            value = orange.Value(self.data.domain.classVar,
                                 self.data.domain.classVar.values[max_index])
        else:
            # No rule fires: fall back to the majority classifier.
            value, dist = self.majorityClassifier(example, orange.GetBoth)

        # 3. Package the result in the requested form.
        if resultType == orange.GetValue:
            return value
        elif resultType == orange.GetBoth:
            return (value, dist)
        else:
            return dist
Ejemplo n.º 6
0
    def t2est_discrete(self):
        """Exercise ConstantClassifier with a discrete class variable.

        NOTE(review): the name starts with `t2est`, so unittest will not
        discover it — presumably disabled on purpose; the name is kept.
        """
        d = orange.ExampleTable("iris")
        dist = orange.Distribution(d.domain.class_var, d)

        # Constructed from the class variable alone.
        cc = orange.ConstantClassifier(d.domain.class_var)
        self.assertEqual(cc.class_var, d.domain.class_var)
        self.assertEqual(cc.default_distribution.variable, cc.class_var)

        # Constructed from a distribution; it must be kept by reference.
        cc2 = orange.ConstantClassifier(dist)
        self.assertEqual(cc2.class_var, d.domain.class_var)
        self.assertEqual(cc2.default_distribution.variable, cc2.class_var)
        self.assertEqual(id(cc2.default_distribution), id(dist))
        self.assertTrue(all(x==50 for x in cc2.default_distribution))

        cc3 = orange.ConstantClassifier(d.domain.class_var, None, dist)
        self.assertEqual(cc3.class_var, d.domain.class_var)
        self.assertEqual(cc3.default_distribution.variable, cc3.class_var)
        self.assertEqual(id(cc3.default_distribution), id(dist))
        self.assertTrue(all(x==50 for x in cc3.default_distribution))

        cc4 = orange.ConstantClassifier(d.domain.class_var, "Iris-setosa", dist)
        self.assertEqual(cc4.class_var, d.domain.class_var)
        # Fixed: compare cc4's distribution variable against cc4's own
        # class_var (the original said cc3 — a copy-paste slip that passed
        # only because both equal d.domain.class_var).
        self.assertEqual(cc4.default_distribution.variable, cc4.class_var)
        self.assertEqual(id(cc4.default_distribution), id(dist))
        self.assertTrue(all(x==50 for x in cc4.default_distribution))

        for cl in [cc, cc2, cc3]:
            # Same example always yields the same answer ...
            for e in d[0:150:20]:
                anss = set()
                for i in range(5):
                    anss.add(cl(e))
                self.assertEqual(len(anss), 1)
            # ... and over all examples every class appears.
            anss = set()
            for e in d:
                anss.add(cl(e))
            self.assertEqual(len(anss), 3)

        # cc4 has an explicit default value and must always return it.
        # (Removed a dead `anss = set()` local that was never used here.)
        for e in d[0:150:20]:
            for _ in range(5):
                self.assertEqual(cc4(e), "Iris-setosa")

        for cl in [cc2, cc3, cc4]:
            for e in d[0:150:20]:
                self.assertTrue(all(x==50 for x in cl(e, orange.Classifier.GetProbabilities)))

        # Invalid constructions must raise.
        self.assertRaises(TypeError, orange.ConstantClassifier, d.domain.class_var, dist)
        self.assertRaises(ValueError, orange.ConstantClassifier, None, "?", orange.DiscDistribution())
        self.assertRaises(ValueError, orange.ConstantClassifier, d.domain[1], "?", orange.Distribution(d.domain[0]))

        # The default distribution is assignable from a plain list.
        cc4.default_distribution = [50, 50, 50]
        self.assertEqual(list(cc4.default_distribution), [50, 50, 50])

        cc5 = orange.ConstantClassifier(d.domain.class_var, "Iris-setosa", [50, 50, 50])
        self.assertEqual(list(cc5.default_distribution), [50, 50, 50])
Ejemplo n.º 7
0
 def _getProbabilities(self, ProbOf1):
     """Return the orange-style output probabilities for the current prediction.

     `ProbOf1` is the fraction of tree votes for the class that openCV
     encodes as the scalar 1; the other class receives the complement.
     This is only valid for binary classifiers (opencv limitation).
     """
     # Find the classValue string that is represented by the scalar 1 in opencvRF.
     class1 = dataUtilities.CvMat2orangeResponse(1, self.classVar).value
     dist = orange.DiscDistribution(self.classVar)
     idx1 = self.classVar.values.index(class1)
     dist[idx1] = ProbOf1
     # Fixed: the original indexed with `not idx1`, relying on bool-as-int;
     # `1 - idx1` states the binary complement explicitly (identical result
     # for the guaranteed indices 0/1, but no silent misbehaviour otherwise).
     dist[1 - idx1] = 1 - ProbOf1
     return dist
Ejemplo n.º 8
0
 def __call__(self, ex, what=orange.Classifier.GetValue):
     """Map the pair (var1, var2) to one combined class value.

     The combined index is var1 * noValues2 + var2; the distribution puts
     all probability mass on that value.
     """
     combined = ex[self.var1] * self.noValues2 + ex[self.var2]
     if what == orange.Classifier.GetValue:
         return orange.Value(self.classVar, combined)
     dist = orange.DiscDistribution(self.classVar)
     dist[combined] = 1.0
     if what == orange.Classifier.GetProbabilities:
         return dist
     return (orange.Value(self.classVar, combined), dist)
Ejemplo n.º 9
0
    def test_construction(self):
        """Different ways of constructing a discrete distribution."""
        data = orange.ExampleTable("zoo")

        # A zip of (value, frequency) pairs is not an accepted argument.
        self.assertRaises(TypeError, orange.DiscDistribution,
                          zip(data.domain["type"].values, self.freqs))

        # Counting from data must match building from raw frequencies.
        from_data = orange.Distribution("type", data)
        from_freqs = orange.DiscDistribution(self.freqs)
        self.assertEqual(from_data, from_freqs)

        # A discrete variable alone yields an (empty) DiscDistribution.
        empty = orange.Distribution(data.domain.classVar)
        self.assertTrue(isinstance(empty, orange.DiscDistribution))
Ejemplo n.º 10
0
    def __call__(self, example, what=orange.Classifier.GetValue):
        """Predict with the wrapped classifier and convert to orange types.

        `orange_classify` is assumed to return P(class == 1) for a binary
        class variable — the rounding to pick `answer` relies on exactly
        that (TODO confirm against the wrapped classifier).
        """
        probability = self.classifier.orange_classify(example)

        answer = orange.Value(self.classVar, int(round(probability)))
        probabilities = orange.DiscDistribution(self.classVar)
        # BUG FIX: the original stored `probability` under whichever class
        # was predicted, so when class 0 won it recorded P(class == 1) as
        # the probability of class 0. Distribute the mass over both binary
        # classes instead.
        probabilities[1] = probability
        probabilities[0] = 1 - probability
        if what == orange.Classifier.GetValue:
            return answer
        elif what == orange.Classifier.GetProbabilities:
            return probabilities
        else:
            return answer, probabilities
Ejemplo n.º 11
0
 def __call__(self, node, example):
   """Descend the tree from `node` following `example`'s attribute values.

   Returns the reached node, or a `(node, votes)` pair with random
   normalized branch weights when the selected branch value is unknown
   or out of range.
   """
   while node.branchSelector:
     branch = node.branchSelector(example)
     if branch.isSpecial() or int(branch)>=len(node.branches):
       # Unknown/out-of-range branch: weight the branches at random,
       # normalize, and hand the decision back to the caller.
       votes = orange.DiscDistribution([random.randint(0, 100) for i in node.branches])
       votes.normalize()
       print "Weights:", votes
       return node, votes
     nextNode = node.branches[int(branch)]
     if not nextNode:
       # Null child: stop here and return the current node.
       break
     node = nextNode
   return node
Ejemplo n.º 12
0
 def __call__(self, ex, what=orange.Classifier.GetValue):
     """Classify `ex`; return the value, a degenerate distribution, or both.

     Raises ValueError for an unrecognized `what` flag.
     """
     value = self.classify(ex)
     result = orange.Value(ex.domain.classVar, str(value))
     probs = orange.DiscDistribution(ex.domain.classVar)
     probs[value] = 1.0
     if what == orange.Classifier.GetValue:
         return result
     elif what == orange.Classifier.GetProbabilities:
         return probs
     elif what == orange.Classifier.GetBoth:
         return result, probs
     else:
         # repr() replaces the deprecated backquote syntax (`what`),
         # which was removed in Python 3; the result is identical.
         raise ValueError("Bad what argument: %s" % repr(what))
Ejemplo n.º 13
0
 def __getProbabilities(self, fannOutVector):
     """Convert a raw FANN output vector into an orange class distribution.

     Normalizes by the vector sum, then clips each entry: values <= 0
     become 0, and values > 1 have the total negative mass subtracted.
     """
     dist = orange.DiscDistribution(self.classVar)
     total = sum(fannOutVector)
     normalized = [raw / total for raw in fannOutVector]
     # Total absolute mass of the negative entries, removed from any
     # entry that overshoots 1 below.
     negative_mass = abs(sum(p for p in normalized if p < 0))
     for idx, p in enumerate(normalized):
         label = self.classVar.values[idx]
         if p > 1:
             dist[label] = p - negative_mass
         elif p <= 0:
             dist[label] = 0
         else:
             dist[label] = p
     return dist
Ejemplo n.º 14
0
    def t2est_continuous(self):
        """Exercise ConstantClassifier with a continuous class variable.

        NOTE(review): the name starts with `t2est`, so unittest will not
        discover it — presumably disabled on purpose; the name is kept.
        """
        d = orange.ExampleTable("iris")
        # Rebuild the table so the last attribute serves as a continuous class.
        dom2 = orange.Domain(d.domain.attributes)
        d = orange.ExampleTable(dom2, d)
        self.assertEqual(d.domain.class_var.var_type, orange.Variable.Continuous)

        dist = orange.Distribution(d.domain.class_var, d)

        cc = orange.ConstantClassifier(d.domain.class_var)
        self.assertEqual(cc.class_var, d.domain.class_var)
        self.assertEqual(cc.default_distribution.variable, cc.class_var)

        # Constructed from a distribution; it must be kept by reference.
        cc2 = orange.ConstantClassifier(dist)
        self.assertEqual(cc2.class_var, d.domain.class_var)
        self.assertEqual(cc2.default_distribution.variable, cc2.class_var)
        self.assertEqual(id(cc2.default_distribution), id(dist))

        cc3 = orange.ConstantClassifier(d.domain.class_var, None, dist)
        self.assertEqual(cc3.class_var, d.domain.class_var)
        self.assertEqual(cc3.default_distribution.variable, cc3.class_var)
        self.assertEqual(id(cc3.default_distribution), id(dist))

        cc4 = orange.ConstantClassifier(d.domain.class_var, 5, dist)
        self.assertEqual(cc4.class_var, d.domain.class_var)
        # Fixed: compare cc4's distribution variable against cc4's own
        # class_var (the original said cc3 — a copy-paste slip that passed
        # only because both equal d.domain.class_var).
        self.assertEqual(cc4.default_distribution.variable, cc4.class_var)
        self.assertEqual(id(cc4.default_distribution), id(dist))

        # Without an explicit default value the prediction is the average.
        for cl in [cc2, cc3]:
            for e in d:
                self.assertEqual(cl(e), dist.average())

        # With an explicit default value it is always returned.
        for e in d:
            self.assertEqual(cc4(e), 5)

        # Invalid constructions must raise.
        self.assertRaises(TypeError, orange.ConstantClassifier, d.domain.class_var, dist)
        self.assertRaises(ValueError, orange.ConstantClassifier, None, "?", orange.DiscDistribution())
        self.assertRaises(ValueError, orange.ConstantClassifier, d.domain[1], "?", orange.Distribution(d.domain[0]))

        # The default distribution is assignable from a plain list.
        cc4.default_distribution = [50, 50, 50]
        self.assertEqual(list(cc4.default_distribution), [50, 50, 50])
Ejemplo n.º 15
0
 def getProbabilities(self, prediction):
     """Return a distribution with all probability mass on `prediction`."""
     probs = orange.DiscDistribution(self.domain.classVar)
     probs[prediction] = 1
     return probs
Ejemplo n.º 16
0
# Quartiles of the continuous distribution `dage` (defined earlier in the
# file; presumably the "age" attribute — confirm against the full script).
print "Quartiles: %5.3f - %5.3f - %5.3f" % (
    dage.percentile(25), dage.percentile(50), dage.percentile(75))
print

# Density estimates at 17.0 .. 18.9 in steps of 0.1.
for x in range(170, 190):
    print "dens(%4.1f)=%5.3f," % (x / 10.0, dage.density(x / 10.0)),

print "*** WORKCLASS ***"
# Discrete distribution for the "workclass" attribute; it exposes
# dict-like accessors (native/keys/values/items).
dwcl = dist["workclass"]
print "Native representation:", dwcl.native()
print "Keys:", dwcl.keys()
print "Values:", dwcl.values()
print "Items: ", dwcl.items()
print

# Sampling from a hand-made discrete distribution.
disc = orange.DiscDistribution([0.5, 0.3, 0.2])
for i in range(20):
    print disc.random(),
print

# Attach a variable to the distribution and sample again.
# NOTE(review): presumably random() then yields named values instead of
# bare indices — verify against the Orange documentation.
v = orange.EnumVariable(values=["red", "green", "blue"])
disc.variable = v
for i in range(20):
    print disc.random(),
print

print
# A continuous distribution built directly from a {value: frequency} dict.
cont = orange.ContDistribution({0.1: 12, 0.3: 3, 0.7: 3})
print "Manually constructed continuous distibution: ", cont
print
Ejemplo n.º 17
0
    def _singlePredict(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
        """
        Predict a single example with the wrapped SVM classifier.

        orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
        orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
        orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
        returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple:
                ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 2.34443)
                (<orange.Value 'Act'='3.44158792'>, 2.34443) 
                (<0.000, 0.000>, 2.34443)
                If it is not a binary classifier, DFV will be equal to None
                DFV will be a value from greater or equal to 0  
        """
        res = None
        #dataUtilities.rmAllMeta(examples)
        # Work on a meta-free copy when the example carries meta attributes
        # (the imputer below cannot strip metas from single Example objects).
        if len(origExamples.domain.getmetas()) == 0:
            examples = origExamples
        else:
            examples = dataUtilities.getCopyWithoutMeta(origExamples)

        #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
        if self.imputer:
            dataUtilities.verbose = self.verbose
            # Lazily configure the example fixer with the imputer's domain.
            if not self.ExFix.ready:
                self.ExFix.set_domain(self.imputer.defaults.domain)
                self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
            inExamples = self.ExFix.fixExample(examples)

            # The fixer could not make the example compatible: no prediction.
            if not inExamples:
                if self.verbose > 0: print "Warning no example. Returning None prediction"
                return None

            #Imput the examples if there are missing values     
            examplesImp = self.imputer(inExamples)
            # There is a problem with using the imputer when examples contain meta attributes.
            # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
            if not examplesImp:
                if self.verbose > 0: print "Unable to predict with the SVM model."
                if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
                return None
        else:
            if self.verbose > 0: print "Warning: No Imputer in SVM Classifier"
            examplesImp = examples

        if self.classifier.get_support_vector_count() ==0:
            if self.verbose > 0: print "WARNING:  Support Vectors count is 0 (zero)" 
        DFV = None
        if examplesImp: 
            if self.scalizer:
                # Scale the example, predict, then map the prediction back
                # to the original class scale.
                exToPredict = dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp,True), self.varNames)
                res = self.classifier.predict(exToPredict)
                res = self.scalizer.convertClass(res)
                # A true decision-function value is only requested for binary
                # (two-valued, discrete) classification.
                if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                    DFV = self.classifier.predict(exToPredict, True)
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = res 
                self._updateDFVExtremes(DFV)
                res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
            else:
                # Same flow without scaling.
                exToPredict = dataUtilities.Example2CvMat(examplesImp,self.varNames)
                res = self.classifier.predict(exToPredict)
                if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                    DFV = self.classifier.predict(exToPredict, True)
                else:
                    #On Regression models assume the DVF as the value predicted
                    DFV = res
                self._updateDFVExtremes(DFV)
                res = dataUtilities.CvMat2orangeResponse(res,self.classVar)

            # When probabilities (or both) were requested, build a degenerate
            # distribution with all mass on the predicted value.
            if resultType!=orange.GetValue:
                if examplesImp.domain.classVar.varType != orange.VarTypes.Continuous:
                    dist = orange.DiscDistribution(examplesImp.domain.classVar)
                    dist[res]=1
                else:
                    y_hat = self.classVar(res)
                    dist = Orange.statistics.distribution.Continuous(self.classVar)
                    dist[y_hat] = 1.0
                if resultType==orange.GetProbabilities:
                    res = dist
                else:
                    res = (res,dist)

            # Wrap the result with the decision-function value when asked.
            if returnDFV:
                res = (res,DFV)

        self.nPredictions += 1
        return res