Esempio n. 1
0
    def __call__(self, examples, weight=0, fulldata=0):
        """Fit a logistic model on ``examples`` and return a classifier.

        The domain translation is analysed on ``fulldata`` when that argument
        is given (compares unequal to 0) and on ``examples`` otherwise;
        ``weight`` is forwarded to that analysis.

        When the class variable has more than two values, one model is fitted
        for every class value except the most frequent one.  For each such
        fit the last column of every transformed row is overwritten with 1
        if the example carries that class value and 0 otherwise.  The fitted
        models are returned wrapped in an ``ArrayLogisticClassifier``.  With
        two (or fewer) class values a single model is fitted and returned in
        a ``BasicLogisticClassifier``.

        Raises:
            TypeError: if the class variable's ``varType`` is not 1 (the
                error message describes this as a non-discrete class).
        """
        if examples.domain.classVar.varType != 1:
            # String exceptions are not valid: on Python >= 2.6 the old
            # `raise "..."` itself failed with an unrelated TypeError, so
            # raise a real TypeError that carries the intended message.
            raise TypeError("Logistic learner only works with discrete class.")

        translate = orng2Array.DomainTranslation(self.translation_mode_d,
                                                 self.translation_mode_c)
        analysis_data = fulldata if fulldata != 0 else examples
        translate.analyse(analysis_data, weight, warning=0)
        translate.prepareLR()
        mdata = translate.transform(examples)

        # Attribute importances: (relief measure, attribute index) pairs,
        # sorted ascending by the measure.
        t = examples
        importance = [(orange.MeasureAttribute_relief(attr, t), i)
                      for i, attr in enumerate(t.domain.attributes)]
        importance.sort()

        # Class distribution normalised to sum to 1.
        freqs = list(orange.Distribution(examples.domain.classVar, examples))
        s = 1.0 / sum(freqs)
        freqs = [x * s for x in freqs]

        rl = RobustBLogisticLearner(regularization=self.regularization)
        if len(examples.domain.classVar.values) <= 2:
            r = rl(mdata, translate, importance, freqs)
            return BasicLogisticClassifier(r, translate)

        # Several experiments: sort class values by frequency; the last
        # entry is the most frequent ("base") class value.
        tfreqs = sorted((f, i) for i, f in enumerate(freqs))
        base_freq = tfreqs[-1][0]

        # The class index of each example does not change between
        # experiments, so look it up once instead of once per class value.
        labels = [int(examples[j].getclass()) for j in xrange(len(mdata))]

        classifiers = []
        for alt_freq, alter in tfreqs[:-1]:
            cfreqs = [base_freq, alt_freq]  # 0=base, 1=alternative
            # Relabel every transformed row in place for this experiment.
            for row, label in zip(mdata, labels):
                row[-1] = 1 if label == alter else 0
            classifiers.append(rl(mdata, translate, importance, cfreqs))
        return ArrayLogisticClassifier(classifiers, translate,
                                       tfreqs, examples.domain.classVar,
                                       len(mdata))
Esempio n. 2
0
 def __call__(self, examples, weight=0, fulldata=0):
     """Fit a BasicBayes classifier on ``examples``.

     The class variable must have ``varType`` 1 and exactly two values, and
     every attribute must have ``varType`` 1 (the error messages describe
     these as "binary discrete class" and non-continuous attributes).

     The domain translation is analysed on ``fulldata`` when that argument
     is given (compares unequal to 0) and on ``examples`` otherwise;
     ``weight`` is forwarded to that analysis.  The coefficients come from
     ``self._process`` applied to an ``orange.BayesLearner`` model built on
     ``examples``.

     Returns:
         A ``BasicBayesClassifier`` built from ``(beta, coeffs, translate)``.

     Raises:
         TypeError: if the class variable or any attribute fails the checks
             described above.
     """
     class_var = examples.domain.classVar
     # String exceptions are not valid: on Python >= 2.6 the old
     # `raise "..."` itself failed with an unrelated TypeError, so raise a
     # real TypeError that carries the intended message.
     if class_var.varType != 1 or len(class_var.values) != 2:
         raise TypeError(
             "BasicBayes learner only works with binary discrete class.")
     for attr in examples.domain.attributes:
         if attr.varType != 1:
             raise TypeError(
                 "BasicBayes learner does not work with continuous attributes.")

     translate = orng2Array.DomainTranslation(self.translation_mode_d,
                                              self.translation_mode_c)
     analysis_data = fulldata if fulldata != 0 else examples
     translate.analyse(analysis_data, weight)
     translate.prepareLR()

     (beta, coeffs) = self._process(orange.BayesLearner(examples), examples)
     return BasicBayesClassifier(beta, coeffs, translate)
Esempio n. 3
0
    def getmodel(self, data, fulldata):
        """Train an SVM on ``data`` and return ``(model, translate)``.

        ``self.type`` selects the SVM flavour.  Negative values are resolved
        from the class variable: -1 ("classical") becomes 3 for a continuous
        class (``varType == 2``) and 0 otherwise, -2 ("nu") becomes 4 or 1
        respectively, and -3 becomes 2 (one-class; the class is ignored).

        Integer ``self.degree`` and ``self.cache_size`` are converted to
        float and stored back on ``self`` before the parameters are checked.

        The domain translation is analysed on ``fulldata`` when that argument
        is given (compares unequal to 0) and on ``data`` otherwise, in both
        cases after passing through ``orange.Filter_hasClassValue``.

        Returns:
            A tuple of the value returned by ``orngCRS.SVMLearn`` and the
            ``DomainTranslation`` used to transform the data.

        Raises:
            AssertionError: if a parameter has the wrong type or is out of
                its allowed range.
            ValueError: for nu-classification (resolved type 1) when ``nu``
                is infeasible for some pair of class counts.
        """
        class_type = data.domain.classVar.varType
        # Regression is used for continuous classes (2), classification for
        # discrete ones (1); nothing else is supported.
        assert (class_type == 1 or class_type == 2)
        continuous = class_type == 2

        typ = self.type
        if typ == -1:  # Classical
            typ = 3 if continuous else 0
        elif typ == -2:  # Nu
            typ = 4 if continuous else 1
        elif typ == -3:  # OC
            typ = 2  # one-class, class is ignored.

        # Error checking.  Plain ints are accepted for these two parameters
        # and normalised to float first.
        if type(self.degree) is int:
            self.degree = float(self.degree)
        if type(self.cache_size) is int:
            self.cache_size = float(self.cache_size)
        for name in ("degree", "gamma", "coef0", "nu", "cache_size", "C",
                     "eps", "p"):
            assert type(getattr(self, name)) is float, name
        assert (typ in [0, 1, 2, 3, 4])
        assert (self.kernel in [0, 1, 2, 3])
        assert (self.cache_size > 0)
        assert (self.eps > 0)
        assert (self.nu <= 1.0 and self.nu >= 0.0)
        assert (self.p >= 0.0)
        assert (self.shrinking in [0, 1])
        assert (self.probability in [0, 1])

        # Feasibility of nu for nu-classification.  The original compared the
        # builtin `type` with 1, which is never true, so this check was dead.
        # NOTE(review): counts are taken over `data` before examples without
        # a class value are filtered out below -- confirm getclass() converts
        # to int for such examples.
        if typ == 1:
            counts = [0] * len(data.domain.classVar.values)
            for x in data:
                counts[int(x.getclass())] += 1
            for i in range(1, len(counts)):
                for j in range(i):
                    if self.nu * (counts[i] + counts[j]) > 2 * min(
                            counts[i], counts[j]):
                        raise ValueError("Infeasible nu value.")

        puredata = orange.Filter_hasClassValue(data)
        translate = orng2Array.DomainTranslation(self.translation_mode_d,
                                                 self.translation_mode_c)
        if fulldata != 0:
            purefulldata = orange.Filter_hasClassValue(fulldata)
            translate.analyse(purefulldata)
        else:
            translate.analyse(puredata)
        translate.prepareSVM(not self.for_nomogram)
        mdata = translate.transform(puredata)

        # Per-class weights are optional; without them the learner receives
        # a zero count and two empty lists.
        if len(self.classweights) == 0:
            n_weights, weights, labels = 0, [], []
        else:
            assert (len(puredata.domain.classVar.values) == len(
                self.classweights))
            cvals = [
                data.domain.classVar(i) for i in data.domain.classVar.values
            ]
            labels = translate.transformClass(cvals)
            n_weights, weights = len(self.classweights), self.classweights

        model = orngCRS.SVMLearn(mdata, typ, self.kernel, self.degree,
                                 self.gamma, self.coef0, self.nu,
                                 self.cache_size, self.C, self.eps, self.p,
                                 self.shrinking, self.probability,
                                 n_weights, weights, labels)
        return (model, translate)