Example #1
 def __init__(self, name = "RF classifier", **kwds):
     self.verbose = 0
     self.varImportance =  {}
     self.__dict__.update(kwds)
     self._isRealProb = False
     self.name = name
     self.domain = None
     if self.classVar.varType == orange.VarTypes.Discrete:
         self.oobError = None  #self.classifier.get_train_error()  #   This is resulting in Segmentation fault : Problem reported by others in opencv blogs!
     else:   
         self.oobError = None
     self.ExFix = dataUtilities.ExFix()
     if self.imputeData != None:
         '''Create the imputer: the imputer needs the imputeData to exists allong it's life time'''
         try :
             self.domain = self.imputeData.domain
             if self.useBuiltInMissValHandling:
                 self.imputer = None
             else:
                 self.imputer = orange.Imputer_defaults(self.imputeData)
         except:
             self.imputer = None
             if self.verbose > 0: print "Unable to create the imputer, or even the builtIn RF imputer."
             return None
     else:
         self.imputer = None
         if self.verbose > 0: print "Warning! - No impute data defined"
         return None
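A minimal usage sketch for the constructor above. The class name RFClassifier and the training file name are assumptions; the keyword names (imputeData, classVar, useBuiltInMissValHandling) come straight from the __init__ body, which consumes them via self.__dict__.update(kwds).

# Hypothetical usage sketch -- class name and file name are assumptions.
import orange  # Orange 2.x

trainData = orange.ExampleTable("train.tab")  # assumed training set
classifier = RFClassifier(name="my RF classifier",
                          imputeData=trainData,  # must stay alive as long as the imputer
                          classVar=trainData.domain.classVar,
                          useBuiltInMissValHandling=False,
                          verbose=1)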
Example #2
def getMahalanobisResults(predictor,
                          invCovMatFile=None,
                          centerFile=None,
                          dataTableFile=None):
    domain = None
    if predictor.highConf is None and predictor.lowConf is None:
        return None, None
    if not dataTableFile and (not hasattr(predictor, "trainDataPath")
                              or not predictor.trainDataPath):
        print "The predictor does not have a trainDataPath specifyed. We need it for calculating Mahalanobis results!"
        return None, None
    testData = dataUtilities.attributeDeselectionData(predictor.exToPred,
                                                      ["SMILEStoPred"])
    if not dataTableFile:
        trainData = dataUtilities.DataTable(predictor.trainDataPath)
        domain = trainData.domain
    else:
        trainData = None
        domain = predictor.model.domain
    ExampleFix = dataUtilities.ExFix(domain, None, False)
    exFixed1 = ExampleFix.fixExample(testData[0])
    if testData.hasMissingValues():
        if not trainData:
            averageImputer = orange.Imputer_defaults(
                predictor.model.imputeData)
        else:
            averageImputer = orange.ImputerConstructor_average(trainData)
        dat = averageImputer(exFixed1)
    else:
        dat = exFixed1

    tab = dataUtilities.DataTable(domain)
    tab.append(dat)

    MD = calcMahalanobis(trainData, tab, invCovMatFile, centerFile,
                         dataTableFile, domain)
    near3neighbors = [(MD[0]["_train_id_near1"], MD[0]["_train_SMI_near1"]),
                      (MD[0]["_train_id_near2"], MD[0]["_train_SMI_near2"]),
                      (MD[0]["_train_id_near3"], MD[0]["_train_SMI_near3"])]
    avg3nearest = MD[0]["_train_av3nearest"]
    if avg3nearest < predictor.highConf:
        confStr = predictor.highConfString
    elif avg3nearest > predictor.lowConf:
        confStr = predictor.lowConfString
    else:
        confStr = predictor.medConfString

    return near3neighbors, confStr
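A hedged call sketch for the function above. It assumes predictor is an AZOrange predictor object already carrying the attributes read in the body (highConf, lowConf, exToPred, and either trainDataPath or a model with domain and imputeData); none of that setup is shown here.

# Hypothetical call sketch -- the predictor setup is assumed.
near3neighbors, confStr = getMahalanobisResults(predictor)
if near3neighbors is not None:
    for trainId, smiles in near3neighbors:
        print "neighbor:", trainId, smiles
    print "confidence:", confStr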
Example #3
 def __init__(self, name = "CvSVM classifier", **kwds):
     self.verbose = 0
     self.loadedModel = False
     self.__dict__.update(kwds)
     self.name = name
     self.domain = None
     self.ExFix = dataUtilities.ExFix()
     if self.imputeData:
         '''Create the imputer: the imputer needs the imputeData to exists allong it's life time'''
         try:
             self.domain = self.imputeData.domain
             self.imputer = orange.Imputer_defaults(self.imputeData)
         except:
             self.imputer = None
             if self.verbose > 0: print "Unable to create the imputer"
     else:
         if self.verbose > 0: print "Warning! - No impute data defined"
Example #4
    def __init__(self, name="CvANN classifier", **kwds):
        self.verbose = 0
        self.loadedModel = False
        self.__dict__.update(kwds)
        self._isRealProb = True  # This learner always returns real probabilities
        self.name = name
        self.domain = None
        self.ExFix = dataUtilities.ExFix()

        if self.imputeData:
            '''Create the imputer: the imputer needs the imputeData to exist along its lifetime'''
            try:
                self.domain = self.imputeData.domain
                self.imputer = orange.Imputer_defaults(self.imputeData)
            except Exception:
                self.imputer = None
                if self.verbose > 0: print "Unable to create the imputer"
                return
        else:
            self.imputer = None
            if self.verbose > 0:
                print "Warning! - No impute data defined"
            return
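The CvSVM and CvANN constructors in Examples 3 and 4 reuse the imputer setup of Example 1, so a single sketch covers both. This is a minimal sketch of the pattern in isolation, assuming Orange 2.x and an assumed data file name; it builds an average-based imputer the same way Example 2 does and uses it to fill in missing values.

# Minimal sketch of the shared imputation pattern -- file name is assumed.
import orange  # Orange 2.x

data = orange.ExampleTable("train.tab")            # assumed data set
imputer = orange.ImputerConstructor_average(data)  # averages become the defaults, as in Example 2
completeExample = imputer(data[0])                 # fills any missing values in the example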
Example #5
    def getTopImportantVars(self,
                            inEx,
                            nVars=1,
                            gradRef=None,
                            absGradient=True,
                            c_step=None,
                            getGrad=False):
        """Return the n top important variables (n = nVars) for the given example
            if nVars is 0, it returns all variables ordered by importance
            if c_step (costume step) is passed, force it instead of hardcoded
        """
        #    Determine Signature and non-Signature descriptor names
        #signDesc = []   # Uncommenting this line disables the distinction between signature and non-signature descriptors
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(
            [attr.name for attr in self.domain.attributes])

        varGrad = []

        ExFix = dataUtilities.ExFix()
        ExFix.set_domain(self.domain)
        ex = ExFix.fixExample(inEx)
        if self.basicStat is None or not self.NTrainEx or (
                self.domain.classVar.varType == orange.VarTypes.Discrete
                and len(self.domain.classVar.values) != 2):
            return None
        if gradRef is None:
            gradRef = self(ex, returnDFV=True)[1]

        def calcDiscVarGrad(var, ex, gradRef):
            step = 1  # MUST be 1!!
            if ex[var].isSpecial():
                return ([gradRef, gradRef], step)
            localMaxDiff = 0
            localMaxPred = gradRef
            #Uncomment next line to skip discrete variables
            #return localMaxPred
            for val in self.domain[var].values:
                localEx = orange.Example(ex)
                localEx[var] = val
                pred = self(localEx, returnDFV=True)[1]
                if abs(pred - gradRef) > localMaxDiff:
                    localMaxDiff = abs(pred - gradRef)
                    localMaxPred = pred
            return ([localMaxPred, gradRef], step)

        def calcContVarGrad(var, ex, gradRef):
            localEx = orange.Example(ex)
            if c_step is None:
                if self.domain.classVar.varType == orange.VarTypes.Discrete:  # Classification
                    coef_step = 1.0
                else:
                    coef_step = 0.08  # Needs confirmation! Coefficient step: c
            else:
                # For significance testing only: comment out the raise and uncomment the coef_step line.
                raise Exception(
                    "This mode should only be used for debugging! Comment this line out when debugging."
                )
                #coef_step = float(c_step)

            if var in signDesc:
                step = 1  # Set step to one in case of signatures
            else:
                #   dev - Standard deviation:  http://orange.biolab.si/doc/reference/Orange.statistics.basic/
                if "dev" in self.basicStat[var]:
                    step = self.basicStat[var]["dev"] * coef_step
                else:
                    return ([gradRef, gradRef], 0)

            if ex[var].isSpecial():
                return ([gradRef, gradRef], step)
            # step UP
            localEx[var] = ex[var] + step
            ResUp = self(localEx, returnDFV=True)[1]
            # step DOWN
            localEx[var] = ex[var] - step
            ResDown = self(localEx, returnDFV=True)[1]
            return ([ResUp, ResDown], step)

        def calcVarGrad(var, ex, gradRef):
            # Use the var parameter consistently instead of relying on the
            # enclosing loop's attr variable (callers pass attr.name).
            if self.domain[var].varType == orange.VarTypes.Discrete:
                res, step = calcDiscVarGrad(var, ex, gradRef)
                #          f(a)   f(x)
                _grad = (res[0] - res[1])  # /step   ... but step MUST be 1!!
                _faMax = res[0]
            else:
                res, step = calcContVarGrad(var, ex, gradRef)
                if step == 0:
                    _grad = 0
                else:
                    # Central-difference estimate: f'(x) ~ (f(x+step) - f(x-step)) / (2*step)
                    _grad = (res[0] - res[1]) / (2.0 * step)
                _faMax = None
            return (_grad, _faMax)

        def compareABS(x, y):
            if abs(x) > abs(y):
                return 1
            elif abs(x) < abs(y):
                return -1
            else:
                return 0

        eps = 1E-5  # epsilon: attributes whose derivative magnitude is below this are treated as 0 and skipped
        # Print used for algorithm final confirmation
        #print "  %s  " % (str(gradRef)),

        for attr in self.domain.attributes:
            grad = calcVarGrad(attr.name, ex, gradRef)
            # Print used for testing significance
            #print  "  %s  " % (str(grad[0])),

            # Print used for algorithm final confirmation
            #print "  %s  " % (str(grad[1])),

            if attr.name in signDesc:
                actualEps = 0
            else:
                actualEps = eps
            # Only consider attributes whose derivative magnitude is greater than epsilon.
            if abs(grad[0]) > actualEps:
                #                  f'(x)                  x             f(a)
                #                derivative value     direction      f(a) farthest away from f(x); only set for classification
                varGrad.append((grad[0], attr.name, grad[1]))

        #Separate continuous from categorical variables
        contVars = []
        discVars = []
        for var in varGrad:
            if self.domain[var[1]].varType == orange.VarTypes.Discrete:
                discVars.append(var)
            else:
                contVars.append(var)

        if nVars == 0:
            nRet = None
        else:
            nRet = nVars

        #Order the vars in terms of importance
        if absGradient:
            contVars.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
            contVars = getVarNames(groupTiedScores(contVars, 0),
                                   getGrad=getGrad)
            discVars.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
            discVars = getVarNames(groupTiedScores(discVars, 0),
                                   getGrad=getGrad)
            return {"Continuous":contVars[0:min(nRet,len(contVars))] ,\
                    "Discrete"  :discVars[0:min(nRet,len(discVars))] }

        if self.domain.classVar.varType == orange.VarTypes.Discrete:  # Classification
            # We will be looking to the max f(a) [2]
            # Will be excluding attributes for which f(a) was between 0 and f(x):  |f(a)| < |f(x)| AND f(x)*f(a)>0
            idx4Rem = []
            for idx, v in enumerate(discVars):
                fx = gradRef
                fa = v[2]
                if abs(fa) < abs(fx) and (fx * fa) > 0:
                    idx4Rem.append(idx)
            idx4Rem.sort(reverse=True)
            for idx in idx4Rem:
                discVars.pop(idx)

        # (3 lines) Print used for algorithm final confirmation
        #        print "   %s   " % (idx4Rem),
        #else:
        #        print "   %s   " % ([]),

        # Now we will be looking only to the actual derivative value; [0]
        UPd = [v for v in discVars if v[0] > 0]
        UPd.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        UPd = getVarNames(groupTiedScores(UPd, 0), getGrad=getGrad)

        DOWNd = [v for v in discVars if v[0] < 0]
        DOWNd.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        DOWNd = getVarNames(groupTiedScores(DOWNd, 0), getGrad=getGrad)

        UPc = [v for v in contVars if v[0] > 0]
        UPc.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        UPc = getVarNames(groupTiedScores(UPc, 0), getGrad=getGrad)
        DOWNc = [v for v in contVars if v[0] < 0]
        DOWNc.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        DOWNc = getVarNames(groupTiedScores(DOWNc, 0), getGrad=getGrad)


        return {"Continuous":{"UP":   UPc[0:min(nRet,len(  UPc))],\
                              "DOWN": DOWNc[0:min(nRet,len(DOWNc))]},\
                "Discrete":  {"UP":   UPd[0:min(nRet,len(  UPd))],\
                              "DOWN": DOWNd[0:min(nRet,len(DOWNd))]}   }