예제 #1
0
 def updateStatsForPredictor(self,predictedValue:float,instance:Instance):
     """Fold one numeric prediction into the running regression statistics.

     An instance whose class is missing only adds its weight to
     ``m_MissingClass``.  Otherwise its weight is added to ``m_WithClass``;
     if the prediction itself is missing the weight is also added to
     ``m_Unclassified`` and no further statistic is touched.

     :param predictedValue: value predicted for ``instance``
     :param instance: instance the prediction was made for
     """
     if instance.classIsMissing():
         self.m_MissingClass += instance.weight()
         return

     self.m_WithClass += instance.weight()
     if Utils.isMissingValue(predictedValue):
         self.m_Unclassified += instance.weight()
         return

     weight = instance.weight()
     actual = instance.classValue()
     # Keep the (weight * x) * y association of the accumulated products.
     weighted_actual = weight * actual
     weighted_predicted = weight * predictedValue

     self.m_SumClass += weighted_actual
     self.m_SumSqrClass += weighted_actual * actual
     self.m_SumClassPredicted += weighted_actual * predictedValue
     self.m_SumPredicted += weighted_predicted
     self.m_SumSqrPredicted += weighted_predicted * predictedValue
     self.updateNumericScores(
         self.makeDistribution(predictedValue),
         self.makeDistribution(actual),
         weight,
     )
예제 #2
0
 def add(self, bagIndex: int, instance: Instance):
     """Add one instance's weight to the given bag.

     The weight is accumulated in the per-class-per-bag table, the per-bag
     totals, the per-class totals and the grand total ``totaL``.

     :param bagIndex: index of the bag receiving the instance
     :param instance: instance supplying the class value and weight
     """
     label = int(instance.classValue())
     amount = instance.weight()
     self.m_perClassPerBag[bagIndex][label] += amount
     self.m_perBag[bagIndex] += amount
     self.m_perClass[label] += amount
     self.totaL += amount
예제 #3
0
 def addWeights(self, instance: Instance, weights: List):
     """Spread one instance over every bag according to ``weights``.

     For each bag ``b`` the amount ``instance.weight() * weights[b]`` is
     added to the per-class-per-bag table, the per-bag totals, the per-class
     totals and the grand total ``totaL``.

     :param instance: instance supplying the class value and weight
     :param weights: one multiplier per bag, indexed like ``m_perBag``
     """
     label = int(instance.classValue())
     for bag in range(len(self.m_perBag)):
         share = instance.weight() * weights[bag]
         self.m_perClassPerBag[bag][label] += share
         self.m_perBag[bag] += share
         self.m_perClass[label] += share
         self.totaL += share
예제 #4
0
 def updateStatsForClassifier(self,predictedDistribution:List,instance:Instance):
     """Update the classification statistics with one predicted distribution.

     An instance whose class is missing only adds its weight to
     ``m_MissingClass``.  Otherwise the margins, ``m_WithClass``, the
     information/entropy sums, the numeric scores, the coverage statistics
     and the confusion matrix are updated.  When no class receives a
     probability greater than zero the weight is added to ``m_Unclassified``
     and everything after ``m_WithClass`` is skipped.

     :param predictedDistribution: predicted probability for each class
     :param instance: instance the distribution was predicted for
     """
     if instance.classIsMissing():
         self.m_MissingClass+=instance.weight()
         return

     weight=instance.weight()
     # The class value is used as a list index below, so it must be an int
     # (the sibling methods cast it the same way).  The cast happens only
     # after the missing-class check because a missing value cannot be cast.
     actualClass=int(instance.classValue())
     self.updateMargins(predictedDistribution,actualClass,weight)

     # Predicted class: first index holding the strictly largest probability
     # above zero; remains -1 when no probability is positive.
     predictedClass=-1
     bestProb=0
     for i in range(self.m_NumClasses):
         if predictedDistribution[i] > bestProb:
             predictedClass=i
             bestProb=predictedDistribution[i]
     self.m_WithClass+=weight
     if predictedClass < 0:
         self.m_Unclassified+=weight
         return

     # Clamp probabilities to the smallest positive float so log2 never sees
     # zero.  (Clamping to -inf, as before, was a no-op.)
     minProb=5e-324
     predictedProb=max(minProb,predictedDistribution[actualClass])
     priorProb=max(minProb,self.m_ClassPriors[actualClass]/self.m_ClassPriorsSum)
     if predictedProb >= priorProb:
         self.m_SumKBInfo+= (Utils.log2(predictedProb) - Utils.log2(priorProb)) * weight
     else:
         self.m_SumKBInfo-= (Utils.log2(1 - predictedProb) - Utils.log2(1 - priorProb)) * weight
     self.m_SumSchemeEntropy-= Utils.log2(predictedProb) * weight
     self.m_SumPriorEntropy-= Utils.log2(priorProb) * weight
     self.updateNumericScores(predictedDistribution,self.makeDistribution(instance.classValue()),weight)

     # Coverage: walk the classes from the end of the stableSort index order
     # until the accumulated probability reaches the confidence level.
     indices= Utils.stableSort(predictedDistribution)
     cumulativeProb=0
     sizeOfRegions=0
     for i in range(len(predictedDistribution)-1,-1,-1):
         if cumulativeProb >= self.m_ConfLevel:
             break
         cumulativeProb+=predictedDistribution[indices[i]]
         sizeOfRegions+=1
         if actualClass == indices[i]:
             self.m_TotalCoverage+=weight
     self.m_TotalSizeOfRegions+=weight*sizeOfRegions/(self.m_MaxTarget-self.m_MinTarget)

     self.m_ConfusionMatrix[actualClass][predictedClass]+=weight
     if predictedClass != actualClass:
         self.m_Incorrect+=weight
     else:
         self.m_Correct+=weight
예제 #5
0
    def evaluationForSingleInstance(self, a0, instance:Instance, storePredictions:bool):
        """Evaluate a single instance and return the prediction made for it.

        ``a0`` is dispatched on its runtime type:

        * ``List`` -- for a nominal class it is used as a class distribution
          and the prediction is the index returned by ``Utils.maxIndex``
          (or the missing value when that entry is not positive); otherwise
          its first element is the numeric prediction.  The matching
          ``updateStatsFor...`` method is called and, if requested and not
          discarded, a prediction record is appended to ``m_Predictions``.
        * ``Classifier`` -- a copy of ``instance`` with its class set to
          missing is passed to ``distributionForInstance`` and the result is
          evaluated through the ``List`` path.

        Any other type yields ``None``.

        :param a0: a distribution (``List``) or a ``Classifier``
        :param instance: instance being evaluated
        :param storePredictions: whether to record the prediction
        :return: the prediction, or ``None`` for an unsupported ``a0``
        """
        def remember(build_record):
            # The record is only built when it is actually going to be stored.
            if storePredictions and not self.m_DiscardPredictions:
                if self.m_Predictions is None:
                    self.m_Predictions = []
                self.m_Predictions.append(build_record())

        if isinstance(a0, List):
            if self.m_ClassIsNominal:
                outcome = Utils.maxIndex(a0)
                if a0[int(outcome)] <= 0:
                    outcome = Utils.missingValue()
                self.updateStatsForClassifier(a0, instance)
                remember(lambda: NominalPrediction(instance.classValue(), a0, instance.weight()))
                return outcome

            outcome = a0[0]
            self.updateStatsForPredictor(outcome, instance)
            remember(lambda: NumericPrediction(instance.classValue(), outcome, instance.weight()))
            return outcome

        if not isinstance(a0, Classifier):
            return None

        blinded = copy.deepcopy(instance)
        blinded.setDataset(instance.dataset())
        # TODO: special handling for InputMappedClassifier is not implemented.
        blinded.setClassMissing()

        outcome = self.evaluationForSingleInstance(
            a0.distributionForInstance(blinded), instance, storePredictions)

        # The estimator-specific statistics are only updated for a numeric
        # class with both the actual class and the prediction present.
        if self.m_ClassIsNominal or instance.classIsMissing() or Utils.isMissingValue(outcome):
            return outcome

        if isinstance(a0, IntervalEstimator):
            self.updateStatsForIntervalEstimator(a0, blinded, instance.classValue())
        else:
            self.m_CoverageStatisticsAvailable = False

        if isinstance(a0, ConditionalDensityEstimator):
            self.updateStatsForConditionalDensityEstimator(a0, blinded, instance.classValue())
        else:
            self.m_ComplexityStatisticsAvailable = False
        return outcome
예제 #6
0
    def process(self, toPredict: Instance, classifier: Classifier,
                evaluation: Evaluation):
        """Predict one instance, record it in ``evaluation`` and plot it.

        For a nominal class the classifier's distribution is obtained for a
        copy of ``toPredict`` and passed to
        ``evaluation.evaluationForSingleInstance``; for any other class type
        ``evaluation.evaluateModelOnceAndRecordPrediction`` is used.  When
        ``m_SaveForVisualization`` is set and ``m_PlotInstances`` exists, a
        new row is appended to ``m_PlotInstances`` together with one entry
        in ``m_PlotShapes`` and one in ``m_PlotSizes``.

        :param toPredict: instance to run through the classifier
        :param classifier: classifier producing the prediction
        :param evaluation: evaluation object that records the prediction
        """
        probActual = probNext = pred = 0
        # Work on a deep copy that is re-attached to the original dataset.
        # NOTE(review): the copy's class value is not set to missing here.
        classMissing = copy.deepcopy(toPredict)
        classMissing.setDataset(toPredict.dataset())

        if toPredict.classAttribute().isNominal():
            # Get the predicted class probability distribution
            preds = classifier.distributionForInstance(classMissing)
            # If all probabilities are 0 the instance belongs to no class
            val = 0
            if sum(preds) == 0:
                pred = Utils.missingValue()
                probActual = Utils.missingValue()
            else:
                # The predicted class is the index of the largest probability
                pred = Utils.maxIndex(preds)
                if not Utils.isMissingValue(toPredict.classIndex()):
                    # Use the actual class value as index when it is present;
                    # otherwise val keeps its default of 0
                    if not Utils.isMissingValue(toPredict.classValue()):
                        val = int(toPredict.classValue())
                    probActual = preds[val]
                else:
                    probActual = preds[Utils.maxIndex(preds)]
            # probNext: largest probability among the classes other than val
            for i in range(toPredict.classAttribute().numValues()):
                if i != val and preds[i] > probNext:
                    probNext = preds[i]
            evaluation.evaluationForSingleInstance(preds, toPredict, True)
        else:
            # Evaluate the single instance
            pred = evaluation.evaluateModelOnceAndRecordPrediction(
                classifier, toPredict)
        if not self.m_SaveForVisualization:
            return
        # Save the data used for visualization
        if self.m_PlotInstances is not None:
            isNominal = toPredict.classAttribute().isNominal()
            values = [0] * self.m_PlotInstances.numAttributes()
            # Manual index loop: the class position writes several slots at
            # once and skips over them.
            i = 0
            while i < self.m_PlotInstances.numAttributes():
                # Attributes before the class index are copied unchanged
                if i < toPredict.classIndex():
                    values[i] = toPredict.value(i)
                elif i == toPredict.classIndex():
                    if isNominal:
                        # Margin: probActual minus probNext
                        values[i] = probActual - probNext
                        # Predicted value
                        values[i + 1] = pred
                        # Original value
                        values[i + 2] = toPredict.value(i)
                        i += 2
                    else:
                        values[i] = pred
                        values[i + 1] = toPredict.value(i)
                        i += 1
                else:
                    # Attributes after the class are shifted by the number of
                    # extra slots inserted at the class position.
                    if isNominal:
                        values[i] = toPredict.value(i - 2)
                    else:
                        values[i] = toPredict.value(i - 1)
                i += 1
            # print("============")
            # for m in values:
            #     print("val:",m)
            # print("============")
            self.m_PlotInstances.add(Instance(1.0, values))
            if toPredict.classAttribute().isNominal():
                # Shape: missing, error (prediction differs from class value)
                # or the automatic shape.
                if toPredict.isMissing(
                        toPredict.classIndex()) or Utils.isMissingValue(pred):
                    self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
                elif pred != toPredict.classValue():
                    self.m_PlotShapes.append(Plot2D.ERROR_SHAPE)
                else:
                    self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
                # Size: either the margin, or the default size plus 1 when the
                # prediction differs from the class value.
                if self.m_pointSizeProportionalToMargin:
                    self.m_PlotSizes.append(probActual - probNext)
                else:
                    sizeAdj = 0
                    if pred != toPredict.classValue():
                        sizeAdj = 1
                    self.m_PlotSizes.append(Plot2D.DEFAULT_SHAPE_SIZE.value +
                                            sizeAdj)
            else:
                # Size is the signed error (prediction minus class value), or
                # None when either of the two is missing.
                errd = None
                if not toPredict.isMissing(toPredict.classIndex(
                )) and not Utils.isMissingValue(pred):
                    errd = pred - toPredict.classValue()
                    self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
                else:
                    self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
                self.m_PlotSizes.append(errd)