def updateStatsForPredictor(self, predictedValue: float, instance: Instance):
    """Fold one numeric prediction into the running regression statistics.

    Args:
        predictedValue: the value predicted for ``instance``.
        instance: the evaluated instance; supplies the actual class value
            and the weight applied to every accumulator.

    An instance whose class is missing only increases ``m_MissingClass``.
    A prediction that ``Utils.isMissingValue`` reports as missing increases
    ``m_WithClass`` and ``m_Unclassified`` and nothing else.
    """
    weight = instance.weight()

    if instance.classIsMissing():
        self.m_MissingClass += weight
        return

    self.m_WithClass += weight
    if Utils.isMissingValue(predictedValue):
        self.m_Unclassified += weight
        return

    actual = instance.classValue()
    weightedActual = weight * actual
    weightedPredicted = weight * predictedValue

    # Running sums of the actual value, the predicted value, their squares
    # and their product, each scaled by the instance weight.
    self.m_SumClass += weightedActual
    self.m_SumSqrClass += weightedActual * actual
    self.m_SumClassPredicted += weightedActual * predictedValue
    self.m_SumPredicted += weightedPredicted
    self.m_SumSqrPredicted += weightedPredicted * predictedValue

    self.updateNumericScores(
        self.makeDistribution(predictedValue),
        self.makeDistribution(actual),
        weight,
    )
def add(self, bagIndex: int, instance: Instance):
    """Add one instance to a bag and update every running total.

    Args:
        bagIndex: index of the bag that receives the instance.
        instance: the instance to add; its class value (truncated to int)
            selects the class slot and its weight is the amount added.
    """
    cls = int(instance.classValue())
    amount = instance.weight()

    self.m_perClassPerBag[bagIndex][cls] += amount
    self.m_perBag[bagIndex] += amount
    self.m_perClass[cls] += amount
    self.totaL += amount
def addWeights(self, instance: Instance, weights: List):
    """Spread one instance over all bags according to per-bag weights.

    Args:
        instance: the instance to add; its class value (truncated to int)
            selects the class slot.
        weights: one factor per bag; bag ``i`` receives
            ``instance.weight() * weights[i]``.
    """
    cls = int(instance.classValue())
    baseWeight = instance.weight()

    for bag in range(len(self.m_perBag)):
        share = baseWeight * weights[bag]
        self.m_perClassPerBag[bag][cls] += share
        self.m_perBag[bag] += share
        self.m_perClass[cls] += share
        self.totaL += share
def updateStatsForClassifier(self, predictedDistribution: List, instance: Instance):
    """Update the nominal-class statistics with one predicted distribution.

    Args:
        predictedDistribution: per-class scores for ``instance``, indexed
            by class.
        instance: the evaluated instance; supplies the actual class value
            and the weight applied to every accumulator.

    An instance whose class is missing only increases ``m_MissingClass``.
    When no class has a score above zero, the instance is counted in
    ``m_WithClass`` and ``m_Unclassified`` and nothing else is updated.
    """
    weight = instance.weight()

    if instance.classIsMissing():
        self.m_MissingClass += weight
        return

    # The class value is used as a list index below, so it has to be an
    # int; converting only after the missing-class check avoids calling
    # int() on a missing value.
    actualClass = int(instance.classValue())
    self.updateMargins(predictedDistribution, actualClass, weight)

    # Predicted class = first class with the strictly highest positive
    # score; stays -1 when every score is <= 0.
    predictedClass = -1
    bestProb = 0
    for i in range(self.m_NumClasses):
        if predictedDistribution[i] > bestProb:
            predictedClass = i
            bestProb = predictedDistribution[i]

    self.m_WithClass += weight

    if predictedClass < 0:
        self.m_Unclassified += weight
        return

    # Floor the probabilities at the smallest positive double so that a
    # zero probability never reaches log2 (a floor of -inf is a no-op).
    minProb = 5e-324
    predictedProb = max(minProb, predictedDistribution[actualClass])
    priorProb = max(minProb,
                    self.m_ClassPriors[actualClass] / self.m_ClassPriorsSum)

    if predictedProb >= priorProb:
        self.m_SumKBInfo += (Utils.log2(predictedProb)
                             - Utils.log2(priorProb)) * weight
    else:
        self.m_SumKBInfo -= (Utils.log2(1 - predictedProb)
                             - Utils.log2(1 - priorProb)) * weight

    self.m_SumSchemeEntropy -= Utils.log2(predictedProb) * weight
    self.m_SumPriorEntropy -= Utils.log2(priorProb) * weight

    self.updateNumericScores(predictedDistribution,
                             self.makeDistribution(instance.classValue()),
                             weight)

    # Coverage: walk the classes from the end of the stable-sorted index
    # list, accumulating score until the confidence level is reached.
    indices = Utils.stableSort(predictedDistribution)
    cumulative = 0
    sizeOfRegions = 0
    for i in range(len(predictedDistribution) - 1, -1, -1):
        if cumulative >= self.m_ConfLevel:
            break
        cumulative += predictedDistribution[indices[i]]
        sizeOfRegions += 1
        if actualClass == indices[i]:
            self.m_TotalCoverage += weight
    self.m_TotalSizeOfRegions += (weight * sizeOfRegions
                                  / (self.m_MaxTarget - self.m_MinTarget))

    self.m_ConfusionMatrix[actualClass][predictedClass] += weight
    if predictedClass != actualClass:
        self.m_Incorrect += weight
    else:
        self.m_Correct += weight
def evaluationForSingleInstance(self, a0, instance: Instance, storePredictions: bool):
    """Evaluate one instance from either a distribution or a classifier.

    Args:
        a0: either a list holding the predicted distribution for
            ``instance`` or a ``Classifier`` that is asked for one.
        instance: the instance being evaluated.
        storePredictions: when true (and ``m_DiscardPredictions`` is
            false) the prediction is appended to ``m_Predictions``.

    Returns:
        The prediction: for a nominal class the index returned by
        ``Utils.maxIndex`` (or ``Utils.missingValue()`` when that entry is
        not positive), otherwise the first element of the distribution.
        ``None`` when ``a0`` is neither a list nor a ``Classifier``.
    """
    if isinstance(a0, List):
        if not self.m_ClassIsNominal:
            pred = a0[0]
            self.updateStatsForPredictor(pred, instance)
            if storePredictions and not self.m_DiscardPredictions:
                if self.m_Predictions is None:
                    self.m_Predictions = []
                self.m_Predictions.append(
                    NumericPrediction(instance.classValue(), pred,
                                      instance.weight()))
            return pred

        pred = Utils.maxIndex(a0)
        if a0[int(pred)] <= 0:
            pred = Utils.missingValue()
        self.updateStatsForClassifier(a0, instance)
        if storePredictions and not self.m_DiscardPredictions:
            if self.m_Predictions is None:
                self.m_Predictions = []
            self.m_Predictions.append(
                NominalPrediction(instance.classValue(), a0,
                                  instance.weight()))
        return pred

    if not isinstance(a0, Classifier):
        return None

    # Predict on a copy with the class removed so the classifier cannot
    # see the true label; the statistics still use the original instance.
    classMissing = copy.deepcopy(instance)
    classMissing.setDataset(instance.dataset())
    # TODO: handle InputMappedClassifier separately
    classMissing.setClassMissing()
    pred = self.evaluationForSingleInstance(
        a0.distributionForInstance(classMissing), instance,
        storePredictions)

    if (self.m_ClassIsNominal or instance.classIsMissing()
            or Utils.isMissingValue(pred)):
        return pred

    # Numeric class with a usable prediction: update the optional interval
    # and density statistics, or flag them as unavailable.
    if isinstance(a0, IntervalEstimator):
        self.updateStatsForIntervalEstimator(a0, classMissing,
                                             instance.classValue())
    else:
        self.m_CoverageStatisticsAvailable = False

    if isinstance(a0, ConditionalDensityEstimator):
        self.updateStatsForConditionalDensityEstimator(
            a0, classMissing, instance.classValue())
    else:
        self.m_ComplexityStatisticsAvailable = False

    return pred
def process(self, toPredict: Instance, classifier: Classifier, evaluation: Evaluation):
    """Evaluate one instance and, when enabled, record its plot data.

    Args:
        toPredict: the instance to evaluate.
        classifier: the classifier that produces the prediction.
        evaluation: the evaluation object that records the prediction.

    Returns early when ``m_SaveForVisualization`` is false. Otherwise, if
    ``m_PlotInstances`` is set, one row is added to it and one entry each
    is appended to ``m_PlotShapes`` and ``m_PlotSizes``.
    """
    probActual = probNext = pred = 0
    classMissing = copy.deepcopy(toPredict)
    classMissing.setDataset(toPredict.dataset())

    if not toPredict.classAttribute().isNominal():
        # Single-instance evaluation
        pred = evaluation.evaluateModelOnceAndRecordPrediction(
            classifier, toPredict)
    else:
        # Probability distribution predicted for the instance
        preds = classifier.distributionForInstance(classMissing)
        val = 0
        if sum(preds) == 0:
            # All probabilities are zero: the instance belongs to no class
            pred = Utils.missingValue()
            probActual = Utils.missingValue()
        else:
            # The prediction is the index of the largest probability
            pred = Utils.maxIndex(preds)
            if Utils.isMissingValue(toPredict.classIndex()):
                probActual = preds[Utils.maxIndex(preds)]
            else:
                # A present class value means this is not a
                # to-be-predicted sample; otherwise val keeps its default
                if not Utils.isMissingValue(toPredict.classValue()):
                    val = int(toPredict.classValue())
                probActual = preds[val]
        # Largest probability among the classes other than val
        for cls in range(toPredict.classAttribute().numValues()):
            if cls != val and preds[cls] > probNext:
                probNext = preds[cls]
        evaluation.evaluationForSingleInstance(preds, toPredict, True)

    if not self.m_SaveForVisualization:
        return

    # Save the data used for visualization
    if self.m_PlotInstances is None:
        return

    isNominal = toPredict.classAttribute().isNominal()
    width = self.m_PlotInstances.numAttributes()
    classIdx = toPredict.classIndex()
    # Number of extra columns inserted before the original class column
    shift = 2 if isNominal else 1
    values = [0] * width

    col = 0
    while col < width:
        if col == classIdx:
            if isNominal:
                # Difference between the first and the alternative choice
                values[col] = probActual - probNext
                # Predicted value
                values[col + 1] = pred
                # Original value
                values[col + 2] = toPredict.value(col)
                col += 3
            else:
                values[col] = pred
                values[col + 1] = toPredict.value(col)
                col += 2
            continue
        if col < classIdx:
            # Columns before the class are copied unchanged
            values[col] = toPredict.value(col)
        else:
            values[col] = toPredict.value(col - shift)
        col += 1

    self.m_PlotInstances.add(Instance(1.0, values))

    if isNominal:
        if toPredict.isMissing(classIdx) or Utils.isMissingValue(pred):
            shape = Plot2D.MISSING_SHAPE
        elif pred != toPredict.classValue():
            shape = Plot2D.ERROR_SHAPE
        else:
            shape = Plot2D.CONST_AUTOMATIC_SHAPE
        self.m_PlotShapes.append(shape)

        if self.m_pointSizeProportionalToMargin:
            self.m_PlotSizes.append(probActual - probNext)
        else:
            sizeAdj = 1 if pred != toPredict.classValue() else 0
            self.m_PlotSizes.append(Plot2D.DEFAULT_SHAPE_SIZE.value + sizeAdj)
    else:
        # Numeric class: the size entry is the prediction error, or None
        # when either the class or the prediction is missing.
        errd = None
        if not toPredict.isMissing(classIdx) and not Utils.isMissingValue(pred):
            errd = pred - toPredict.classValue()
            self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
        else:
            self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
        self.m_PlotSizes.append(errd)