def distributionForInstance(self, instance: Instance) -> List[float]:
    """Build a class distribution for *instance* from ``classifyInstance``.

    The result has one slot per class (``instance.numClasses()``), all
    initialised to 0.

    * Nominal class: the slot of the predicted class index is set to 1.0;
      if the prediction is a missing value the all-zero list is returned.
    * Numeric or date class: slot 0 holds the predicted value.
    * Any other class type: the all-zero list is returned unchanged.

    Args:
        instance: The instance to produce a distribution for.

    Returns:
        The distribution list described above.
    """
    distribution = [0] * instance.numClasses()
    class_type = instance.classAttribute().type()

    if class_type == Attribute.NOMINAL:
        predicted = self.classifyInstance(instance)
        # A missing prediction leaves every slot at zero.
        if not Utils.isMissingValue(predicted):
            distribution[int(predicted)] = 1.0
    elif class_type == Attribute.NUMERIC or class_type == Attribute.DATE:
        distribution[0] = self.classifyInstance(instance)

    return distribution
def classifyInstance(self, instance: Instance):
    """Predict the class of *instance* from ``distributionForInstance``.

    * Nominal class: returns the index of the first strictly largest
      positive entry of the distribution, or ``Utils.missingValue()`` when
      no entry is greater than 0.
    * Numeric or date class: returns the first entry of the distribution.
    * Any other class type: returns ``Utils.missingValue()``.

    Args:
        instance: The instance to classify.

    Returns:
        The predicted class index / value, or the missing-value marker.

    Raises:
        Exception: If ``distributionForInstance`` returns ``None``.
    """
    distribution = self.distributionForInstance(instance)
    if distribution is None:
        raise Exception("Null distribution predicted")

    class_type = instance.classAttribute().type()

    if class_type == Attribute.NOMINAL:
        best_index = 0
        best_prob = 0
        # Strict '>' keeps the earliest index on ties and ignores
        # entries that are not greater than zero.
        for index, prob in enumerate(distribution):
            if prob > best_prob:
                best_index, best_prob = index, prob
        return best_index if best_prob > 0 else Utils.missingValue()

    if class_type == Attribute.NUMERIC or class_type == Attribute.DATE:
        return distribution[0]

    return Utils.missingValue()
def process(self, toPredict: Instance, classifier: Classifier, evaluation: Evaluation) -> None:
    """Score one instance and, if enabled, record it for visualization.

    For a nominal class the distribution is obtained from
    ``classifier.distributionForInstance`` on a deep copy of *toPredict*,
    the prediction is the index of the largest probability (or the
    missing-value marker when the probabilities sum to 0), and the result
    is handed to ``evaluation.evaluationForSingleInstance``. For any other
    class type the prediction comes from
    ``evaluation.evaluateModelOnceAndRecordPrediction``.

    When ``self.m_SaveForVisualization`` is true and
    ``self.m_PlotInstances`` is not ``None``, one row is appended to
    ``self.m_PlotInstances`` and one entry each to ``self.m_PlotShapes``
    and ``self.m_PlotSizes``.

    Args:
        toPredict: The instance to score.
        classifier: The classifier queried for nominal-class predictions.
        evaluation: The evaluation object that records the prediction.

    Returns:
        None.
    """
    probActual = probNext = pred = 0
    # Work on a deep copy that is re-attached to the original dataset.
    # NOTE(review): despite the name, nothing here clears the class value
    # on the copy -- confirm whether that is intended.
    classMissing = copy.deepcopy(toPredict)
    classMissing.setDataset(toPredict.dataset())
    if toPredict.classAttribute().isNominal():
        # Get the predicted class probability distribution.
        preds = classifier.distributionForInstance(classMissing)
        # If all probabilities are 0 the instance belongs to no class.
        # val defaults to 0 and is used when the class value is missing.
        val = 0
        if sum(preds) == 0:
            pred = Utils.missingValue()
            probActual = Utils.missingValue()
        else:
            # The predicted class is the index of the largest probability.
            pred = Utils.maxIndex(preds)
            # NOTE(review): this tests the class *index*, not the class
            # value, for missingness -- confirm intent. The nesting of this
            # if/else was restored from a whitespace-collapsed source;
            # verify it against the original file.
            if not Utils.isMissingValue(toPredict.classIndex()):
                # A non-missing class value means this is not a sample to
                # be predicted; its own class index is used.
                if not Utils.isMissingValue(toPredict.classValue()):
                    val = int(toPredict.classValue())
                probActual = preds[val]
            else:
                probActual = preds[Utils.maxIndex(preds)]
        # probNext: largest probability among classes other than val.
        for i in range(toPredict.classAttribute().numValues()):
            if i != val and preds[i] > probNext:
                probNext = preds[i]
        evaluation.evaluationForSingleInstance(preds, toPredict, True)
    else:
        # Evaluate the single instance and record its prediction.
        pred = evaluation.evaluateModelOnceAndRecordPrediction(
            classifier, toPredict)
    if not self.m_SaveForVisualization:
        return
    # Save data for visualization.
    if self.m_PlotInstances is not None:
        isNominal = toPredict.classAttribute().isNominal()
        values = [0] * self.m_PlotInstances.numAttributes()
        # Manual index: the class position expands into several plot
        # columns, so the loop skips ahead after filling them.
        # Presumably m_PlotInstances has two more attributes than toPredict
        # for a nominal class and one more otherwise -- TODO confirm.
        i = 0
        while i < self.m_PlotInstances.numAttributes():
            # Values before the class attribute are copied as-is.
            if i < toPredict.classIndex():
                values[i] = toPredict.value(i)
            elif i == toPredict.classIndex():
                if isNominal:
                    # Margin: probActual minus the best competing probability.
                    values[i] = probActual - probNext
                    # Predicted value.
                    values[i + 1] = pred
                    # Original value.
                    values[i + 2] = toPredict.value(i)
                    i += 2
                else:
                    values[i] = pred
                    values[i + 1] = toPredict.value(i)
                    i += 1
            else:
                # Columns after the class are shifted by the inserted slots.
                if isNominal:
                    values[i] = toPredict.value(i - 2)
                else:
                    values[i] = toPredict.value(i - 1)
            i += 1
        self.m_PlotInstances.add(Instance(1.0, values))
        if toPredict.classAttribute().isNominal():
            # Shape: missing, wrong prediction, or the automatic shape.
            if toPredict.isMissing(
                    toPredict.classIndex()) or Utils.isMissingValue(pred):
                self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
            elif pred != toPredict.classValue():
                self.m_PlotShapes.append(Plot2D.ERROR_SHAPE)
            else:
                self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
            # Size: the margin, or the default size plus 1 when the
            # prediction differs from the class value.
            if self.m_pointSizeProportionalToMargin:
                self.m_PlotSizes.append(probActual - probNext)
            else:
                sizeAdj = 0
                if pred != toPredict.classValue():
                    sizeAdj = 1
                self.m_PlotSizes.append(Plot2D.DEFAULT_SHAPE_SIZE.value + sizeAdj)
        else:
            # Non-nominal class: size is the signed error, or None when
            # either the class value or the prediction is missing.
            errd = None
            if not toPredict.isMissing(toPredict.classIndex(
            )) and not Utils.isMissingValue(pred):
                errd = pred - toPredict.classValue()
                self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
            else:
                self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
            self.m_PlotSizes.append(errd)