def copyStringValues(cls, inst: Instance, a0=None, a1=None,
                     a2: AttributeLocator = None, a3: Instances = None,
                     a4: AttributeLocator = None):
    """Copy string values referenced by ``inst`` into a destination dataset.

    The call is dispatched on the runtime types of ``a0`` and ``a1``
    (``cls`` suggests this is used as a classmethod; the decorator is not
    visible here):

    * ``a0`` is an ``Instances`` and ``a1`` an ``AttributeLocator``:
      ``a0`` is the destination dataset and ``a1`` the locator used for both
      sides. After validation the work is delegated to
      ``cls.copyStringValuesFromSrc(inst, True, inst.dataset(), a1, a0, a1)``.
    * otherwise: ``a0`` is a truthy/falsy flag choosing whether the index into
      ``inst`` is resolved through the source locator ``a2`` (truthy) or the
      destination locator ``a4`` (falsy); ``a1`` is the source dataset and
      ``a3`` the destination dataset.

    :param inst: instance whose values are rewritten in place via ``setValue``
    :raises Exception: if ``inst`` has no dataset, if the attribute counts of
        source and destination differ, or if the two locators disagree on the
        number of attribute or locator indices
    """
    if isinstance(a0, Instances) and isinstance(a1, AttributeLocator):
        own_dataset = inst.dataset()
        if own_dataset is None:
            raise Exception("Instance has no dataset assigned!!")
        if own_dataset.numAttributes() != a0.numAttributes():
            raise Exception(
                f"Src and Dest differ in # of attributes: "
                f"{own_dataset.numAttributes()!s} != {a0.numAttributes()!s}")
        cls.copyStringValuesFromSrc(inst, True, inst.dataset(), a1, a0, a1)
        return

    # Source and destination compare equal: nothing to copy.
    if a1 == a3:
        return

    src_attr_indices = a2.getAttributeIndices()
    dest_attr_indices = a4.getAttributeIndices()
    if len(src_attr_indices) != len(dest_attr_indices):
        raise Exception(
            f"Src and Dest string indices differ in length: "
            f"{len(src_attr_indices)} != {len(dest_attr_indices)}")

    src_locator_count = len(a2.getLocatorIndices())
    dest_locator_count = len(a4.getLocatorIndices())
    if src_locator_count != dest_locator_count:
        raise Exception(
            f"Src and Dest locator indices differ in length: "
            f"{src_locator_count} != {dest_locator_count}")

    # Both index lists have equal length (checked above), so walk them in
    # lockstep.
    for src_pos, dest_pos in zip(src_attr_indices, dest_attr_indices):
        src_actual = a2.getActualIndex(src_pos)
        dest_actual = a4.getActualIndex(dest_pos)
        inst_index = src_actual if a0 else dest_actual
        src_attr = a1.attribute(src_actual)
        dest_attr = a3.attribute(dest_actual)
        if inst.isMissing(inst_index):
            continue
        # Register the source value with the destination attribute and store
        # the index it was given back into the instance.
        new_index = dest_attr.addStringValue(
            src_attr, int(inst.value(inst_index)))
        inst.setValue(inst_index, new_index)
def convertInstanceNumeric(self, instance: Instance):
    """Expand nominal attributes of ``instance`` into numeric columns and push it.

    When ``self.m_needToTransform`` is false the instance is pushed unchanged
    (without copying). Otherwise every attribute of the input format is
    visited:

    * non-nominal attributes and the class attribute are copied through into
      one output slot;
    * any other nominal attribute with ``n`` values occupies ``n - 1`` output
      slots. For a missing value all of them receive the instance's (missing)
      value. Otherwise slots are set to 1 until the position at which
      ``self.m_Indices[j]`` holds the instance's value index, and the
      remaining slots are set to 0.

    The resulting values are wrapped in a new ``Instance`` carrying the
    original weight, passed through ``self.copyValues`` and pushed.
    """
    if not self.m_needToTransform:
        self.push(instance, False)
        return

    out_vals = [0] * self.outputFormatPeek().numAttributes()
    in_format = self.getInputFormat()
    cursor = 0  # next free slot in out_vals

    for j in range(in_format.numAttributes()):
        att = in_format.attribute(j)

        if not att.isNominal() or j == in_format.classIndex():
            out_vals[cursor] = instance.value(j)
            cursor += 1
            continue

        width = att.numValues() - 1
        if instance.isMissing(j):
            for k in range(width):
                out_vals[cursor + k] = instance.value(j)
        else:
            ordering = self.m_Indices[j]
            k = 0
            # Leading 1s up to the position of the instance's value ...
            while int(instance.value(j)) != ordering[k]:
                out_vals[cursor + k] = 1
                k += 1
            # ... followed by 0s for the rest of this attribute's slots.
            while k < width:
                out_vals[cursor + k] = 0
                k += 1
        cursor += width

    converted = Instance(instance.weight(), out_vals)
    self.copyValues(converted, False, instance.dataset(),
                    self.outputFormatPeek())
    self.push(converted)
def convertInstanceNominal(self, instance: Instance):
    """Expand nominal attributes of ``instance`` into 0/1 columns and push it.

    When ``self.m_needToTransform`` is false the instance is pushed unchanged
    (without copying). Otherwise every attribute of the input format is
    visited:

    * non-nominal attributes, the class attribute, and nominal attributes
      with at most two values (unless ``self.m_TransformAll`` is set) are
      copied through into one output slot;
    * any other nominal attribute with ``n`` values occupies ``n`` output
      slots: all of them receive the instance's (missing) value when it is
      missing, otherwise the slot matching the value index is 1 and the
      others are 0.

    The resulting values are wrapped in a new ``Instance`` carrying the
    original weight, passed through ``self.copyValues`` and pushed.
    """
    if not self.m_needToTransform:
        self.push(instance, False)
        return

    out_vals = [0] * self.outputFormatPeek().numAttributes()
    in_format = self.getInputFormat()
    cursor = 0  # next free slot in out_vals

    for j in range(in_format.numAttributes()):
        att = in_format.attribute(j)

        keep_as_is = (
            not att.isNominal()
            or j == in_format.classIndex()
            or (att.numValues() <= 2 and not self.m_TransformAll)
        )
        if keep_as_is:
            out_vals[cursor] = instance.value(j)
            cursor += 1
            continue

        width = att.numValues()
        if instance.isMissing(j):
            for k in range(width):
                out_vals[cursor + k] = instance.value(j)
        else:
            for k in range(width):
                out_vals[cursor + k] = 1 if k == int(instance.value(j)) else 0
        cursor += width

    converted = Instance(instance.weight(), out_vals)
    self.copyValues(converted, False, instance.dataset(),
                    self.outputFormatPeek())
    self.push(converted)
def push(self, instance: Instance, copyInstance: bool = True):
    """Attach ``instance`` to the output format and enqueue it.

    ``None`` is ignored. If the instance already belongs to a dataset it is
    optionally deep-copied (``copyInstance``) and handed to
    ``self.copyValues(instance, False)`` first. In all non-``None`` cases the
    instance's dataset is then set to ``self.m_OutputFormat`` and the instance
    is put on ``self.m_OutputQueue``.

    :param instance: the instance to enqueue, or ``None``
    :param copyInstance: deep-copy the instance before modifying it (only
        applies when the instance has a dataset)
    """
    if instance is None:
        return

    has_dataset = instance.dataset() is not None
    if has_dataset:
        if copyInstance:
            instance = copy.deepcopy(instance)
        self.copyValues(instance, False)

    instance.setDataset(self.m_OutputFormat)
    self.m_OutputQueue.put(instance)
def convertInstance(self, instance: Instance):
    """Replace missing values in ``instance`` and push the result.

    An instance without missing values is pushed as-is with copying enabled.
    Otherwise a new ``Instance`` (same weight, same dataset) is built in which
    each missing value of a non-class attribute that is nominal or numeric in
    the input format is replaced by ``self.m_ModesAndMeans[j]``; all other
    values are taken over unchanged. The new instance is pushed without
    copying.
    """
    if not instance.hasMissingValue():
        self.push(instance, True)
        return

    in_format = self.getInputFormat()
    filled = [0] * in_format.numAttributes()
    for j in range(instance.numAttributes()):
        replaceable = (
            instance.isMissing(j)
            and in_format.classIndex() != j
            and (in_format.attribute(j).isNominal()
                 or in_format.attribute(j).isNumeric())
        )
        filled[j] = self.m_ModesAndMeans[j] if replaceable else instance.value(j)

    replacement = Instance(instance.weight(), filled)
    replacement.setDataset(instance.dataset())
    # The replacement is freshly built, so push() does not need to copy it.
    self.push(replacement, False)
def evaluationForSingleInstance(self, a0, instance: Instance, storePredictions: bool):
    """Evaluate one instance, given either a distribution or a classifier.

    * ``a0`` is a list (a distribution / prediction vector): for a nominal
      class the prediction is ``Utils.maxIndex(a0)``, replaced by
      ``Utils.missingValue()`` when the entry at that index is ``<= 0``;
      for a numeric class the prediction is ``a0[0]``. The matching
      ``updateStatsFor...`` method is called, and the prediction is appended
      to ``self.m_Predictions`` when ``storePredictions`` is set and
      ``self.m_DiscardPredictions`` is not.
    * ``a0`` is a ``Classifier``: a deep copy of ``instance`` with its class
      set to missing is classified, and the resulting distribution is
      evaluated through the list branch above. For a numeric class with a
      known class value and a non-missing prediction, interval / conditional
      density statistics are updated when the classifier supports them;
      otherwise the corresponding availability flag is cleared.

    :returns: the prediction; ``None`` if ``a0`` is neither a list nor a
        ``Classifier``
    """
    if isinstance(a0, List):
        nominal = self.m_ClassIsNominal
        if nominal:
            pred = Utils.maxIndex(a0)
            if a0[int(pred)] <= 0:
                pred = Utils.missingValue()
            self.updateStatsForClassifier(a0, instance)
        else:
            pred = a0[0]
            self.updateStatsForPredictor(pred, instance)

        if storePredictions and not self.m_DiscardPredictions:
            if self.m_Predictions is None:
                self.m_Predictions = []
            if nominal:
                record = NominalPrediction(
                    instance.classValue(), a0, instance.weight())
            else:
                record = NumericPrediction(
                    instance.classValue(), pred, instance.weight())
            self.m_Predictions.append(record)
        return pred

    if isinstance(a0, Classifier):
        # Classify a copy whose class value is hidden from the classifier.
        blinded = copy.deepcopy(instance)
        blinded.setDataset(instance.dataset())
        # TODO: special handling for InputMappedClassifier is not implemented.
        blinded.setClassMissing()

        pred = self.evaluationForSingleInstance(
            a0.distributionForInstance(blinded), instance, storePredictions)

        if (self.m_ClassIsNominal
                or instance.classIsMissing()
                or Utils.isMissingValue(pred)):
            return pred

        if isinstance(a0, IntervalEstimator):
            self.updateStatsForIntervalEstimator(
                a0, blinded, instance.classValue())
        else:
            self.m_CoverageStatisticsAvailable = False

        if isinstance(a0, ConditionalDensityEstimator):
            self.updateStatsForConditionalDensityEstimator(
                a0, blinded, instance.classValue())
        else:
            self.m_ComplexityStatisticsAvailable = False
        return pred
def input(self, instance: Instance):
    """Feed one instance through the filter, keeping only selected attributes.

    On the first instance of a new batch the output queue is reset. If the
    output format has no attributes nothing is produced. When every input
    attribute is selected, a deep copy of the instance (detached from its
    dataset) is used; otherwise a new ``Instance`` is built from the values
    at ``self.m_SelectedAttributes``. The result goes through
    ``self.copyValues`` and is pushed.

    :returns: ``False`` if the output format has zero attributes, ``True``
        once an instance has been pushed
    :raises Exception: if no input format has been defined
    """
    if self.getInputFormat() is None:
        raise Exception("No input instance format defined")

    if self.m_NewBatch:
        self.resetQueue()
        self.m_NewBatch = False

    out_format = self.getOutputFormat()
    if out_format.numAttributes() == 0:
        return False

    selected = self.m_SelectedAttributes
    if len(selected) == self.getInputFormat().numAttributes():
        # Everything is selected: reuse the instance's values wholesale.
        result = copy.deepcopy(instance)
        result.setDataset(None)
    else:
        picked = [0] * out_format.numAttributes()
        for slot, source_index in enumerate(selected):
            picked[slot] = instance.value(source_index)
        result = Instance(instance.weight(), picked)

    self.copyValues(result, False, instance.dataset(), self.outputFormatPeek())
    self.push(result)
    return True
def process(self, toPredict: Instance, classifier: Classifier, evaluation: Evaluation):
    """Predict ``toPredict``, record it in ``evaluation`` and store plot data.

    For a nominal class the classifier's distribution is obtained on a deep
    copy of the instance and evaluated via
    ``evaluation.evaluationForSingleInstance``; along the way the probability
    assigned to the actual class (``prob_actual``) and the largest probability
    among the other classes (``prob_next``) are tracked. For any other class
    type ``evaluation.evaluateModelOnceAndRecordPrediction`` supplies the
    prediction.

    If ``self.m_SaveForVisualization`` is set and ``self.m_PlotInstances``
    exists, a new row is added to the plot instances. At the class position
    it holds ``[margin, prediction, actual]`` (nominal) or
    ``[prediction, actual]`` (otherwise); the remaining positions hold the
    instance's other values. A plot shape and a plot size are appended to
    ``self.m_PlotShapes`` / ``self.m_PlotSizes``.
    """
    prob_actual = prob_next = pred = 0
    blinded = copy.deepcopy(toPredict)
    blinded.setDataset(toPredict.dataset())

    if toPredict.classAttribute().isNominal():
        # Probability distribution over the class values.
        dist = classifier.distributionForInstance(blinded)
        actual_idx = 0
        if sum(dist) == 0:
            # All probabilities are zero: the instance belongs to no class.
            pred = Utils.missingValue()
            prob_actual = Utils.missingValue()
        else:
            # The predicted class is the index with the largest probability.
            pred = Utils.maxIndex(dist)
            if Utils.isMissingValue(toPredict.classIndex()):
                prob_actual = dist[Utils.maxIndex(dist)]
            elif not Utils.isMissingValue(toPredict.classValue()):
                # The class value is known, so look up its probability.
                actual_idx = int(toPredict.classValue())
                prob_actual = dist[actual_idx]

        # Largest probability among the classes other than actual_idx.
        for idx in range(toPredict.classAttribute().numValues()):
            if idx != actual_idx and dist[idx] > prob_next:
                prob_next = dist[idx]
        evaluation.evaluationForSingleInstance(dist, toPredict, True)
    else:
        # Evaluate this single instance through the classifier.
        pred = evaluation.evaluateModelOnceAndRecordPrediction(
            classifier, toPredict)

    if not self.m_SaveForVisualization:
        return

    # Store the data needed for visualization.
    plot_data = self.m_PlotInstances
    if plot_data is None:
        return

    nominal = toPredict.classAttribute().isNominal()
    # Number of extra columns inserted at the class position.
    shift = 2 if nominal else 1
    row = [0] * plot_data.numAttributes()
    col = 0
    while col < plot_data.numAttributes():
        class_idx = toPredict.classIndex()
        if col < class_idx:
            # Everything before the class column is copied unchanged.
            row[col] = toPredict.value(col)
        elif col == class_idx:
            if nominal:
                # Margin between the actual class and the runner-up.
                row[col] = prob_actual - prob_next
                # Predicted value.
                row[col + 1] = pred
                # Original value.
                row[col + 2] = toPredict.value(col)
            else:
                row[col] = pred
                row[col + 1] = toPredict.value(col)
            col += shift
        else:
            row[col] = toPredict.value(col - shift)
        col += 1

    plot_data.add(Instance(1.0, row))

    if toPredict.classAttribute().isNominal():
        if (toPredict.isMissing(toPredict.classIndex())
                or Utils.isMissingValue(pred)):
            shape = Plot2D.MISSING_SHAPE
        elif pred != toPredict.classValue():
            shape = Plot2D.ERROR_SHAPE
        else:
            shape = Plot2D.CONST_AUTOMATIC_SHAPE
        self.m_PlotShapes.append(shape)

        if self.m_pointSizeProportionalToMargin:
            self.m_PlotSizes.append(prob_actual - prob_next)
        else:
            # Misclassified points are drawn one size step larger.
            bump = 1 if pred != toPredict.classValue() else 0
            self.m_PlotSizes.append(Plot2D.DEFAULT_SHAPE_SIZE.value + bump)
    else:
        error = None
        if (toPredict.isMissing(toPredict.classIndex())
                or Utils.isMissingValue(pred)):
            self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
        else:
            error = pred - toPredict.classValue()
            self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
        self.m_PlotSizes.append(error)