Example #1
0
 def resetDistribution(self, data: Instances):
     """Rebuild ``self.m_distribution`` from ``data``.

     Every instance for which ``whichSubset`` returns a value greater than
     -1 is copied into a fresh ``Instances`` container, and a new
     ``Distribution`` is created from that container and this object. The
     new distribution is then given the full ``data`` together with
     ``self.m_attIndex`` through ``addInstWithUnknown`` before it replaces
     the current distribution.

     :param data: the instances to rebuild the distribution from
     """
     assigned = Instances(data, data.numInstances())
     for idx in range(data.numInstances()):
         candidate = data.instance(idx)
         # Skip instances that whichSubset does not map to a subset.
         if not self.whichSubset(candidate) > -1:
             continue
         assigned.add(candidate)
     rebuilt = Distribution(assigned, self)
     rebuilt.addInstWithUnknown(data, self.m_attIndex)
     self.m_distribution = rebuilt
Example #2
0
 def kNearestNeighbours(self, target: Instance, kNN: int) -> Instances:
     """Return the nearest neighbours of ``target`` found by a linear scan.

     Every instance of ``self.m_Instances`` except ``target`` itself is
     compared with ``target``. Candidates whose distance ties with the
     current k-th best are kept as well, so the result may hold more than
     ``kNN`` instances. The distances of the returned neighbours are stored
     in ``self.m_Distances`` (after ``postProcessDistances``), index-aligned
     with the returned instances.

     :param target: the instance whose neighbours are searched
     :param kNN: the number of neighbours requested
     :return: a new ``Instances`` object holding the neighbours
     """
     if self.m_Stats is not None:
         self.m_Stats.searchStart()
     heap = MyHeap(kNN)
     firstkNN = 0
     numInstances = self.m_Instances.numInstances()
     for i in range(numInstances):
         candidate = self.m_Instances.instance(i)
         if target == candidate:
             continue
         if self.m_Stats is not None:
             self.m_Stats.incrPointCount()
         if firstkNN < kNN:
             # The heap is not full yet: no cut-off distance can be used.
             distance = self.m_DistanceFunction.distance(
                 target, candidate, float("inf"), self.m_Stats)
             # Identical instances are skipped on request, except for the
             # very last instance of the dataset.
             if (distance == 0 and self.m_SkipIdentical
                     and i < numInstances - 1):
                 continue
             heap.put(i, distance)
             firstkNN += 1
         else:
             # The heap is full: the current k-th distance is the cut-off.
             temp = heap.peek()
             distance = self.m_DistanceFunction.distance(
                 target, candidate, temp.distance, self.m_Stats)
             if distance == 0 and self.m_SkipIdentical:
                 continue
             if distance < temp.distance:
                 heap.putBySubstitute(i, distance)
             elif distance == temp.distance:
                 heap.putKthNearest(i, distance)

     total = heap.size() + heap.noOfKthNearest()
     neighbours = Instances(self.m_Instances, total)
     self.m_Distances = [0] * total
     indices = [0] * total
     # Both result arrays are filled from the back: first the entries tied
     # with the k-th neighbour, then the heap content in the order it is
     # popped.
     i = 1
     while heap.noOfKthNearest() > 0:
         h = heap.getKthNearest()
         indices[total - i] = h.index
         self.m_Distances[total - i] = h.distance
         i += 1
     while heap.size() > 0:
         h = heap.get()
         indices[total - i] = h.index
         self.m_Distances[total - i] = h.distance
         i += 1
     self.m_DistanceFunction.postProcessDistances(self.m_Distances)
     for index in indices:
         neighbours.add(self.m_Instances.instance(index))
     if self.m_Stats is not None:
         # The search is over: signal the end instead of starting a second
         # measurement. NOTE(review): assumes the stats object offers
         # searchFinish() as the counterpart of searchStart() - confirm.
         self.m_Stats.searchFinish()
     return neighbours
Example #3
0
 def saveVisibleInstances(self):
     """Write the instances of all current plots to a user-chosen ARFF file.

     The instances of the first plot are copied and the instances of every
     further plot are appended to that copy. The user is then asked for a
     target file and the merged data is written in ARFF format. Nothing is
     written when there are no plots or when the dialog is cancelled.
     """
     plots = self.m_plot.m_plot2D.getPlots()
     # No plots (None or empty): there is no master plot to start from.
     if plots is None or len(plots) == 0:
         return
     saveInsts = Instances(plots[0].getPlotInstances())
     for i in range(1, len(plots)):
         addInsts = plots[i].getPlotInstances()
         for j in range(addInsts.numInstances()):
             saveInsts.add(addInsts.instance(j))
     selection = QFileDialog.getSaveFileName(self, '保存文件', '/',
                                             'Arff data files(*.arff)')
     path = selection[0]
     # An empty path means the dialog was cancelled; open('') would raise.
     if not path:
         return
     with open(path, 'w') as f:
         f.write(saveInsts.toArffString())
Example #4
0
class SimpleKMeans(RandomizableClusterer):
    """k-means clusterer working on numeric and nominal attributes.

    ``methodList`` maps a property name to the name of its setter method and
    ``propertyList`` holds the current property values as strings.
    NOTE(review): ``propertyList`` is a class attribute, so the setters
    update it for every instance - confirm this sharing is intended.
    """
    methodList = {"NumClusters":"setNumClusters","DontReplaceMissing":"setDontReplaceMissing",
                  "Seed":"setSeedDefault"}
    propertyList = {"NumClusters":"2","DontReplaceMissing":"False","Seed":"10"}
    def __init__(self):
        """Create an unbuilt clusterer with 2 clusters and seed 10."""
        super().__init__()
        self.NumClusters=2
        self.DontReplaceMissing=False
        self.m_MaxIterations=500
        self.m_Iterations=0
        self.m_PreserveOrder=False
        self.m_FastDistanceCalc=False
        self.Seed=10
        self.m_speedUpDistanceCompWithCanopies=False
        self.m_maxCanopyCandidates=100
        self.m_minClusterDensity=2
        self.m_periodicPruningRate=10000
        self.setSeed(self.Seed)
        self.m_ClusterNominalCounts=None        #type:List[List[List[float]]]
        self.m_ClusterMissingCounts=None        #type:List[List[float]]
        self.m_FullMeansOrMediansOrModes=None   #type:List[float]
        self.m_FullStdDevs=None #type:List[float]
        self.m_FullNominalCounts=None       #type:List[List[float]]
        self.m_FullMissingCounts=None       #type:List[float]
        self.m_ClusterCentroids=None        #type:Instances
        self.m_initialStartPoints=None      #type:Instances
        # Created by buildClusterer; declared here so the attribute always exists.
        self.m_ReplaceMissingFilter=None
        self.m_executionSlots=1
        self.m_ClusterSizes=[]        #type:List[float]
        self.m_squaredErrors=[]       #type:List[float]
        self.m_DistanceFunction=EuclideanDistance()

    def __str__(self):
        """Return a textual report of the clustering result.

        The report lists the number of iterations, the within-cluster sum of
        squared errors (unless fast distance calculation is on), the initial
        starting points and a table with the full-data and per-cluster
        centroid values. Before a clusterer has been built a short notice is
        returned instead.
        """
        if self.m_ClusterCentroids is None:
            return "No clusterer built yet!"
        centroids=self.m_ClusterCentroids
        maxAttWidth=0
        maxWidth=0

        # Column widths needed for attribute names and numeric centroid values.
        for i in range(self.NumClusters):
            for j in range(centroids.numAttributes()):
                att=centroids.attribute(j)
                if len(att.name())>maxAttWidth:
                    maxAttWidth=len(att.name())
                if att.isNumeric():
                    try:
                        width=math.log(math.fabs(centroids.instance(i).value(j)))/math.log(10)
                    except ValueError:
                        # log(0): handled like a value below 1.
                        width=float('-inf')
                    if width<0:
                        width=1
                    # A NaN or infinite value cannot be converted by int();
                    # it does not influence the column width.
                    if not math.isfinite(width):
                        continue
                    width+=6
                    if int(width) > maxWidth:
                        maxWidth=int(width)
        # Column widths needed for nominal labels.
        for i in range(centroids.numAttributes()):
            if centroids.attribute(i).isNominal():
                a=centroids.attribute(i)
                for j in range(centroids.numInstances()):
                    # A missing centroid value has no label to measure.
                    if centroids.instance(j).isMissing(i):
                        continue
                    val=a.value(int(centroids.instance(j).value(i)))
                    if len(val)>maxWidth:
                        maxWidth=len(val)
                for j in range(a.numValues()):
                    val=a.value(j)+" "
                    if len(val)>maxAttWidth:
                        maxAttWidth=len(val)
        for m_ClusterSize in self.m_ClusterSizes:
            size="("+str(m_ClusterSize)+")"
            if len(size)>maxWidth:
                maxWidth=len(size)
        maxAttWidth+=2
        if maxAttWidth<len("Attribute")+2:
            maxAttWidth=len("Attribute")+2
        if maxWidth<len("Full Data"):
            maxWidth=len("Full Data")+1
        if maxWidth<len("missing"):
            maxWidth=len("missing")+1
        temp="\nkMeans\n======\n"
        temp+="\nNumber of iterations: " + str(self.m_Iterations)
        if not self.m_FastDistanceCalc:
            temp+='\n'
            temp+="Within cluster sum of squared errors: "+ str(sum(self.m_squaredErrors))
        temp+="\n\nInitial starting points (random):\n"
        temp+='\n'
        for i in range(self.m_initialStartPoints.numInstances()):
            temp+="Cluster " + str(i) + ": " + str(self.m_initialStartPoints.instance(i))+"\n"
        temp+="\nMissing values globally replaced with mean/mode"
        temp+="\n\nFinal cluster centroids:\n"
        temp+=self.pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2))- len("Cluster#"), True)
        temp+='\n'
        temp+=self.pad("Attribute", " ", maxAttWidth - len("Attribute"), False)
        temp+=self._padCell("Full Data", maxWidth)
        for i in range(self.NumClusters):
            temp+=self._padCell(str(i), maxWidth)
        temp+='\n'
        cSize="(" + str(sum(self.m_ClusterSizes)) + ")"
        temp+=self.pad(cSize, " ", maxAttWidth + maxWidth + 1 - len(cSize),True)
        for i in range(self.NumClusters):
            temp+=self._padCell("(" + str(self.m_ClusterSizes[i]) + ")", maxWidth)
        temp+='\n'
        temp+=self.pad("", "=",maxAttWidth+ (maxWidth * (centroids.numInstances() + 1)
                    + centroids.numInstances() + 1), True)
        temp+='\n'
        # One table row per attribute: name, full-data value, cluster values.
        for i in range(centroids.numAttributes()):
            attName=centroids.attribute(i).name()
            temp+=attName
            temp+=" "*(maxAttWidth-len(attName))
            if centroids.attribute(i).isNominal():
                # -1 marks a nominal attribute whose mode is "missing".
                if self.m_FullMeansOrMediansOrModes[i] == -1:
                    strVal="missing"
                else:
                    strVal=centroids.attribute(i).value(int(self.m_FullMeansOrMediansOrModes[i]))
            else:
                if math.isnan(self.m_FullMeansOrMediansOrModes[i]):
                    strVal="missing"
                else:
                    strVal= Utils.doubleToString(self.m_FullMeansOrMediansOrModes[i], maxWidth, 4).strip()
            temp+=self._padCell(strVal, maxWidth)
            for j in range(self.NumClusters):
                if centroids.instance(j).isMissing(i):
                    strVal="missing"
                elif centroids.attribute(i).isNominal():
                    strVal=centroids.attribute(i).value(int(centroids.instance(j).value(i)))
                else:
                    strVal= Utils.doubleToString(centroids.instance(j).value(i), maxWidth, 4).strip()
                temp+=self._padCell(strVal, maxWidth)
            temp+='\n'
        temp+='\n\n'
        return temp

    def _padCell(self,text:str,maxWidth:int):
        """Right-align ``text`` in a table cell of ``maxWidth + 1`` characters."""
        return self.pad(text, " ", maxWidth + 1 - len(text), True)

    def clusterInstance(self,instance:Instance):
        """Return the index of the cluster ``instance`` is assigned to.

        The instance is first passed through the missing-value filter that
        ``buildClusterer`` created, so the clusterer must have been built.
        """
        self.m_ReplaceMissingFilter.input(instance)
        self.m_ReplaceMissingFilter.batchFinished()
        inst=self.m_ReplaceMissingFilter.output()
        return self.clusterProcessedInstance(inst,False,True)

    def setNumClusters(self,value:str):
        """Set the number of clusters from its string form.

        Values that are not integers, or not greater than zero, are ignored
        and leave the current setting untouched.
        """
        try:
            val=int(value)
        except ValueError:
            return
        # buildClusterer cannot work with fewer than one cluster.
        if val<=0:
            return
        self.NumClusters=val
        self.propertyList.update({"NumClusters":value})

    def setDontReplaceMissing(self,value:int):
        """Store the flag: 0 means ``False``, any other value ``True``."""
        self.DontReplaceMissing=(value != 0)

    def pad(self,source:str,padChar:str,length:int,leftPad:bool):
        """Return ``source`` with ``padChar`` repeated ``length`` times added.

        The padding goes in front of ``source`` when ``leftPad`` is true and
        behind it otherwise; a ``length`` of zero or less adds nothing.
        """
        padding=padChar*length
        if leftPad:
            return padding+source
        return source+padding


    def getCapabilities(self)->Capabilities:
        """Return the capabilities: no class, nominal/numeric attributes, missing values."""
        result=super().getCapabilities()
        result.disableAll()
        result.enable(CapabilityEnum.NO_CLASS)
        result.enable(CapabilityEnum.NOMINAL_ATTRIBUTES)
        result.enable(CapabilityEnum.NUMERIC_ATTRIBUTES)
        result.enable(CapabilityEnum.MISSING_VALUES)
        return result

    def buildClusterer(self,data:Instances):
        """Run k-means on ``data`` and store centroids, sizes and errors.

        Distinct instances are drawn at random as starting centroids, then
        instances are re-assigned and centroids recomputed until no
        assignment changes or ``m_MaxIterations`` is reached. Clusters that
        end up empty are dropped, which lowers ``NumClusters``.

        :param data: the training instances (not modified; a copy is used)
        """
        self.getCapabilities().testWithFail(data)
        self.m_Iterations=0
        # Replace missing values with a filter (original note: numeric
        # attributes by the mean, nominal ones by the most frequent value).
        self.m_ReplaceMissingFilter=ReplaceMissingValues()
        instances=Instances(data)
        instances.setClassIndex(-1)
        self.m_ReplaceMissingFilter.setInputFormat(instances)
        instances=Filter.useFilter(instances,self.m_ReplaceMissingFilter)

        numAtts=instances.numAttributes()
        # Per-cluster value frequencies, indexed [cluster][attribute][value].
        self.m_ClusterNominalCounts=[[[] for i in range(numAtts)] for j in range(self.NumClusters)]
        # Per-cluster weight of missing values, indexed [cluster][attribute].
        self.m_ClusterMissingCounts=[[0]*numAtts for i in range(self.NumClusters)]

        # Statistics of the whole dataset, gathered through slot 0.
        self.m_FullMeansOrMediansOrModes=self.moveCentroid(0,instances,True,False)
        self.m_FullMissingCounts=self.m_ClusterMissingCounts[0]
        self.m_FullNominalCounts=self.m_ClusterNominalCounts[0]
        sumofWeights=instances.sumOfWeight()
        for i in range(numAtts):
            if instances.attribute(i).isNumeric():
                if self.m_FullMissingCounts[i] == sumofWeights:
                    self.m_FullMeansOrMediansOrModes[i]=float('nan')
            else:
                if self.m_FullMissingCounts[i]>self.m_FullNominalCounts[i][Utils.maxIndex(self.m_FullNominalCounts[i])]:
                    self.m_FullMeansOrMediansOrModes[i]=-1
        self.m_ClusterCentroids=Instances(instances,self.NumClusters)
        clusterAssignments=[0]*instances.numInstances()
        self.m_DistanceFunction.setInstances(instances)
        random.seed(self.getSeed())
        initC=dict()        #type:Dict[DecisionTableHashKey,int]
        initInstances=instances

        # Draw distinct instances as starting centroids; every drawn
        # instance is swapped to the end so it is not drawn again.
        for j in range(initInstances.numInstances()-1,-1,-1):
            instIndex=random.randint(0,j)
            hk=DecisionTableHashKey(initInstances.instance(instIndex),initInstances.numAttributes(),True)
            if hk not in initC:
                self.m_ClusterCentroids.add(initInstances.instance(instIndex))
                initC[hk]=None
            initInstances.swap(j,instIndex)
            if self.m_ClusterCentroids.numInstances() == self.NumClusters:
                break

        self.m_initialStartPoints=Instances(self.m_ClusterCentroids)
        # Fewer distinct instances than requested clusters may exist.
        self.NumClusters=self.m_ClusterCentroids.numInstances()
        converged=False
        tempI=[]    #type:List[Instances]
        self.m_squaredErrors=[0]*self.NumClusters
        self.m_ClusterNominalCounts=[[[] for i in range(numAtts)] for j in range(self.NumClusters)]
        self.m_ClusterMissingCounts=[[0]*numAtts for i in range(self.NumClusters)]
        # Iterate: assign instances, then recompute the centroids.
        while not converged:
            emptyClusterCount=0
            self.m_Iterations+=1
            converged=True
            if self.m_executionSlots<=1 or instances.numInstances() <2*self.m_executionSlots:
                for i in range(instances.numInstances()):
                    toCluster=instances.instance(i)
                    newC=self.clusterProcessedInstance(toCluster,False,True)
                    if newC != clusterAssignments[i]:
                        converged=False
                    clusterAssignments[i]=newC
            self.m_ClusterCentroids=Instances(instances,self.NumClusters)
            # Fresh, empty member containers for every iteration; re-using
            # the previous ones would accumulate members across iterations.
            tempI=[Instances(instances,0) for _ in range(self.NumClusters)]
            for i in range(instances.numInstances()):
                tempI[clusterAssignments[i]].add(instances.instance(i))
            for i in range(self.NumClusters):
                if tempI[i].numInstances() == 0:
                    emptyClusterCount+=1
                else:
                    self.moveCentroid(i,tempI[i],True,True)
            if self.m_Iterations == self.m_MaxIterations:
                converged=True
            if emptyClusterCount>0:
                self.NumClusters-=emptyClusterCount
                if converged:
                    # Final pass: keep only the non-empty clusters.
                    t=[None]*self.NumClusters   #type:List[Instances]
                    index=0
                    for k in range(len(tempI)):
                        if tempI[k].numInstances()>0:
                            t[index]=tempI[k]
                            for i in range(tempI[k].numAttributes()):
                                self.m_ClusterNominalCounts[index][i]=self.m_ClusterNominalCounts[k][i]
                            index+=1
                    tempI=t
            if not converged:
                self.m_ClusterNominalCounts=[[[] for i in range(numAtts)] for j in range(self.NumClusters)]
        if not self.m_FastDistanceCalc:
            # Accumulate the squared errors against the final centroids.
            for i in range(instances.numInstances()):
                self.clusterProcessedInstance(instances.instance(i),True,False)

        self.m_ClusterSizes=[tempI[i].sumOfWeight() for i in range(self.NumClusters)]
        self.m_DistanceFunction.clean()

    def numberOfClusters(self):
        """Return the current number of clusters."""
        return self.NumClusters

    def setSeedDefault(self,value:str):
        """Set the random seed from its string form; non-integers are ignored.

        The seed is also forwarded through ``setSeed`` (as ``__init__`` does)
        so the value ``buildClusterer`` reads via ``getSeed`` stays in sync.
        """
        try:
            val=int(value)
        except ValueError:
            return
        self.Seed=val
        self.setSeed(val)
        self.propertyList.update({"Seed":value})

    def clusterProcessedInstance(self,instance:Instance,updateErrors:bool,useFastDistCalc:bool):
        """Return the index of the centroid closest to ``instance``.

        :param instance: the instance to assign
        :param updateErrors: when true, the weighted squared distance to the
            winning centroid is added to ``m_squaredErrors``
        :param useFastDistCalc: when true, the best distance found so far is
            passed to the distance function as third argument
        """
        minDist=float('inf')
        bestCluster=0
        for i in range(self.NumClusters):
            centroid=self.m_ClusterCentroids.instance(i)
            if useFastDistCalc:
                dist=self.m_DistanceFunction.distance(instance,centroid,minDist)
            else:
                dist=self.m_DistanceFunction.distance(instance,centroid)
            if dist<minDist:
                minDist=dist
                bestCluster=i
        if updateErrors:
            # distance * distance * weight
            minDist*=minDist*instance.weight()
            self.m_squaredErrors[bestCluster]+=minDist
        return bestCluster

    def moveCentroid(self,centroidIndex:int,members:Instances,updateClusterInfo:bool,addToCentroidInstances:bool):
        """Compute the centroid of ``members`` and return its attribute values.

        Numeric attributes get the weighted mean of the non-missing values,
        nominal attributes the index of the value with the largest weight.
        An attribute is set to the missing value when it has no non-missing
        weight (numeric) or when the missing weight exceeds the largest
        value weight (nominal).

        :param centroidIndex: the slot of the per-cluster statistics to update
        :param members: the instances belonging to the cluster
        :param updateClusterInfo: when true, store the missing weights and
            nominal counts under ``centroidIndex``
        :param addToCentroidInstances: when true, append the centroid to
            ``m_ClusterCentroids``
        :return: the list of centroid values, one per attribute
        """
        numAtts=members.numAttributes()
        vals=[0]*numAtts
        nominalDists=[[] for i in range(numAtts)]
        weightMissing=[0]*numAtts
        weightNonMissing=[0]*numAtts
        for j in range(numAtts):
            if members.attribute(j).isNominal():
                nominalDists[j]=[0]*members.attribute(j).numValues()
        # Accumulate weighted sums (numeric) and value weights (nominal).
        for inst in members:
            for j in range(numAtts):
                if inst.isMissing(j):
                    weightMissing[j]+=inst.weight()
                else:
                    weightNonMissing[j]+=inst.weight()
                    if members.attribute(j).isNumeric():
                        vals[j]+=inst.weight()*inst.value(j)
                    else:
                        nominalDists[j][int(inst.value(j))]+=inst.weight()
        for j in range(numAtts):
            if members.attribute(j).isNumeric():
                if weightNonMissing[j]>0:
                    vals[j]/=weightNonMissing[j]
                else:
                    vals[j]= Utils.missingValue()
            else:
                bestWeight=float('-inf')
                bestIndex=-1
                for i in range(len(nominalDists[j])):
                    if nominalDists[j][i]>bestWeight:
                        bestWeight=nominalDists[j][i]
                        bestIndex=i
                    # Re-decided on every step; the last step's result stays.
                    if bestWeight < weightMissing[j]:
                        vals[j]= Utils.missingValue()
                    else:
                        vals[j]=bestIndex
        if updateClusterInfo:
            for j in range(numAtts):
                self.m_ClusterMissingCounts[centroidIndex][j]=weightMissing[j]
                self.m_ClusterNominalCounts[centroidIndex][j]=nominalDists[j]
        if addToCentroidInstances:
            self.m_ClusterCentroids.add(Instance(1.0,vals))
        return vals
Example #5
0
class ClassifierErrorsPlotInstances(AbstractPlotInstances):
    """Collects classifier predictions as plot data for error visualization.

    For every processed instance a row is added to ``m_PlotInstances`` that
    holds the original values plus the prediction (and, for a nominal class,
    the prediction margin), together with a plot shape and a plot size.
    """

    # Default used until setPointSizeProportionalToMargin is called; without
    # it process() and finishUp() would fail on a missing attribute.
    m_pointSizeProportionalToMargin = False

    def __init__(self):
        super().__init__()

    def initialize(self):
        """Reset all members to their defaults."""
        super().initialize()
        self.m_PlotShapes = []  #type:List[int]
        self.m_PlotSizes = []  #type:List[object]
        self.m_Classifier = None  #type:Classifier
        self.m_ClassIndex = -1
        self.m_Evaluation = None  #type:Evaluation
        self.m_SaveForVisualization = True
        self.m_MinimumPlotSizeNumeric = 30
        self.m_MaximumPlotSizeNumeric = 200

    def setClassifier(self, value: Classifier):
        """Set the classifier the predictions come from."""
        self.m_Classifier = value

    def setClassIndex(self, index: int):
        """Set the index of the class attribute."""
        self.m_ClassIndex = index

    def setPointSizeProportionalToMargin(self, b: bool):
        """Choose whether point sizes follow the prediction margin."""
        self.m_pointSizeProportionalToMargin = b

    def setEvaluation(self, value: Evaluation):
        """Set the evaluation object used to record predictions."""
        self.m_Evaluation = value

    def determineFormat(self):
        """Create the empty ``m_PlotInstances`` dataset.

        The header is a copy of the attributes of ``m_Instances`` with a
        "predicted" attribute inserted in front of the class attribute; for
        a nominal class a "prediction margin" attribute is inserted before
        that. ``m_PlotInstances`` is set to ``None`` when nothing is saved
        for visualization.
        """
        margin = None  #type:Attribute
        if not self.m_SaveForVisualization:
            self.m_PlotInstances = None
            return
        hv = []  #type:List[Attribute]
        classAt = self.m_Instances.attribute(self.m_ClassIndex)
        if classAt.isNominal():
            attVals = [classAt.value(i) for i in range(classAt.numValues())]
            predictedClass = Attribute("predicted " + classAt.name(), attVals)
            margin = Attribute("prediction margin")
        else:
            predictedClass = Attribute("predicted" + classAt.name())
        for i in range(self.m_Instances.numAttributes()):
            if i == self.m_Instances.classIndex():
                if classAt.isNominal():
                    hv.append(margin)
                hv.append(predictedClass)
            hv.append(self.m_Instances.attribute(i).copy())
        # Build the plot dataset with the prediction attributes added.
        self.m_PlotInstances = Instances(
            self.m_Instances.relationName() + "_predicted", hv,
            self.m_Instances.numInstances())
        # The class moves right by the number of attributes inserted before it.
        if classAt.isNominal():
            self.m_PlotInstances.setClassIndex(self.m_ClassIndex + 2)
        else:
            self.m_PlotInstances.setClassIndex(self.m_ClassIndex + 1)

    def process(self, toPredict: Instance, classifier: Classifier,
                evaluation: Evaluation):
        """Predict ``toPredict``, record the result and store its plot data.

        :param toPredict: the instance to classify
        :param classifier: the classifier producing the prediction
        :param evaluation: the evaluation object the prediction is recorded in
        """
        probActual = probNext = pred = 0
        classMissing = copy.deepcopy(toPredict)
        classMissing.setDataset(toPredict.dataset())

        if toPredict.classAttribute().isNominal():
            # Class probability distribution predicted by the classifier.
            preds = classifier.distributionForInstance(classMissing)
            val = 0
            if sum(preds) == 0:
                # All probabilities are 0: the instance belongs to no class.
                pred = Utils.missingValue()
                probActual = Utils.missingValue()
            else:
                # The predicted class is the index of the largest probability.
                pred = Utils.maxIndex(preds)
                # NOTE(review): this tests the class *index*, not the class
                # value, for being missing - confirm this is intended.
                if not Utils.isMissingValue(toPredict.classIndex()):
                    # Use the actual class when it is known; val stays 0
                    # when the class value is missing.
                    if not Utils.isMissingValue(toPredict.classValue()):
                        val = int(toPredict.classValue())
                    probActual = preds[val]
                else:
                    probActual = preds[Utils.maxIndex(preds)]
            # Largest probability among the classes other than val.
            for i in range(toPredict.classAttribute().numValues()):
                if i != val and preds[i] > probNext:
                    probNext = preds[i]
            evaluation.evaluationForSingleInstance(preds, toPredict, True)
        else:
            # Evaluate the single instance.
            pred = evaluation.evaluateModelOnceAndRecordPrediction(
                classifier, toPredict)
        if not self.m_SaveForVisualization:
            return
        # Save the data for visualization.
        if self.m_PlotInstances is not None:
            isNominal = toPredict.classAttribute().isNominal()
            values = [0] * self.m_PlotInstances.numAttributes()
            i = 0
            while i < self.m_PlotInstances.numAttributes():
                if i < toPredict.classIndex():
                    # Values in front of the class are copied unchanged.
                    values[i] = toPredict.value(i)
                elif i == toPredict.classIndex():
                    if isNominal:
                        # Margin between the actual/first choice and the
                        # runner-up.
                        values[i] = probActual - probNext
                        # The prediction.
                        values[i + 1] = pred
                        # The original class value.
                        values[i + 2] = toPredict.value(i)
                        i += 2
                    else:
                        values[i] = pred
                        values[i + 1] = toPredict.value(i)
                        i += 1
                else:
                    # Values behind the class are shifted by the inserted
                    # attributes.
                    if isNominal:
                        values[i] = toPredict.value(i - 2)
                    else:
                        values[i] = toPredict.value(i - 1)
                i += 1
            self.m_PlotInstances.add(Instance(1.0, values))
            if toPredict.classAttribute().isNominal():
                if toPredict.isMissing(
                        toPredict.classIndex()) or Utils.isMissingValue(pred):
                    self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
                elif pred != toPredict.classValue():
                    self.m_PlotShapes.append(Plot2D.ERROR_SHAPE)
                else:
                    self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
                if self.m_pointSizeProportionalToMargin:
                    self.m_PlotSizes.append(probActual - probNext)
                else:
                    # Misclassified points are drawn one unit larger.
                    sizeAdj = 0
                    if pred != toPredict.classValue():
                        sizeAdj = 1
                    self.m_PlotSizes.append(Plot2D.DEFAULT_SHAPE_SIZE.value +
                                            sizeAdj)
            else:
                # Numeric class: the size is the signed error, or None when
                # either the actual or the predicted value is missing.
                errd = None
                if not toPredict.isMissing(toPredict.classIndex(
                )) and not Utils.isMissingValue(pred):
                    errd = pred - toPredict.classValue()
                    self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
                else:
                    self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
                self.m_PlotSizes.append(errd)

    def createPlotData(self, name: str):
        """Return the collected data as ``PlotData2D``.

        :param name: the plot name; the relation name is appended in brackets
        :return: the plot data, or ``None`` when nothing is saved for
            visualization
        """
        if not self.m_SaveForVisualization:
            return None
        result = PlotData2D(self.m_PlotInstances)
        result.setShapeSize(self.m_PlotSizes)
        result.setShapeType(self.m_PlotShapes)
        result.setPlotName(name + " (" + self.m_Instances.relationName() + ")")
        return result

    def finishUp(self):
        """Scale the plot sizes for a numeric class or margin-sized points."""
        super().finishUp()
        if not self.m_SaveForVisualization:
            return
        if self.m_Instances.classAttribute().isNumeric(
        ) or self.m_pointSizeProportionalToMargin:
            self.scaleNumericPredictions()

    def scaleNumericPredictions(self):
        """Map the raw plot sizes into the configured size range.

        The absolute raw values are scaled linearly between
        ``m_MinimumPlotSizeNumeric`` and ``m_MaximumPlotSizeNumeric``. For a
        nominal class the raw range is fixed to [0, 1]; otherwise it is taken
        from the data. Entries that are ``None`` or NaN get the minimum size.
        """
        maxErr = float("-inf")
        minErr = float("inf")
        if self.m_Instances.classAttribute().isNominal():
            maxErr = 1
            minErr = 0
        else:
            for errd in self.m_PlotSizes:
                if errd is not None:
                    err = abs(errd)
                    if err < minErr:
                        minErr = err
                    if err > maxErr:
                        maxErr = err
        for i in range(len(self.m_PlotSizes)):
            errd = self.m_PlotSizes[i]
            # errd != errd holds only for NaN, which int() cannot convert.
            if errd is None or errd != errd:
                self.m_PlotSizes[i] = self.m_MinimumPlotSizeNumeric
                continue
            err = abs(errd)
            if maxErr - minErr > 0:
                temp = ((err - minErr) / (maxErr - minErr)) * (
                    self.m_MaximumPlotSizeNumeric -
                    self.m_MinimumPlotSizeNumeric + 1)
                self.m_PlotSizes[i] = int(
                    temp) + self.m_MinimumPlotSizeNumeric
            else:
                self.m_PlotSizes[i] = self.m_MinimumPlotSizeNumeric

    def cleanUp(self):
        """Release the references held for plotting."""
        super().cleanUp()
        self.m_Classifier = None
        self.m_PlotShapes = None
        self.m_PlotSizes = None
        self.m_Evaluation = None