def makeDistribution(self, neighbours: Instances, distances: List) -> List[float]:
    """Build the class distribution implied by a set of neighbours.

    For a nominal class every entry is seeded with
    ``1 / max(1, self.m_Train.numInstances())`` before each neighbour's
    weight is added to the entry of its class.  For a numeric class,
    entry 0 accumulates ``classValue * weight``.  Whenever the accumulated
    total is positive the result is handed to ``Utils.normalize`` together
    with that total.

    Side effect: ``distances`` is rescaled in place; every entry ``d``
    becomes ``sqrt(d * d / self.m_NumAttributesUsed)``.

    :param neighbours: the neighbouring instances
    :param distances: one distance per neighbour, in the same order
    :return: the distribution, one entry per class
    """
    distribution = [0] * self.m_NumClasses
    total = 0
    if self.m_ClassType == Attribute.NOMINAL:
        # Small uniform correction so that no class starts at zero.
        prior = 1 / max(1, self.m_Train.numInstances())
        distribution = [prior] * self.m_NumClasses
        total = self.m_NumClasses / max(1, self.m_Train.numInstances())

    for i in range(neighbours.numInstances()):
        current = neighbours.instance(i)
        distances[i] = distances[i] * distances[i]
        distances[i] = math.sqrt(distances[i] / self.m_NumAttributesUsed)

        if self.DistanceWeighting == self.WEIGHT_INVERSE:
            # The small offset keeps a zero distance (an identical
            # neighbour) from raising ZeroDivisionError.
            weight = 1 / (distances[i] + 0.001)
        elif self.DistanceWeighting == self.WEIGHT_SIMILARITY:
            weight = 1 - distances[i]
        else:
            weight = 1
        weight *= current.weight()

        if self.m_ClassType == Attribute.NOMINAL:
            distribution[int(current.classValue())] += weight
        elif self.m_ClassType == Attribute.NUMERIC:
            distribution[0] += current.classValue() * weight
        total += weight

    if total > 0:
        Utils.normalize(distribution, total)
    return distribution
def initialize(self, data: Instances, type: int, indices: List[int]):
    """Prepare the locator for a dataset, an attribute type and a set of indices.

    Stores ``Instances(data, 0)``, the requested type and a deep copy of
    ``indices`` (so later changes to the caller's list do not leak in),
    runs ``self.locate()`` and finally caches the results of
    ``self.find(True)`` and ``self.find(False)``.

    :param data: the dataset to work on
    :param type: the attribute type to look for
    :param indices: the indices the search is restricted to
    """
    allowedIndices = copy.deepcopy(indices)

    self.m_Data = Instances(data, 0)
    self.m_Type = type
    self.m_AllowedIndices = allowedIndices

    # locate() has to run before the two find() calls below.
    self.locate()
    self.m_Indices = self.find(True)
    self.m_LocatorIndices = self.find(False)
def setAttributes(self, inst: Instances, pos: int = -1):
    """Build the input form: one labelled editor per attribute plus two buttons.

    Nominal attributes get a combo box holding an empty entry followed by
    the attribute's values; numeric attributes get a line edit restricted
    by a ``QDoubleValidator``; everything else gets a plain line edit.
    The editors are collected in ``self.m_WidgetList`` in attribute order.

    :param inst: the dataset whose attributes are shown
    :param pos: stored in ``self.m_InsertPos``; defaults to -1
    """
    self.m_Instance = inst
    self.m_WidgetList = []
    self.m_InsertPos = pos

    form = QFormLayout()
    form.setLabelAlignment(Qt.AlignRight)
    form.setContentsMargins(20, 20, 20, 20)
    form.setSpacing(15)

    for index in range(inst.numAttributes()):
        attribute = inst.attribute(index)
        caption = QLabel(attribute.name())
        if attribute.isNominal():
            editor = QComboBox()
            editor.addItem("")
            editor.addItems(attribute.values())
        else:
            editor = QLineEdit()
            if attribute.isNumeric():
                editor.setPlaceholderText("输入数字")
                editor.setValidator(QDoubleValidator(self))
        self.m_WidgetList.append(editor)
        form.addRow(caption, editor)

    # Button row: submit triggers submitClick, cancel closes the widget.
    submitButton = QPushButton("提交")
    submitButton.clicked.connect(self.submitClick)
    cancelButton = QPushButton("取消")
    cancelButton.clicked.connect(self.close)

    buttonRow = QHBoxLayout()
    buttonRow.addWidget(submitButton)
    buttonRow.addWidget(cancelButton)
    buttonHolder = QWidget()
    buttonHolder.setLayout(buttonRow)
    form.addRow(buttonHolder)

    self.setLayout(form)
def determineFormat(self):
    """Create the (empty) dataset that will receive the plot data.

    When ``self.m_SaveForVisualization`` is false, ``self.m_PlotInstances``
    is set to ``None`` and nothing else happens.  Otherwise the new header
    is a copy of every attribute of ``self.m_Instances`` with extra
    attributes inserted right before the class attribute: a
    "prediction margin" attribute (nominal class only) followed by the
    predicted-class attribute.  The class index of the new dataset is
    shifted accordingly (by 2 for a nominal class, by 1 otherwise).
    """
    if not self.m_SaveForVisualization:
        self.m_PlotInstances = None
        return

    classAt = self.m_Instances.attribute(self.m_ClassIndex)
    nominalClass = classAt.isNominal()

    margin = None  # type: Attribute
    if nominalClass:
        labels = [classAt.value(i) for i in range(classAt.numValues())]
        predictedClass = Attribute("predicted " + classAt.name(), labels)
        margin = Attribute("prediction margin")
    else:
        predictedClass = Attribute("predicted" + classAt.name())

    plotAttributes = []  # type: List[Attribute]
    for i in range(self.m_Instances.numAttributes()):
        if i == self.m_Instances.classIndex():
            # Add the prediction attributes in front of the class.
            if nominalClass:
                plotAttributes.append(margin)
            plotAttributes.append(predictedClass)
        plotAttributes.append(self.m_Instances.attribute(i).copy())

    self.m_PlotInstances = Instances(
        self.m_Instances.relationName() + "_predicted",
        plotAttributes,
        self.m_Instances.numInstances())
    self.m_PlotInstances.setClassIndex(
        self.m_ClassIndex + (2 if nominalClass else 1))
def getROCArea(cls, tcurve: Instances):
    """Compute the area under the ROC curve stored in ``tcurve``.

    The true-positive and false-positive columns are read from the
    attributes named ``cls.TRUE_POS_NAME`` and ``cls.FALSE_POS_NAME``; the
    first row supplies the totals used for the final normalisation.

    :param tcurve: the threshold-curve dataset
    :return: the area; NaN when ``tcurve`` is empty or its relation name
        differs from ``cls.RELATION_NAME``.  When the product of the totals
        is zero the result of a floating-point division by zero is
        returned (NaN for a zero area, otherwise a signed infinity).
    """
    n = tcurve.numInstances()
    if cls.RELATION_NAME != tcurve.relationName() or n == 0:
        return float('nan')

    tpVals = tcurve.attributeToDoubleArray(
        tcurve.attribute(cls.TRUE_POS_NAME).index())
    fpVals = tcurve.attributeToDoubleArray(
        tcurve.attribute(cls.FALSE_POS_NAME).index())
    totalPos = tpVals[0]
    totalNeg = fpVals[0]

    area = 0
    cumNeg = 0
    # Every row but the last contributes the step down to its successor.
    for i in range(n - 1):
        posStep = tpVals[i] - tpVals[i + 1]
        negStep = fpVals[i] - fpVals[i + 1]
        area += posStep * (cumNeg + (0.5 * negStep))
        cumNeg += negStep
    # The last row contributes its own counts.
    area += tpVals[n - 1] * (cumNeg + (0.5 * fpVals[n - 1]))

    denominator = totalNeg * totalPos
    if denominator == 0:
        if area == 0:
            return float("nan")
        return float("inf") if area > 0 else float("-inf")
    return area / denominator
def __init__(self, data: Instances):
    """Reset every evaluation statistic for the given dataset.

    For a nominal class a zero-filled confusion matrix and the list of
    class names are created.  Class priors are initialised through
    ``self.setPriors(data)``, and every entry of
    ``self.BUILT_IN_EVAL_METRICS`` except "coverage" and "region size"
    (compared case-insensitively) is enabled for display, in lower case.

    :param data: the dataset; only its header and class information are
        read here, the data itself is passed on to ``setPriors``
    """
    self.m_Header = Instances(data, 0)
    self.m_NumClasses = data.numClasses()
    self.m_NumFolds = 1
    self.m_metricsToDisplay = []
    self.m_ClassIsNominal = data.classAttribute().isNominal()

    # Instance counters.
    self.m_WithClass = 0
    self.m_Unclassified = 0
    self.m_MissingClass = 0
    self.m_Incorrect = 0
    self.m_Correct = 0

    # Information-theoretic sums.
    self.m_SumKBInfo = 0
    self.m_SumSchemeEntropy = 0
    self.m_SumPriorEntropy = 0

    # Error sums.
    self.m_SumErr = 0
    self.m_SumAbsErr = 0
    self.m_SumSqrErr = 0
    self.m_SumPriorAbsErr = 0
    self.m_SumPriorSqrErr = 0

    # Coverage statistics.
    self.m_ConfLevel = 0.95
    self.m_TotalCoverage = 0
    self.m_TotalSizeOfRegions = 0

    self.m_DiscardPredictions = False
    self.m_CoverageStatisticsAvailable = True
    self.m_ComplexityStatisticsAvailable = True

    # Sums over actual and predicted class values.
    self.m_SumClass = 0
    self.m_SumSqrClass = 0
    self.m_SumPredicted = 0
    self.m_SumSqrPredicted = 0
    self.m_SumClassPredicted = 0

    self.m_Predictions = None  # type: List[Prediction]

    if self.m_ClassIsNominal:
        self.m_ConfusionMatrix = [
            [0] * self.m_NumClasses for _ in range(self.m_NumClasses)
        ]  # type: List[List[float]]
        self.m_ClassNames = [
            data.classAttribute().value(i) for i in range(self.m_NumClasses)
        ]  # type: List[str]

    self.m_ClassPriors = [0] * self.m_NumClasses  # type: List[float]
    self.setPriors(data)
    self.m_MarginCounts = [0] * (self.k_MarginResolution + 1)

    self.m_metricsToDisplay.extend(
        metric.lower()
        for metric in self.BUILT_IN_EVAL_METRICS
        if metric.lower() not in ("coverage", "region size"))
def calculateRSquared(data: Instances, ssr: float):
    """Return the coefficient of determination ``1 - ssr / tss``.

    ``tss`` is the total sum of squares: the squared deviation of every
    instance's class value from ``data.meanOrMode(data.classIndex())``.

    :param data: the dataset the regression was evaluated on
    :param ssr: the sum of squared residuals
    :return: R squared.  When ``tss`` is zero (no instances, or a constant
        class value) the value that IEEE floating-point division would
        give is returned instead of raising ``ZeroDivisionError``:
        NaN for ``ssr == 0``, ``-inf`` for a positive ``ssr`` and ``inf``
        for a negative one.
    """
    classIndex = data.classIndex()
    yMean = data.meanOrMode(classIndex)

    tss = 0
    for i in range(data.numInstances()):
        deviation = data.instance(i).value(classIndex) - yMean
        tss += deviation * deviation

    if tss == 0:
        # 1 - (ssr / 0.0) under IEEE rules.
        if ssr > 0:
            return float("-inf")
        if ssr < 0:
            return float("inf")
        return float("nan")
    return 1 - ssr / tss
def rightSide(self, index: int, data: Instances):
    """Return the right-hand side of the split condition for one subset.

    :param index: the subset index; for a nominal split attribute it is
        used as the index of the attribute value
    :param data: dataset providing the attribute definitions
    :return: `` = <value>`` for a nominal attribute, otherwise
        `` <= <splitPoint>`` for subset 0 and `` > <splitPoint>`` for any
        other subset, with the split point formatted by
        ``Utils.doubleToString(..., 6)``
    """
    attribute = data.attribute(self.m_attIndex)
    if attribute.isNominal():
        return " = " + data.attribute(self.m_attIndex).value(index)
    comparison = " <= " if index == 0 else " > "
    return comparison + Utils.doubleToString(self.m_splitPoint, 6)
def setInputFormat(self, instanceInfo: Instances):
    """Set the input format and derive the output format from it.

    :param instanceInfo: the structure of the incoming data; it must have
        a class attribute assigned
    :return: ``True`` when the class attribute is nominal, ``False``
        otherwise
    :raises Exception: if no class index has been set
    """
    super().setInputFormat(instanceInfo)
    if instanceInfo.classIndex() < 0:
        raise Exception("No class has been assigned to the instances")

    self.setOutputFormatBinary()
    self.m_Indices = None
    return bool(instanceInfo.classAttribute().isNominal())
def buildClassifier(self, data: Instances):
    """Build the tree and optionally collapse, prune and clean it up.

    The work is done on ``Instances(data)`` so that removing instances
    with a missing class does not touch the caller's object.

    :param data: the training data
    """
    trainingData = Instances(data)
    trainingData.deleteWithMissingClass()

    raisingOrNoCleanup = self.m_subtreeRaising or not self.m_cleanup
    self.buildTree(trainingData, raisingOrNoCleanup)

    if self.m_collapseTheTree:
        self.collapse()
    if self.m_pruneTheTree:
        self.prune()
    if self.m_cleanup:
        self.cleanup(Instances(trainingData, 0))
def setOutputFormat(self, outputFormat: Instances = None):
    """Set (or clear) the output format and start a fresh output queue.

    When a format is given, its string-free structure is stored, the
    output locators are initialised for it and its relation name becomes
    ``<original relation name>-<name of this class>``.

    :param outputFormat: the new output structure, or ``None`` to clear it
    """
    if outputFormat is None:
        self.m_OutputFormat = None
    else:
        self.m_OutputFormat = outputFormat.stringFreeStructure()
        self.initOutputLocators(self.m_OutputFormat)
        self.m_OutputFormat.setRelationName(
            outputFormat.relationName() + "-" + self.__class__.__name__)

    # The queue is replaced in both cases.
    self.m_OutputQueue = Queue()
def removeClass(self, inst: Instances):
    """Return ``inst`` without its class attribute.

    :param inst: the dataset to process
    :return: ``inst`` itself when no class index is set, otherwise the
        result of running a ``Remove`` filter over it that deletes the
        class attribute
    """
    classRemover = Remove()
    if inst.classIndex() < 0:
        return inst

    # Remove expects 1-based attribute indices.
    classRemover.setAttributeIndices(str(inst.classIndex() + 1))
    classRemover.setInvertSelection(False)
    classRemover.setInputFormat(inst)
    return Filter.useFilter(inst, classRemover)
def handleNumericAttribute(self, trainInstances: Instances):
    """Search for the best binary split point on a numeric attribute.

    The instances are scanned in order; the scan for known values stops
    at the first instance whose value for ``self.m_attIndex`` is missing,
    so the caller is expected to have sorted ``trainInstances`` on that
    attribute first.  On success ``m_numSubsets``, ``m_splitPoint``,
    ``m_distribution``, ``m_infoGain`` and ``m_gainRatio`` describe the
    chosen split; if no useful split exists the method returns early and
    ``m_numSubsets`` is left untouched.

    :param trainInstances: the training instances
    """
    nextIndex = 1
    lastIndex = 0
    splitIndex = -1

    self.m_distribution = Distribution(2, trainInstances.numClasses())

    # Only instances with a known value are relevant.
    firstMiss = 0
    for inst in trainInstances:
        if inst.isMissing(self.m_attIndex):
            break
        self.m_distribution.add(1, inst)
        firstMiss += 1

    # Minimum number of instances required in each subset.
    minSplit = 0.1 * self.m_distribution.total() / trainInstances.numClasses()
    if Utils.gr(self.m_minNoObj, minSplit) or Utils.equal(minSplit, self.m_minNoObj):
        minSplit = self.m_minNoObj
    elif Utils.gr(minSplit, 25):
        minSplit = 25

    # Enough instances with known values?
    if Utils.gr(2 * minSplit, firstMiss):
        return

    # Evaluate the criterion for every possible split index.
    defaultEnt = self.infoGainCrit.oldEnt(self.m_distribution)
    while nextIndex < firstMiss:
        lowerValue = trainInstances.instance(nextIndex - 1).value(self.m_attIndex)
        upperValue = trainInstances.instance(nextIndex).value(self.m_attIndex)
        if lowerValue + 1e-5 < upperValue:
            # Move everything up to the candidate split point into bag 0.
            self.m_distribution.shiftRange(1, 0, trainInstances, lastIndex, nextIndex)

            bag0 = self.m_distribution.perBag(0)
            bag1 = self.m_distribution.perBag(1)
            if (Utils.gr(bag0, minSplit) or Utils.equal(bag0, minSplit)) \
                    and (Utils.gr(bag1, minSplit) or Utils.equal(bag1, minSplit)):
                currentInfoGain = self.infoGainCrit.splitCritValue(
                    self.m_distribution, self.m_sumOfWeights, defaultEnt)
                if Utils.gr(currentInfoGain, self.m_infoGain):
                    self.m_infoGain = currentInfoGain
                    splitIndex = nextIndex - 1
                self.m_index += 1
            lastIndex = nextIndex
        nextIndex += 1

    # Was there any useful split?
    if self.m_index == 0:
        return

    # Correct the information gain of the best split.
    if self.m_useMDLcorrection:
        self.m_infoGain = self.m_infoGain - (
            Utils.log2(self.m_index) / self.m_sumOfWeights)
    if Utils.gr(0, self.m_infoGain) or Utils.equal(0, self.m_infoGain):
        return

    # Store the best split.
    self.m_numSubsets = 2
    belowSplit = trainInstances.instance(splitIndex).value(self.m_attIndex)
    aboveSplit = trainInstances.instance(splitIndex + 1).value(self.m_attIndex)
    self.m_splitPoint = (aboveSplit + belowSplit) / 2
    # With limited precision the midpoint can collapse onto the upper
    # value; fall back to the lower one in that case.
    if self.m_splitPoint == aboveSplit:
        self.m_splitPoint = belowSplit

    # Restore the distribution for the best split.
    self.m_distribution = Distribution(2, trainInstances.numClasses())
    self.m_distribution.addRange(0, trainInstances, 0, splitIndex + 1)
    self.m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss)

    self.m_gainRatio = self.gainRatioCrit.splitCritValue(
        self.m_distribution, self.m_sumOfWeights, self.m_infoGain)
def evaluateClusterer(self, test: Instances, outputModel: bool):
    """Cluster every test instance and append a summary to the result text.

    Works on a deep copy of ``test``.  If a class attribute is set it is
    removed from each instance before clustering, and afterwards the
    clusters are evaluated against the class via
    ``evaluateClustersWithRespectToClass``.  The per-instance cluster
    numbers are stored in ``self.m_clusterAssignments``; an instance for
    which the clusterer returns -1 is counted as unclustered.

    :param test: the data to cluster
    :param outputModel: whether to put the clusterer's string
        representation in front of the statistics
    :raises Exception: if the class attribute is numeric
    """
    loglk = 0
    unclusteredInstances = 0
    cc = self.m_Clusterer.numberOfClusters()
    self.m_numClusters = cc
    instanceStats = [0] * cc
    hasClass = test.classIndex() >= 0
    clusterAssignments = []
    classRemover = None  # type: Filter

    testRaw = copy.deepcopy(test)
    testRaw.setClassIndex(test.classIndex())
    if hasClass:
        if testRaw.classAttribute().isNumeric():
            raise Exception("ClusterEvaluation: Class must be nominal!")
        classRemover = Remove()
        classRemover.setAttributeIndices(str(testRaw.classIndex() + 1))
        classRemover.setInvertSelection(False)
        classRemover.setInputFormat(testRaw)

    for inst in testRaw:
        if classRemover is not None:
            classRemover.input(inst)
            classRemover.batchFinished()
            inst = classRemover.output()
        cnum = self.m_Clusterer.clusterInstance(inst)
        clusterAssignments.append(cnum)
        if cnum != -1:
            instanceStats[cnum] += 1
        else:
            unclusteredInstances += 1

    sumNum = sum(instanceStats)
    # With nothing clustered the average is undefined rather than an error.
    self.m_logL = loglk / sumNum if sumNum != 0 else float("nan")
    self.m_clusterAssignments = list(clusterAssignments)

    # Field widths = number of decimal digits; the guards avoid log(0).
    numAssigned = len(clusterAssignments)
    numInstFieldWidth = int(
        math.log(numAssigned) / math.log(10) + 1) if numAssigned > 0 else 1
    clustFieldWidth = int(
        (math.log(cc) / math.log(10)) + 1) if cc > 0 else 1

    if outputModel:
        self.m_clusteringResult += str(self.m_Clusterer)
    self.m_clusteringResult += "Clustered Instances\n\n"
    for i in range(cc):
        if instanceStats[i] > 0:
            self.m_clusteringResult += Utils.doubleToString(i, clustFieldWidth, 0) \
                + " " \
                + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0) \
                + "(" + Utils.doubleToString((instanceStats[i] / sumNum * 100), 3, 0) \
                + "%)\n"

    if unclusteredInstances > 0:
        self.m_clusteringResult += "\nUnclustered instances : " + str(
            unclusteredInstances)
    if hasClass:
        self.evaluateClustersWithRespectToClass(test)
def buildClassifer(self, instances: Instances):
    """Reset the split state and evaluate a split on ``self.m_attIndex``.

    Nominal attributes are handled by ``handleEnumeratedAttribute``.  For
    any other attribute ``instances`` is first sorted on it (in place) and
    then passed to ``handleNumericAttribute``.

    :param instances: the training instances
    """
    self.m_numSubsets = 0
    self.m_splitPoint = float("inf")
    self.m_infoGain = 0
    self.m_gainRatio = 0

    splitAttribute = instances.attribute(self.m_attIndex)
    if splitAttribute.isNominal():
        self.handleEnumeratedAttribute(instances)
        return

    instances.sort(instances.attribute(self.m_attIndex))
    self.handleNumericAttribute(instances)
def setSplitPoint(self, allInstances: Instances):
    """Snap the split point to a value that actually occurs in the data.

    Only applies to a numeric split attribute with more than one subset.
    The split point becomes the largest non-missing attribute value in
    ``allInstances`` that does not exceed the current split point
    (``-inf`` if there is no such value).

    :param allInstances: the data to take the values from
    """
    numericSplit = allInstances.attribute(self.m_attIndex).isNumeric()
    if not (numericSplit and self.m_numSubsets > 1):
        return

    largestBelow = float("-inf")
    for i in range(allInstances.numInstances()):
        candidate = allInstances.instance(i).value(self.m_attIndex)
        if Utils.isMissingValue(candidate):
            continue
        if largestBelow < candidate <= self.m_splitPoint:
            largestBelow = candidate
    self.m_splitPoint = largestBelow
def useFilter(cls, data: Instances, filter: 'Filter'):
    """Push a whole dataset through a filter and collect the output.

    Every instance of ``data`` is fed to ``filter.input``, the batch is
    closed with ``filter.batchFinished()`` and the filter's output is
    drained into the dataset returned by ``filter.getOutputFormat()``.

    :param data: the instances to filter
    :param filter: the filter to apply; its input format must already be set
    :return: the filtered instances
    """
    for index in range(data.numInstances()):
        filter.input(data.instance(index))
    filter.batchFinished()

    filtered = filter.getOutputFormat()
    Utils.debugOut("Queue size:", filter.m_OutputQueue.qsize())

    # output() returns None once the filter has nothing left.
    while True:
        processed = filter.output()
        if processed is None:
            break
        filtered.add(processed)
    return filtered
def setOutputFormatNumeric(self):
    """Derive the output format for data with a numeric class.

    Without ``self.m_Indices`` the output format is cleared.  Otherwise
    every nominal non-class attribute with ``n`` values is replaced by
    ``n - 1`` indicator attributes.  Indicator ``k`` (1 <= k < n) is named
    ``<attribute>=<v_k>,...,<v_(n-1)>``, where ``v_l`` is the attribute
    value at position ``self.m_Indices[j][l]``.  The indicators are
    numeric when ``self.binaryAttributesNominal`` is set and nominal with
    the values "f"/"t" otherwise.  The class index is shifted by ``n - 2``
    for every expanded attribute in front of the class.
    """
    if self.m_Indices is None:
        self.setOutputFormat()
        return

    inputFormat = self.getInputFormat()
    classIndex = inputFormat.classIndex()

    # Is there anything to transform at all?
    self.m_needToTransform = False
    for i in range(inputFormat.numAttributes()):
        att = inputFormat.attribute(i)
        if att.isNominal() and (att.numValues() > 2
                                or self.binaryAttributesNominal
                                or self.m_TransformAll):
            self.m_needToTransform = True
            break
    if not self.m_needToTransform:
        self.setOutputFormat(inputFormat)
        return

    newClassIndex = classIndex
    newAtts = []
    for j in range(inputFormat.numAttributes()):
        att = inputFormat.attribute(j)
        if not att.isNominal() or j == classIndex:
            newAtts.append(att.copy())
            continue

        numValues = att.numValues()
        if j < classIndex:
            # One attribute is replaced by numValues - 1 new ones.
            newClassIndex += numValues - 2

        # k starts at 1: n values give n - 1 indicators, which is what
        # the class-index shift and the weight spreading assume.
        for k in range(1, numValues):
            attributeName = att.name() + "=" + ",".join(
                att.value(self.m_Indices[j][l]) for l in range(k, numValues))
            if self.binaryAttributesNominal:
                a = Attribute(attributeName)
            else:
                a = Attribute(attributeName, ["f", "t"])
            if self.getSpreadAttributeWeight():
                a.setWeight(att.weight() / (numValues - 1))
            else:
                a.setWeight(att.weight())
            newAtts.append(a)

    outputFormat = Instances(inputFormat.relationName(), newAtts, 0)
    outputFormat.setClassIndex(newClassIndex)
    self.setOutputFormat(outputFormat)
def kNearestNeighbours(self, target: Instance, kNN: int) -> Instances:
    """Find the ``kNN`` nearest neighbours of ``target`` by a linear scan.

    Instances that compare equal to ``target`` are skipped.  Instances
    tied with the current k-th nearest distance are kept as well, so more
    than ``kNN`` neighbours may be returned.  The neighbours' distances
    are stored in ``self.m_Distances`` in the same order as the returned
    instances, after ``postProcessDistances`` has been applied to them.

    :param target: the instance to find neighbours for
    :param kNN: the number of neighbours to look for
    :return: the neighbours found
    """
    if self.m_Stats is not None:
        self.m_Stats.searchStart()

    heap = MyHeap(kNN)
    firstkNN = 0
    numInstances = self.m_Instances.numInstances()
    for i in range(numInstances):
        if target == self.m_Instances.instance(i):
            continue
        if self.m_Stats is not None:
            self.m_Stats.incrPointCount()

        if firstkNN < kNN:
            # The heap is not full yet: take every candidate.
            distance = self.m_DistanceFunction.distance(
                target, self.m_Instances.instance(i), float("inf"),
                self.m_Stats)
            if distance == 0 and self.m_SkipIdentical and i < numInstances - 1:
                continue
            heap.put(i, distance)
            firstkNN += 1
        else:
            # The heap is full: the current k-th distance is the cut-off.
            temp = heap.peek()
            distance = self.m_DistanceFunction.distance(
                target, self.m_Instances.instance(i), temp.distance,
                self.m_Stats)
            if distance == 0 and self.m_SkipIdentical:
                continue
            if distance < temp.distance:
                heap.putBySubstitute(i, distance)
            elif distance == temp.distance:
                heap.putKthNearest(i, distance)

    resultSize = heap.size() + heap.noOfKthNearest()
    neighbours = Instances(self.m_Instances, resultSize)
    self.m_Distances = [0] * resultSize
    indices = [0] * resultSize

    # Fill both arrays from the back: first the ties with the k-th
    # nearest, then the heap contents in the order the heap returns them.
    slot = resultSize - 1
    while heap.noOfKthNearest() > 0:
        h = heap.getKthNearest()
        indices[slot] = h.index
        self.m_Distances[slot] = h.distance
        slot -= 1
    while heap.size() > 0:
        h = heap.get()
        indices[slot] = h.index
        self.m_Distances[slot] = h.distance
        slot -= 1

    self.m_DistanceFunction.postProcessDistances(self.m_Distances)
    for index in indices:
        neighbours.add(self.m_Instances.instance(index))

    # Close the measurement opened by searchStart() above.
    if self.m_Stats is not None:
        self.m_Stats.searchFinish()
    return neighbours
def rightSide(self, index: int, data: Instances):
    """Return the right-hand side of the binary split condition.

    :param index: the subset index; 0 selects the first branch
    :param data: dataset providing the attribute definitions
    :return: for a nominal attribute `` = <value>`` (subset 0) or
        `` != <value>``, where the value is the one at position
        ``int(self.m_splitPoint)``; otherwise `` <= <splitPoint>``
        (subset 0) or `` > <splitPoint>``
    """
    firstBranch = index == 0
    if data.attribute(self.m_attIndex).isNominal():
        relation = " = " if firstBranch else " != "
        return relation + data.attribute(self.m_attIndex).value(
            int(self.m_splitPoint))
    relation = " <= " if firstBranch else " > "
    return relation + str(self.m_splitPoint)
def resetDistribution(self, data: Instances):
    """Recompute ``self.m_distribution`` from the given data.

    Instances that ``whichSubset`` can place (result greater than -1) make
    up the new distribution; ``addInstWithUnknown`` is then called with
    the full data and the split attribute.

    :param data: the instances to build the distribution from
    """
    assignable = Instances(data, data.numInstances())
    for position in range(data.numInstances()):
        instance = data.instance(position)
        if self.whichSubset(instance) > -1:
            assignable.add(instance)

    rebuilt = Distribution(assignable, self)
    rebuilt.addInstWithUnknown(data, self.m_attIndex)
    self.m_distribution = rebuilt
def setUpComboBoxes(self, inst: Instances):
    """Fill the X, Y and colour combo boxes with the dataset's attributes.

    Each entry reads ``<prefix><attribute name> (<short type>)``.  After
    filling, X selects the first entry, Y the second and the colour box
    the last one.

    :param inst: the dataset whose attributes are listed
    """
    xLabels = []
    yLabels = []
    colourLabels = []
    for index in range(inst.numAttributes()):
        attribute = inst.attribute(index)
        typeSuffix = " (" + Attribute.typeToStringShort(attribute) + ")"
        described = attribute.name() + typeSuffix
        xLabels.append("X: " + described)
        yLabels.append("Y: " + described)
        colourLabels.append("Colour: " + described)

    self.m_XCombo.addItems(xLabels)
    self.m_YCombo.addItems(yLabels)
    self.m_ColourCombo.addItems(colourLabels)

    self.m_XCombo.setCurrentIndex(0)
    self.m_YCombo.setCurrentIndex(1)
    self.m_ColourCombo.setCurrentIndex(inst.numAttributes() - 1)
def copyStringValues(cls, inst: Instance, a0=None, a1=None, a2: AttributeLocator = None, a3: Instances = None, a4: AttributeLocator = None):
    """Copy the string values of an instance into a destination dataset.

    Two call shapes are supported, selected by the argument types:

    * ``(inst, destDataset, locator)`` -- ``a0`` is an ``Instances`` and
      ``a1`` an ``AttributeLocator``.  The instance's own dataset is the
      source; the work is delegated to ``copyStringValuesFromSrc``.
    * ``(inst, instSrcCompat, srcDataset, srcLoc, destDataset, destLoc)``
      -- ``a0`` tells whether the instance is laid out like the source
      (truthy) or like the destination.  For every located attribute
      whose value is not missing, the value is added to the destination
      attribute and the instance is updated with the returned index.
      Nothing happens when source and destination dataset compare equal.

    :raises Exception: if the instance has no dataset, or if the number of
        attributes, attribute indices or locator indices differ between
        source and destination
    """
    if isinstance(a0, Instances) and isinstance(a1, AttributeLocator):
        if inst.dataset() is None:
            raise Exception("Instance has no dataset assigned!!")
        if inst.dataset().numAttributes() != a0.numAttributes():
            raise Exception("Src and Dest differ in # of attributes: "
                            + str(inst.dataset().numAttributes())
                            + " != " + str(a0.numAttributes()))
        cls.copyStringValuesFromSrc(inst, True, inst.dataset(), a1, a0, a1)
        return

    if a1 == a3:
        return

    srcCount = len(a2.getAttributeIndices())
    destCount = len(a4.getAttributeIndices())
    if srcCount != destCount:
        raise Exception("Src and Dest string indices differ in length: "
                        + str(srcCount) + " != " + str(destCount))
    srcLocatorCount = len(a2.getLocatorIndices())
    destLocatorCount = len(a4.getLocatorIndices())
    if srcLocatorCount != destLocatorCount:
        raise Exception("Src and Dest locator indices differ in length: "
                        + str(srcLocatorCount) + " != " + str(destLocatorCount))

    for i in range(len(a2.getAttributeIndices())):
        srcIndex = a2.getActualIndex(a2.getAttributeIndices()[i])
        destIndex = a4.getActualIndex(a4.getAttributeIndices()[i])
        # The instance follows either the source or the destination layout.
        instIndex = srcIndex if a0 else destIndex
        src = a1.attribute(srcIndex)
        dest = a3.attribute(destIndex)
        if inst.isMissing(instIndex):
            continue
        valIndex = dest.addStringValue(src, int(inst.value(instIndex)))
        inst.setValue(instIndex, valIndex)
def split(self, data: Instances) -> List[Instances]:
    """Split the data into one dataset per subset of this model.

    An instance that ``whichSubset`` assigns to a subset goes there
    unchanged.  Any other instance is added to every subset for which
    ``self.weights(instance)`` is greater than zero, and the copy that
    was just added gets the weight
    ``weights[subset] * instance.weight()``.

    :param data: the instances to split
    :return: the subsets, indexed by subset number
    """
    subsetIndices = range(self.m_numSubsets)

    # First pass: determine how large each subset will be.
    sizes = [0] * self.m_numSubsets
    for instance in data:
        target = self.whichSubset(instance)
        if target > -1:
            sizes[target] += 1
            continue
        fractions = self.weights(instance)
        for j in subsetIndices:
            if Utils.gr(fractions[j], 0):
                sizes[j] += 1

    # Second pass: create the subsets and distribute the instances.
    subsets = [Instances(data, sizes[j]) for j in subsetIndices]  # type: List[Instances]
    for instance in data:
        target = self.whichSubset(instance)
        if target > -1:
            subsets[target].add(instance)
            continue
        fractions = self.weights(instance)
        for j in subsetIndices:
            if Utils.gr(fractions[j], 0):
                subsets[j].add(instance)
                subsets[j].lastInstance().setWeight(
                    float(fractions[j] * instance.weight()))
    return subsets
def selectModel(self, data: Instances, test: Instances = None):
    """Select the split model for the given data.

    One ``C45Split`` is built per non-class attribute.  Among the valid
    candidates whose information gain is at least the average gain
    (minus 1e-3), the one with the highest gain ratio wins.

    :param data: the training instances for this node
    :param test: ignored; when given, the call is simply repeated
        without it
    :return: the best split, or a ``NoSplit`` model when there are too
        few instances, all instances belong to one class, or no useful
        split was found
    """
    if test is not None:
        return self.selectModel(data)

    # Not enough instances to split, or all of them in one class?
    checkDistribution = Distribution(data)
    noSplitModel = NoSplit(checkDistribution)
    tooFewInstances = Utils.gr(2 * self.m_minNoObj, checkDistribution.total())
    if tooFewInstances or Utils.equal(
            checkDistribution.total(),
            checkDistribution.perClass(checkDistribution.maxClass())):
        return noSplitModel

    # multiVal stays True only if every attribute is non-numeric and has
    # at least 0.3 * (size of all data) values.
    multiVal = True
    if self.m_allData is not None:
        for attr in data.enumerateAttributes():
            if attr.isNumeric() or Utils.gr(
                    0.3 * self.m_allData.numInstances(), attr.numValues()):
                multiVal = False
                break

    numAttributes = data.numAttributes()
    currentModel = [None] * numAttributes  # type: List[C45Split]
    sumOfWeights = data.sumOfWeight()
    infoGainSum = 0
    validModels = 0
    for i in range(numAttributes):
        if i == data.classIndex():
            currentModel[i] = None
            continue
        currentModel[i] = C45Split(i, self.m_minNoObj, sumOfWeights,
                                   self.m_useMDLcorrection)
        currentModel[i].buildClassifer(data)
        if not currentModel[i].checkModel():
            continue
        # Without m_allData every valid model counts; otherwise nominal
        # attributes with many values only count when multiVal is set.
        if self.m_allData is None \
                or data.attribute(i).isNumeric() \
                or multiVal \
                or Utils.gr(0.3 * self.m_allData.numInstances(),
                            data.attribute(i).numValues()):
            infoGainSum = infoGainSum + currentModel[i].infoGain()
            validModels += 1

    if validModels == 0:
        return noSplitModel
    averageInfoGain = infoGainSum / validModels

    # Find the "best" attribute to split on.
    bestModel = None
    minResult = 0
    for i in range(numAttributes):
        if i == data.classIndex():
            continue
        candidate = currentModel[i]
        if not candidate.checkModel():
            continue
        if candidate.infoGain() >= averageInfoGain - 1e-3 and \
                Utils.gr(candidate.gainRatio(), minResult):
            bestModel = candidate
            minResult = candidate.gainRatio()

    if Utils.equal(minResult, 0):
        return noSplitModel

    # Store the complete distribution, including instances whose value
    # for the split attribute is unknown, with the model.
    bestModel.distribution().addInstWithUnknown(data, bestModel.attIndex())
    if self.m_allData is not None and not self.m_doNotMakeSplitPointActualValue:
        bestModel.setSplitPoint(self.m_allData)
    return bestModel
def getPRCArea(cls, tcurve: Instances):
    """Compute the area under the precision-recall curve in ``tcurve``.

    The rows are walked from the second-to-last back to the first; each
    adds its precision times the change in recall relative to the row
    visited before it.

    :param tcurve: the threshold-curve dataset
    :return: the area; NaN when ``tcurve`` is empty or its relation name
        differs from ``cls.RELATION_NAME``; ``Utils.missingValue()`` when
        the area is exactly zero
    """
    n = tcurve.numInstances()
    if cls.RELATION_NAME != tcurve.relationName() or n == 0:
        return float('nan')

    precision = tcurve.attributeToDoubleArray(
        tcurve.attribute(cls.PRECISION_NAME).index())
    recall = tcurve.attributeToDoubleArray(
        tcurve.attribute(cls.RECALL_NAME).index())

    area = 0
    previousRecall = recall[n - 1]
    for row in reversed(range(n - 1)):
        area += precision[row] * (recall[row] - previousRecall)
        previousRecall = recall[row]

    return Utils.missingValue() if area == 0 else area
def dumpLabel(self, index: int, data: Instances):
    """Return the label text for one subset.

    The text has the form ``<majority class> (<weight>)`` or, when the
    subset contains incorrectly classified weight,
    ``<majority class> (<weight>/<incorrect>)``; both numbers are rounded
    to two decimals.

    :param index: the subset index
    :param data: dataset providing the class attribute
    :return: the label text
    """
    distribution = self.m_distribution
    pieces = [
        data.classAttribute().value(distribution.maxClass(index)),
        " (" + str(Utils.roundDouble(distribution.perBag(index), 2)),
    ]
    if Utils.gr(distribution.numIncorrect(index), 0):
        pieces.append(
            "/" + str(Utils.roundDouble(distribution.numIncorrect(index), 2)))
    pieces.append(")")
    return "".join(pieces)
def evaluateClustersWithRespectToClass(self, inst: Instances):
    """Evaluate the cluster assignments against the class attribute.

    Counts, per cluster, how many instances of each class it received
    (instances without a cluster or without a class value are skipped),
    lets ``mapClasses`` fill in a class for each cluster and appends the
    classes-to-clusters matrix, the mapping and the number of incorrectly
    clustered instances to ``self.m_clusteringResult``.  The mapping is
    also stored in ``self.m_classToCluster``, one entry per cluster.

    :param inst: the instances that were clustered, with the class set;
        they must be in the same order as ``self.m_clusterAssignments``
    """
    numClasses = inst.classAttribute().numValues()
    counts = [[0] * numClasses for _ in range(self.m_numClusters)]
    clusterTotals = [0] * self.m_numClusters
    best = [0] * (self.m_numClusters + 1)
    current = [0] * (self.m_numClusters + 1)

    instances = copy.deepcopy(inst)
    instances.setClassIndex(inst.classIndex())
    numInstances = 0
    for instance in instances:
        assigned = self.m_clusterAssignments[numInstances]
        if assigned >= 0 and not instance.classIsMissing():
            counts[int(assigned)][int(instance.classValue())] += 1
            clusterTotals[int(assigned)] += 1
        numInstances += 1

    # The last slot of `best` holds the error of the best mapping so far.
    best[self.m_numClusters] = float('inf')
    self.mapClasses(self.m_numClusters, 0, counts, clusterTotals, current,
                    best, 0)

    self.m_clusteringResult += "\n\nClass attribute: " + inst.classAttribute(
    ).name() + "\n"
    self.m_clusteringResult += "Classes to Clusters:\n"
    matrixString = self.toMatrixString(counts, clusterTotals,
                                       Instances(inst, 0))
    self.m_clusteringResult += matrixString + '\n'

    Cwidth = 1 + int(math.log(self.m_numClusters) / math.log(10))
    for i in range(self.m_numClusters):
        if clusterTotals[i] > 0:
            self.m_clusteringResult += "Cluster " + Utils.doubleToString(
                i, Cwidth, 0)
            self.m_clusteringResult += " <-- "
            if best[i] < 0:
                self.m_clusteringResult += "No class\n"
            else:
                self.m_clusteringResult += inst.classAttribute().value(
                    int(best[i])) + '\n'

    self.m_clusteringResult += "\nIncorrectly clustered instances :\t" \
        + str(best[self.m_numClusters]) \
        + "\t" \
        + Utils.doubleToString((best[self.m_numClusters] / numInstances * 100.0), 8, 4) \
        + " %\n"

    # Build the list in one go: assigning by index into an empty list
    # raises IndexError.
    self.m_classToCluster = [int(best[i]) for i in range(self.m_numClusters)]
def setOutputFormatNominal(self):
    """Derive the output format for data with a nominal class.

    If no nominal non-class attribute needs transforming, the input
    format is used as it is.  Otherwise:

    * non-nominal attributes and the class are copied;
    * nominal attributes with at most two values are copied, unless
      ``self.m_TransformAll`` is set; with ``self.binaryAttributesNominal``
      they become a single numeric attribute instead, named
      ``<attribute>=<second value>`` when there are two values;
    * every other nominal attribute with ``n`` values is replaced by ``n``
      indicator attributes named ``<attribute>=<value>`` -- numeric when
      ``self.binaryAttributesNominal`` is set, nominal with the values
      "f"/"t" otherwise -- and the class index is shifted by ``n - 1`` if
      the attribute precedes the class.
    """
    inputFormat = self.getInputFormat()
    classIndex = inputFormat.classIndex()

    # Is there anything to transform at all?
    self.m_needToTransform = False
    for i in range(inputFormat.numAttributes()):
        att = inputFormat.attribute(i)
        if att.isNominal() and i != classIndex and \
                (att.numValues() > 2 or self.m_TransformAll
                 or self.binaryAttributesNominal):
            self.m_needToTransform = True
            break
    if not self.m_needToTransform:
        self.setOutputFormat(inputFormat)
        return

    newClassIndex = classIndex
    newAtts = []
    for j in range(inputFormat.numAttributes()):
        att = inputFormat.attribute(j)
        if not att.isNominal() or j == classIndex:
            newAtts.append(att.copy())
            continue

        if att.numValues() <= 2 and not self.m_TransformAll:
            if self.binaryAttributesNominal:
                value = ""
                if att.numValues() == 2:
                    value = "=" + att.value(1)
                a = Attribute(att.name() + value)
                a.setWeight(att.weight())
                newAtts.append(a)
            else:
                newAtts.append(att.copy())
            continue

        if j < classIndex:
            # One attribute is replaced by numValues new ones.
            newClassIndex += att.numValues() - 1

        for k in range(att.numValues()):
            attributeName = att.name() + "=" + att.value(k)
            if self.binaryAttributesNominal:
                a = Attribute(attributeName)
            else:
                # Without this branch no indicator was created while the
                # class index above had already been shifted.
                a = Attribute(attributeName, ["f", "t"])
            if self.getSpreadAttributeWeight():
                a.setWeight(att.weight() / att.numValues())
            else:
                a.setWeight(att.weight())
            newAtts.append(a)

    outputFormat = Instances(inputFormat.relationName(), newAtts, 0)
    outputFormat.setClassIndex(newClassIndex)
    self.setOutputFormat(outputFormat)
def __init__(self, insts: Instances):
    """Create the plot data for a set of instances.

    Every point gets the default shape size.  Points whose instance has a
    non-negative weight get the automatic shape; all others get shape
    type -2.  The bounds are computed at the end via ``determineBounds``.

    :param insts: the instances to plot
    """
    numPoints = insts.numInstances()

    # Axis and colour bounds.
    self.m_maxX = 0
    self.m_minX = 0
    self.m_maxY = 0
    self.m_minY = 0
    self.m_maxC = 0
    self.m_minC = 0

    self.m_plotName = "new plot"
    self.m_plotInstances = insts
    self.m_xIndex = 0
    self.m_yIndex = 0
    self.m_cIndex = 0

    self.m_pointLookup = [[0] * 4 for _ in range(numPoints)]
    self.m_connecctPoints = [False] * numPoints
    self.m_alwaysDisplayPointsOfThisSize = -1
    self.m_displayAllPoints = False

    self.m_shapeSize = [
        Plot2D.DEFAULT_SHAPE_SIZE.value for _ in range(numPoints)
    ]  # type: List[int]
    self.m_shapeType = [
        Plot2D.CONST_AUTOMATIC_SHAPE
        if self.m_plotInstances.instance(i).weight() >= 0 else -2
        for i in range(numPoints)
    ]  # type: List[int]

    self.determineBounds()