def rightSide(self, index: int, data: Instances):
    """Return the right-hand side of the split test for one branch.

    :param index: branch number. For a nominal split attribute it selects
        the attribute value; otherwise 0 yields the "<=" branch and any
        other value the ">" branch.
    :param data: dataset providing the attribute at ``self.m_attIndex``.
    :return: the condition text, starting with " = ", " <= " or " > ".
    """
    splitAttribute = data.attribute(self.m_attIndex)
    if splitAttribute.isNominal():
        return " = " + splitAttribute.value(index)
    comparison = " <= " if index == 0 else " > "
    return comparison + Utils.doubleToString(self.m_splitPoint, 6)
def evaluateClusterer(self, test: Instances, outputModel: bool):
    """Cluster every instance of ``test`` and record the outcome on ``self``.

    Sets ``m_numClusters``, ``m_logL`` and ``m_clusterAssignments`` and
    appends a per-cluster instance count table to ``m_clusteringResult``.
    Instances for which the clusterer returns -1 are counted as unclustered
    and reported in a separate line. When ``test`` has a class index set,
    the class attribute is removed before clustering and
    ``evaluateClustersWithRespectToClass`` is called at the end.

    :param test: instances to evaluate; a deep copy is clustered.
    :param outputModel: when true, ``str(self.m_Clusterer)`` is added to the
        report before the table.
    :raises Exception: if a class index is set and the class is numeric.
    """
    unclusteredInstances = 0
    # NOTE(review): nothing below accumulates a log likelihood, so m_logL
    # always ends up as zero.
    loglk = 0
    cc = self.m_Clusterer.numberOfClusters()
    self.m_numClusters = cc
    instanceStats = [0] * cc
    hasClass = test.classIndex() >= 0
    clusterAssignments = []
    classRemover = None  # Remove filter, only needed when a class is set
    testRaw = copy.deepcopy(test)
    testRaw.setClassIndex(test.classIndex())
    if hasClass:
        if testRaw.classAttribute().isNumeric():
            raise Exception("ClusterEvaluation: Class must be nominal!")
        classRemover = Remove()
        classRemover.setAttributeIndices(str(testRaw.classIndex() + 1))
        classRemover.setInvertSelection(False)
        classRemover.setInputFormat(testRaw)

    for inst in testRaw:
        if classRemover is not None:
            classRemover.input(inst)
            classRemover.batchFinished()
            inst = classRemover.output()
        cnum = self.m_Clusterer.clusterInstance(inst)
        clusterAssignments.append(cnum)
        if cnum != -1:
            instanceStats[cnum] += 1
        else:
            unclusteredInstances += 1

    sumNum = sum(instanceStats)
    # Nothing to normalise by when no instance could be clustered.
    if sumNum > 0:
        loglk /= sumNum
    self.m_logL = loglk
    self.m_clusterAssignments = list(clusterAssignments)

    # Field widths are digit counts; log10 is exact for powers of ten and
    # undefined for zero, hence the guards.
    if clusterAssignments:
        numInstFieldWidth = int(math.log10(len(clusterAssignments)) + 1)
    else:
        numInstFieldWidth = 1
    clustFieldWidth = int(math.log10(cc) + 1) if cc > 0 else 1

    if outputModel:
        self.m_clusteringResult += str(self.m_Clusterer)
    self.m_clusteringResult += "Clustered Instances\n\n"
    for i in range(cc):
        if instanceStats[i] > 0:
            self.m_clusteringResult += (
                Utils.doubleToString(i, clustFieldWidth, 0)
                + " "
                + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0)
                + "("
                + Utils.doubleToString((instanceStats[i] / sumNum * 100), 3, 0)
                + "%)\n")
    if unclusteredInstances > 0:
        self.m_clusteringResult += "\nUnclustered instances : " + str(
            unclusteredInstances)
    if hasClass:
        self.evaluateClustersWithRespectToClass(test)
def toString(self, attIndex: int, afterDecimalPoint: int) -> str:
    """Return the textual form of a single attribute value.

    :param attIndex: index of the attribute to render.
    :param afterDecimalPoint: number of decimals passed on to
        ``Utils.doubleToString`` for numeric output.
    :return: "?" for a missing value, the formatted number when there is no
        dataset or the attribute is numeric, otherwise the quoted string
        value.
    """
    if self.isMissing(attIndex):
        return "?"
    dataset = self.m_Dataset
    isNumber = (dataset is None
                or dataset.attribute(attIndex).type() == Attribute.NUMERIC)
    if isNumber:
        return Utils.doubleToString(self.value(attIndex), afterDecimalPoint)
    return Utils.quote(self.stringValue(attIndex))
def setTable(self, attrStats: AttributeStats, index: int):
    """Fill the statistics table for one attribute.

    When ``attrStats.nominalCounts`` is set, one row per label is shown with
    its number, label, count and weight. Otherwise, when
    ``attrStats.numericStats`` is set, the minimum, maximum, mean and
    standard deviation are shown. Nothing happens when neither is set.

    :param attrStats: statistics of the attribute to display.
    :param index: index of that attribute in ``self.m_Instance``.
    """
    if attrStats.nominalCounts is not None:
        att = self.m_Instance.attribute(index)
        colNames = ["No.", "Label", "Count", "Weight"]
        data = []
        for i, count in enumerate(attrStats.nominalCounts):
            itemNo = QTableWidgetItem(str(i + 1))
            itemNo.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            data.append([
                itemNo,
                QTableWidgetItem(att.value(i)),
                QTableWidgetItem(str(int(count))),
                QTableWidgetItem(
                    Utils.doubleToString(attrStats.nominalWeights[i], 3)),
            ])
        # Update the header and fill in the data.
        self.fillData(data, colNames)
        self.m_StatsTable.horizontalHeader().resizeSection(0, 60)
    elif attrStats.numericStats is not None:
        colNames = ["Statistic", "Value"]
        stats = attrStats.numericStats
        rows = (
            ("Minimum", stats.min),
            ("Maximum", stats.max),
            ("Mean", stats.mean),
            ("StdDev", stats.stdDev),
        )
        data = [[
            QTableWidgetItem(label),
            QTableWidgetItem(Utils.doubleToString(value, 3)),
        ] for label, value in rows]
        self.fillData(data, colNames)
        # resizeSection() takes an integer pixel size; true division would
        # hand it a float, which the Qt bindings reject.
        self.m_StatsTable.horizontalHeader().resizeSection(
            0, self.m_StatsTable.width() // 2)
def evaluateClustersWithRespectToClass(self, inst: Instances):
    """Compare the stored cluster assignments with the class attribute.

    Counts the class values per cluster using ``self.m_clusterAssignments``,
    lets ``mapClasses`` fill in the class-to-cluster mapping, appends the
    classes-to-clusters matrix, the mapping and the number of incorrectly
    clustered instances to ``self.m_clusteringResult`` and stores the
    mapping in ``self.m_classToCluster``.

    :param inst: the clustered instances; their class attribute is used.
    """
    numClusters = self.m_numClusters
    numClasses = inst.classAttribute().numValues()
    counts = [[0] * numClasses for _ in range(numClusters)]
    clusterTotals = [0] * numClusters
    best = [0] * (numClusters + 1)
    current = [0] * (numClusters + 1)
    instances = copy.deepcopy(inst)
    instances.setClassIndex(inst.classIndex())

    numInstances = 0
    for instance in instances:
        cluster = self.m_clusterAssignments[numInstances]
        # Unclustered instances (negative assignment) and instances without
        # a class value do not contribute to the counts.
        if cluster >= 0 and not instance.classIsMissing():
            counts[int(cluster)][int(instance.classValue())] += 1
            clusterTotals[int(cluster)] += 1
        numInstances += 1

    # The last slot of ``best`` holds the error of the best mapping so far.
    best[numClusters] = float('inf')
    self.mapClasses(numClusters, 0, counts, clusterTotals, current, best, 0)

    self.m_clusteringResult += "\n\nClass attribute: " + inst.classAttribute(
    ).name() + "\n"
    self.m_clusteringResult += "Classes to Clusters:\n"
    matrixString = self.toMatrixString(counts, clusterTotals,
                                       Instances(inst, 0))
    self.m_clusteringResult += matrixString + '\n'

    # Digit count of the largest cluster number; log10 is undefined for 0.
    Cwidth = 1 + (int(math.log10(numClusters)) if numClusters > 0 else 0)
    for i in range(numClusters):
        if clusterTotals[i] > 0:
            self.m_clusteringResult += "Cluster " + Utils.doubleToString(
                i, Cwidth, 0)
            self.m_clusteringResult += " <-- "
            if best[i] < 0:
                self.m_clusteringResult += "No class\n"
            else:
                self.m_clusteringResult += inst.classAttribute().value(
                    int(best[i])) + '\n'

    # Without instances there is no meaningful percentage; report 0 rather
    # than dividing by zero.
    if numInstances > 0:
        pctIncorrect = best[numClusters] / numInstances * 100.0
    else:
        pctIncorrect = 0.0
    self.m_clusteringResult += (
        "\nIncorrectly clustered instances :\t"
        + str(best[numClusters])
        + "\t"
        + Utils.doubleToString(pctIncorrect, 8, 4)
        + " %\n")

    # Built in one go: assigning by index into a fresh empty list would
    # raise IndexError.
    self.m_classToCluster = [int(best[i]) for i in range(numClusters)]
def toMatrixString(self, counts: List[List], clusterTotals: List, inst: Instances):
    """Render the classes-to-clusters matrix as text.

    The header row lists every cluster whose total is positive; each
    following row shows the counts of one class value in those clusters,
    followed by the class label.

    :param counts: ``counts[cluster][classValue]`` occurrence counts.
    :param clusterTotals: number of counted instances per cluster; clusters
        with a total of 0 are left out.
    :param inst: dataset whose class attribute supplies the row labels.
    :return: the formatted matrix, starting with a newline.
    """
    numClusters = self.m_numClusters
    numClasses = len(counts[0]) if counts else 0

    maxval = 0
    for i in range(numClusters):
        for value in counts[i]:
            if value > maxval:
                maxval = value

    # Column width is the digit count of the widest number. log10 is
    # undefined for 0, which occurs when no instance was counted at all.
    countDigits = int(math.log10(maxval)) if maxval > 0 else 0
    clusterDigits = int(math.log10(numClusters)) if numClusters > 0 else 0
    Cwidth = 1 + max(countDigits, clusterDigits)

    shown = [c for c in range(numClusters) if clusterTotals[c] > 0]
    parts = ['\n']
    for c in shown:
        parts.append(" " + Utils.doubleToString(c, Cwidth, 0))
    parts.append(" <-- assigned to cluster\n")
    for classValue in range(numClasses):
        for c in shown:
            parts.append(" " + Utils.doubleToString(counts[c][classValue],
                                                    Cwidth, 0))
        parts.append(" | " + inst.classAttribute().value(classValue) + "\n")
    return "".join(parts)
def toMatrixString(self,title="=== Confusion Matrix ===\n"):
    """Render the confusion matrix as text.

    Columns and rows are labelled with short letter IDs produced by
    ``num2ShortID``. Entries are printed with two decimals when at least one
    entry differs from the nearest whole number by 0.01 or more, and as
    whole numbers otherwise.

    :param title: heading placed before the matrix.
    :return: the formatted matrix.
    :raises Exception: if the class attribute is not nominal.
    """
    IDChars = list("abcdefghijklmnopqrstuvwxyz")
    if not self.m_ClassIsNominal:
        raise Exception("Evaluation: No confusion matrix possible!")

    fractional = False
    maxval = 0
    for i in range(self.m_NumClasses):
        for j in range(self.m_NumClasses):
            current = self.m_ConfusionMatrix[i][j]
            if current < 0:
                # presumably widens the value so the sign gets a column
                current *= -10
            if current > maxval:
                maxval = current
            # Distance to the nearest whole number. Whole numbers give 0 and
            # must not count as fractional; log10 is only taken for > 0.
            fract = abs(current - round(current))
            if fract > 0 and math.log10(fract) >= -2:
                fractional = True

    # Width needed for the largest entry; fractional output needs three
    # more characters (decimal point and two decimals).
    valueWidth = 0
    if maxval > 0:
        valueWidth = int(math.log10(maxval) + (3 if fractional else 0))
    # Width needed for the longest letter ID.
    classWidth = 0
    if self.m_NumClasses > 0:
        classWidth = int(
            math.log(self.m_NumClasses) / math.log(len(IDChars)))
    IDWidth = 1 + max(valueWidth, classWidth)

    parts = [title + '\n']
    for i in range(self.m_NumClasses):
        if fractional:
            parts.append(" " + self.num2ShortID(i, IDChars, IDWidth - 3) + " ")
        else:
            parts.append(" " + self.num2ShortID(i, IDChars, IDWidth))
    parts.append("<-- classified as\n")
    decimals = 2 if fractional else 0
    for i in range(self.m_NumClasses):
        for j in range(self.m_NumClasses):
            parts.append(" " + Utils.doubleToString(
                self.m_ConfusionMatrix[i][j], IDWidth, decimals))
        parts.append(" | " + self.num2ShortID(i, IDChars, IDWidth) + " = "
                     + self.m_ClassNames[i] + "\n")
    return "".join(parts)
def toSummaryString(self,printComplexityStatistics:bool,title:str="=== Summary ===\n"):
    """Build the textual evaluation summary.

    Only metrics whose key is contained in ``self.m_metricsToDisplay`` are
    printed. Metric lines are emitted only when ``self.m_WithClass`` is
    positive; the instance totals at the end are always printed.

    :param printComplexityStatistics: request the K&B and complexity
        statistics; ignored when ``self.m_NoPriors`` is set.
    :param title: heading placed before the summary.
    :return: the summary text.
    """
    if printComplexityStatistics and self.m_NoPriors:
        printComplexityStatistics = False
    metrics = self.m_metricsToDisplay

    def num(value):
        # Every figure in the summary uses the same width and precision.
        return Utils.doubleToString(value, 12, 4)

    out = [title + '\n']
    if self.m_WithClass > 0:
        if self.m_ClassIsNominal:
            if "correct" in metrics:
                out.append("Correctly Classified Instances "
                           + num(self.correct()) + " "
                           + num(self.pctCorrect()) + " %\n")
            if "incorrect" in metrics:
                out.append("Incorrectly Classified Instances "
                           + num(self.incorrect()) + " "
                           + num(self.pctIncorrect()) + " %\n")
            if "kappa" in metrics:
                out.append("Kappa statistic " + num(self.kappa()) + "\n")
            if printComplexityStatistics:
                if "kb relative" in metrics:
                    out.append("K&B Relative Info Score "
                               + num(self.KBRelativeInformation()) + " %\n")
                if "kb information" in metrics:
                    out.append("K&B Information Score "
                               + num(self.KBInformation()) + " bits"
                               + num(self.KBMeanInformation())
                               + " bits/instance\n")
        else:
            if "correlation" in metrics:
                out.append("Correlation coefficient "
                           + num(self.correlationCoefficient()) + "\n")

        if printComplexityStatistics and self.m_ComplexityStatisticsAvailable:
            if "complexity 0" in metrics:
                out.append("Class complexity | order 0 "
                           + num(self.SFPriorEntropy()) + " bits"
                           + num(self.SFMeanPriorEntropy())
                           + " bits/instance\n")
            if "complexity scheme" in metrics:
                out.append("Class complexity | scheme "
                           + num(self.SFSchemeEntropy()) + " bits"
                           + num(self.SFMeanSchemeEntropy())
                           + " bits/instance\n")
            if "complexity improvement" in metrics:
                out.append("Complexity improvement (Sf) "
                           + num(self.SFEntropyGain()) + " bits"
                           + num(self.SFMeanEntropyGain())
                           + " bits/instance\n")

        if "mae" in metrics:
            out.append("Mean absolute error "
                       + num(self.meanAbsoluteError()) + "\n")
        if "rmse" in metrics:
            out.append("Root mean squared error "
                       + num(self.rootMeanSquaredError()) + "\n")
        if not self.m_NoPriors:
            if "rae" in metrics:
                out.append("Relative absolute error "
                           + num(self.relativeAbsoluteError()) + " %\n")
            if "rrse" in metrics:
                out.append("Root relative squared error "
                           + num(self.rootRelativeSquaredError()) + " %\n")

        if self.m_CoverageStatisticsAvailable:
            if "coverage" in metrics:
                # Opening parenthesis matches the closing one after "level",
                # as in the region size label below.
                out.append("Coverage of cases ("
                           + Utils.doubleToString(self.m_ConfLevel, 4, 2)
                           + " level) "
                           + num(self.coverageOfTestCasesByPredictedRegions())
                           + " %\n")
            if not self.m_NoPriors:
                if "region size" in metrics:
                    out.append("Mean rel. region size ("
                               + Utils.doubleToString(self.m_ConfLevel, 4, 2)
                               + " level) "
                               + num(self.sizeOfPredictedRegions()) + " %\n")

    if Utils.gr(self.unclassified(), 0):
        out.append("UnClassified Instances "
                   + num(self.unclassified()) + " "
                   + num(self.pctUnclassified()) + " %\n")
    out.append("Total Number of Instances " + num(self.m_WithClass) + "\n")
    if self.m_MissingClass > 0:
        out.append("Ignored Class Unknown Instances "
                   + num(self.m_MissingClass) + "\n")
    return "".join(out)
def threadClassifierRun(self):
    """Train and evaluate the selected classifier and publish the report.

    The test mode follows the radio buttons: cross-validation (1),
    evaluation on the training data (2) or a user supplied test set (3).
    The report text is pushed to ``self.m_History`` as it grows and the plot
    data is handed over through ``history_add_visualize_signal``. Any
    exception is reported through ``error_diglog_signal``. Afterwards the
    start/stop buttons are reset and ``m_RunThread`` is cleared.
    """
    try:
        self.m_CEPanel.addToHistory()
        inst = Instances(self.m_Instances)
        trainTimeElapsed = testTimeElapsed = 0
        userTestStructure = None
        if self.m_SetTestFrame is not None:
            userTestInstances = self.m_SetTestFrame.getInstances()
            # getInstances() may be None when no test set has been loaded.
            if userTestInstances is not None:
                userTestStructure = deepcopy(
                    userTestInstances)  #type:Instances
                userTestStructure.setClassIndex(self.m_TestClassIndex)
        # Defaults: output model, output per-class stats, output confusion
        # matrix, store predictions for visualization.
        numFolds = 10
        classIndex = self.m_ClassCombo.currentIndex()
        inst.setClassIndex(classIndex)
        classifier = self.m_ClassifierEditor.getValue()  #type:Classifier
        name = time.strftime("%H:%M:%S - ")
        outPutResult = ""
        evaluation = None  #type:Evaluation
        grph = None

        if self.m_CVBut.isChecked():
            testMode = 1
            numFolds = int(self.m_CVText.text())
            if numFolds <= 1:
                raise Exception("Number of folds must be greater than 1")
        elif self.m_TrainBut.isChecked():
            testMode = 2
        elif self.m_TestSplitBut.isChecked():
            testMode = 3
            if userTestStructure is None:
                raise Exception("No user test set has been specified")
            if not inst.equalHeaders(userTestStructure):
                # Raised rather than shown in a message box from here: the
                # run must stop, and the handler below reports the text
                # through error_diglog_signal.
                raise Exception("测试数据集属性不同")
        else:
            raise Exception("Unknown test mode")

        cname = classifier.__module__
        if cname.startswith("classifiers."):
            name += cname[len("classifiers."):]
        else:
            name += cname
        # TODO: append the classifier options once OptionHandler is supported.

        plotInstances = ClassifierErrorsPlotInstances()
        # The user test mode replaces these instances before plotting.
        plotInstances.setInstances(inst)
        plotInstances.setClassifier(classifier)
        plotInstances.setClassIndex(inst.classIndex())
        plotInstances.setPointSizeProportionalToMargin(False)

        outPutResult += "=== Run information ===\n\n"
        outPutResult += "Scheme: " + cname
        outPutResult += "\n"
        outPutResult += "Relation: " + inst.relationName() + '\n'
        outPutResult += "Instances: " + str(inst.numInstances()) + '\n'
        outPutResult += "Attributes: " + str(inst.numAttributes()) + '\n'
        if inst.numAttributes() < 100:
            for i in range(inst.numAttributes()):
                outPutResult += " " + inst.attribute(i).name() + '\n'
        else:
            outPutResult += " [list of attributes omitted]\n"
        outPutResult += "Test mode: "
        if testMode == 1:
            outPutResult += str(numFolds) + "-fold cross-validation\n"
        elif testMode == 2:
            outPutResult += "evaluate on training data\n"
        elif testMode == 3:
            outPutResult += "user supplied test set: " + str(
                userTestStructure.numInstances()) + " instances\n"
        outPutResult += "\n"
        self.m_History.addResult(name, outPutResult)
        self.m_History.setSingle(name)

        if testMode == 2 or testMode == 3:
            # Cross-validation trains a fresh copy per fold instead, so the
            # full-training-set model only exists in these two modes.
            trainTimeStart = time.time()
            classifier.buildClassifier(inst)
            trainTimeElapsed = time.time() - trainTimeStart
            outPutResult += "=== Classifier model (full training set) ===\n\n"
            outPutResult += str(classifier) + "\n"
            outPutResult += "\nTime taken to build model: " + Utils.doubleToString(
                trainTimeElapsed, 2) + " seconds\n\n"
            self.m_History.updateResult(name, outPutResult)
            if isinstance(classifier, Drawable):
                grph = classifier.graph()
        print("==========update Compelte=================")

        if testMode == 2:
            evaluation = Evaluation(inst)
            evaluation = self.setupEval(evaluation, classifier, inst,
                                        plotInstances, False)
            evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
            plotInstances.setUp()
            testTimeStart = time.time()
            # TODO: batch prediction when the classifier is a BatchPredictor.
            for jj in range(inst.numInstances()):
                plotInstances.process(inst.instance(jj), classifier,
                                      evaluation)
            testTimeElapsed = time.time() - testTimeStart
            outPutResult += "=== Evaluation on training set ===\n"
        elif testMode == 1:
            rnd = 1
            inst.randomize(rnd)
            if inst.attribute(classIndex).isNominal():
                inst.stratify(numFolds)
            evaluation = Evaluation(inst)
            evaluation = self.setupEval(evaluation, classifier, inst,
                                        plotInstances, False)
            evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
            plotInstances.setUp()
            for fold in range(numFolds):
                train = inst.trainCV(numFolds, fold, rnd)
                evaluation = self.setupEval(evaluation, classifier, train,
                                            plotInstances, True)
                evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
                # Each fold trains its own copy of the classifier.
                current = deepcopy(classifier)
                current.buildClassifier(train)
                test = inst.testCV(numFolds, fold)
                # TODO: batch prediction when the classifier is a BatchPredictor.
                for jj in range(test.numInstances()):
                    plotInstances.process(test.instance(jj), current,
                                          evaluation)
            if inst.attribute(classIndex).isNominal():
                outPutResult += "=== Stratified cross-validation ===\n"
            else:
                outPutResult += "=== Cross-validation ===\n"
        elif testMode == 3:
            evaluation = Evaluation(inst)
            evaluation = self.setupEval(evaluation, classifier, inst,
                                        plotInstances, False)
            plotInstances.setInstances(userTestStructure)
            evaluation.setMetricsToDisplay(self.m_selectedEvalMetrics)
            plotInstances.setUp()
            # TODO: batch prediction when the classifier is a BatchPredictor.
            testTimeStart = time.time()
            for i in range(userTestStructure.numInstances()):
                instance = userTestStructure.instance(i)
                plotInstances.process(instance, classifier, evaluation)
            testTimeElapsed = time.time() - testTimeStart
            outPutResult += "=== Evaluation on test set ===\n"

        if testMode != 1:
            mode = ""
            if testMode == 2:
                mode = "training data"
            elif testMode == 3:
                mode = "supplied test set"
            outPutResult += "\nTime taken to test model on " + mode + ": " + Utils.doubleToString(
                testTimeElapsed, 2) + " seconds\n\n"
        outPutResult += evaluation.toSummaryString(False) + '\n'
        self.m_History.updateResult(name, outPutResult)
        if inst.attribute(classIndex).isNominal():
            outPutResult += evaluation.toClassDetailsString() + '\n'
            outPutResult += evaluation.toMatrixString() + '\n'
        self.m_History.updateResult(name, outPutResult)
        Utils.debugOut(outPutResult)

        if (plotInstances is not None and plotInstances.canPlot(False)):
            visName = name + " (" + inst.relationName() + ")"
            pl2d = plotInstances.getPlotData(cname)
            plotInstances.cleanUp()
            vv = []
            trainHeader = Instances(self.m_Instances, 0)
            trainHeader.setClassIndex(classIndex)
            vv.append(trainHeader)
            if grph is not None:
                vv.append(grph)
            if evaluation is not None and evaluation.predictions(
            ) is not None:
                vv.append(evaluation.predictions())
                vv.append(inst.classAttribute())
            self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
    except Exception as e:
        self.error_diglog_signal.emit(str(e))

    self.mutex.lock()
    try:
        self.m_StartBut.setEnabled(True)
        self.m_StopBut.setEnabled(False)
        self.m_RunThread = None
    finally:
        # Never leave the mutex held, even if resetting the widgets fails.
        self.mutex.unlock()
    print("RunFinished")
def __str__(self):
    """Return the regression equation and, optionally, extra statistics.

    A short status line is returned when no model has been built or when
    ``self.Minimal`` is set. Otherwise the equation lists one term per
    selected non-class attribute followed by the intercept. When
    ``self.outputAdditionalStats`` is set, a coefficient table and the
    degrees of freedom, R^2, adjusted R^2 and F-statistic follow.
    """
    if not self.m_ModelBuilt:
        return "Linear Regression: No model built yet."
    if self.Minimal:
        return "Linear Regression: Model built."

    data = self.m_TransformedData
    places = self.numDecimalPlaces

    def fmt(value):
        return Utils.doubleToString(value, 12, places)

    pieces = ["\nLinear Regression Model\n\n",
              data.classAttribute().name() + " =\n\n"]
    # Coefficients are stored in the order of the selected attributes; the
    # intercept follows the last of them.
    selected = [
        i for i in range(data.numAttributes())
        if i != self.m_ClassIndex and self.m_SelectedAttributes[i]
    ]
    terms = [
        fmt(self.m_Coefficients[column]) + " * " + data.attribute(i).name()
        for column, i in enumerate(selected)
    ]
    intercept = len(selected)
    pieces.append(" +\n".join(terms))
    pieces.append(" +\n" + fmt(self.m_Coefficients[intercept]))

    if self.outputAdditionalStats:
        longest = max((len(data.attribute(i).name()) for i in selected),
                      default=0)
        maxAttLength = max(longest + 3, len("Variable") + 3)
        pieces.append("\n\nRegression Analysis:\n\n"
                      + Utils.padRight("Variable", maxAttLength)
                      + " Coefficient SE of Coef t-Stat")
        for column, i in enumerate(selected):
            pieces.append("\n" + Utils.padRight(data.attribute(i).name(),
                                                maxAttLength))
            pieces.append(fmt(self.m_Coefficients[column]))
            pieces.append(" " + fmt(self.m_StdErrorOfCoef[column]))
            pieces.append(" " + fmt(self.m_TStats[column]))
        pieces.append(Utils.padRight("\nconst", maxAttLength + 1)
                      + fmt(self.m_Coefficients[intercept]))
        pieces.append(" " + fmt(self.m_StdErrorOfCoef[intercept]))
        pieces.append(" " + fmt(self.m_TStats[intercept]))
        pieces.append("\n\nDegrees of freedom = " + str(self.m_df))
        pieces.append("\nR^2 value = "
                      + Utils.doubleToString(self.m_RSquared, places))
        pieces.append("\nAdjusted R^2 = "
                      + Utils.doubleToString(self.m_RSquaredAdj, 5))
        pieces.append("\nF-statistic = "
                      + Utils.doubleToString(self.m_FStat, places))
    return "".join(pieces)
def __str__(self):
    """Return the k-means report: run information and the centroid table.

    The table has one row per attribute and one column for the full data
    plus one per cluster. Missing centroid values are printed as "missing"
    and are skipped when the column widths are computed.

    :return: the report, or "No clusterer built yet!" when there are no
        centroids.
    """
    if self.m_ClusterCentroids is None:
        return "No clusterer built yet!"
    centroids = self.m_ClusterCentroids
    numAtts = centroids.numAttributes()

    # --- column widths ---------------------------------------------------
    maxAttWidth = 0
    maxWidth = 0
    for i in range(self.NumClusters):
        for j in range(numAtts):
            att = centroids.attribute(j)
            maxAttWidth = max(maxAttWidth, len(att.name()))
            if att.isNumeric():
                magnitude = math.fabs(centroids.instance(i).value(j))
                # Zero, NaN (missing) and infinite values have no usable
                # logarithm; they get the minimum width instead.
                if magnitude > 0 and math.isfinite(magnitude):
                    width = math.log(magnitude) / math.log(10)
                else:
                    width = float('-inf')
                if width < 0:
                    width = 1
                width += 6
                maxWidth = max(maxWidth, int(width))
    for i in range(numAtts):
        att = centroids.attribute(i)
        if att.isNominal():
            for j in range(centroids.numInstances()):
                centroid = centroids.instance(j)
                # A missing value cannot be converted into a label index.
                if centroid.isMissing(i):
                    continue
                label = att.value(int(centroid.value(i)))
                maxWidth = max(maxWidth, len(label))
            for j in range(att.numValues()):
                maxAttWidth = max(maxAttWidth, len(att.value(j) + " "))
    for clusterSize in self.m_ClusterSizes:
        maxWidth = max(maxWidth, len("(" + str(clusterSize) + ")"))
    maxAttWidth += 2
    if maxAttWidth < len("Attribute") + 2:
        maxAttWidth = len("Attribute") + 2
    if maxWidth < len("Full Data"):
        maxWidth = len("Full Data") + 1
    if maxWidth < len("missing"):
        maxWidth = len("missing") + 1

    def cell(text):
        # One left-padded table cell.
        return self.pad(text, " ", maxWidth + 1 - len(text), True)

    # --- run information -------------------------------------------------
    out = ["\nkMeans\n======\n"]
    out.append("\nNumber of iterations: " + str(self.m_Iterations))
    if not self.m_FastDistanceCalc:
        out.append('\n')
        out.append("Within cluster sum of squared errors: "
                   + str(sum(self.m_squaredErrors)))
    out.append("\n\nInitial starting points (random):\n")
    out.append('\n')
    for i in range(self.m_initialStartPoints.numInstances()):
        out.append("Cluster " + str(i) + ": "
                   + str(self.m_initialStartPoints.instance(i)) + "\n")
    out.append("\nMissing values globally replaced with mean/mode")

    # --- table header ----------------------------------------------------
    out.append("\n\nFinal cluster centroids:\n")
    out.append(self.pad("Cluster#", " ",
                        (maxAttWidth + (maxWidth * 2 + 2)) - len("Cluster#"),
                        True))
    out.append('\n')
    out.append(self.pad("Attribute", " ", maxAttWidth - len("Attribute"),
                        False))
    out.append(cell("Full Data"))
    for i in range(self.NumClusters):
        out.append(cell(str(i)))
    out.append('\n')
    cSize = "(" + str(sum(self.m_ClusterSizes)) + ")"
    out.append(self.pad(cSize, " ", maxAttWidth + maxWidth + 1 - len(cSize),
                        True))
    for i in range(self.NumClusters):
        out.append(cell("(" + str(self.m_ClusterSizes[i]) + ")"))
    out.append('\n')
    out.append(self.pad(
        "", "=",
        maxAttWidth + (maxWidth * (centroids.numInstances() + 1)
                       + centroids.numInstances() + 1), True))
    out.append('\n')

    # --- one row per attribute --------------------------------------------
    for i in range(numAtts):
        att = centroids.attribute(i)
        attName = att.name()
        out.append(attName)
        out.append(" " * (maxAttWidth - len(attName)))
        fullValue = self.m_FullMeansOrMediansOrModes[i]
        if att.isNominal():
            if fullValue == -1:
                out.append(cell("missing"))
            else:
                out.append(cell(att.value(int(fullValue))))
        else:
            if math.isnan(fullValue):
                out.append(cell("missing"))
            else:
                out.append(cell(
                    Utils.doubleToString(fullValue, maxWidth, 4).strip()))
        for j in range(self.NumClusters):
            centroid = centroids.instance(j)
            if centroid.isMissing(i):
                out.append(cell("missing"))
            elif att.isNominal():
                out.append(cell(att.value(int(centroid.value(i)))))
            else:
                out.append(cell(
                    Utils.doubleToString(centroid.value(i), maxWidth,
                                         4).strip()))
        out.append('\n')
    out.append('\n\n')
    return "".join(out)
def clusterRunThread(self):
    """Build and evaluate the selected clusterer and publish the report.

    The test mode follows the radio buttons: evaluation on the training
    data (0) or on a user supplied test set (1). The report text is pushed
    to ``self.m_History`` as it grows and the plot data is handed over
    through ``history_add_visualize_signal``. The start/stop buttons are
    reset and ``m_RunThread`` is cleared when the run ends, whether it
    succeeded or raised.

    :raises Exception: for an unknown test mode, a missing user test set or
        a test set whose header differs from the training data.
    """
    try:
        self.m_CLPanel.addToHistory()
        inst = Instances(self.m_Instances)
        inst.setClassIndex(-1)
        plotInstances = ClustererAssignmentsPlotInstances()
        plotInstances.setClusterer(self.m_ClustererEditor.getValue())
        userTest = None
        if self.m_SetTestFrame is not None:
            if self.m_SetTestFrame.getInstances() is not None:
                userTest = Instances(self.m_SetTestFrame.getInstances())
        clusterer = self.m_ClustererEditor.getValue()
        outBuff = ""
        name = time.strftime("%H:%M:%S - ")
        cname = clusterer.__module__
        if cname.startswith("clusterers."):
            name += cname[len("clusterers."):]
        else:
            name += cname

        if self.m_TrainBut.isChecked():
            testMode = 0
        elif self.m_TestSplitBut.isChecked():
            testMode = 1
            if userTest is None:
                raise Exception("No user test set has been opened")
            if not inst.equalHeaders(userTest):
                raise Exception("Train and test set are not compatible\n" +
                                inst.equalHeadersMsg(userTest))
        else:
            raise Exception("Unknown test mode")

        trainInst = Instances(inst)
        outBuff += "=== Run information ===\n\n"
        outBuff += "Scheme: " + cname
        outBuff += "\n"
        outBuff += "Relation: " + inst.relationName() + '\n'
        outBuff += "Instances: " + str(inst.numInstances()) + '\n'
        outBuff += "Attributes: " + str(inst.numAttributes()) + '\n'
        if inst.numAttributes() < 100:
            for i in range(inst.numAttributes()):
                outBuff += " " + inst.attribute(i).name() + '\n'
        else:
            outBuff += " [list of attributes omitted]\n"
        outBuff += "Test mode: "
        if testMode == 0:
            outBuff += "evaluate on training data\n"
        elif testMode == 1:
            outBuff += "user supplied test set: " + str(
                userTest.numInstances()) + " instances\n"
        outBuff += '\n'
        self.m_History.addResult(name, outBuff)
        self.m_History.setSingle(name)

        trainTimeStart = time.time()
        if isinstance(clusterer, Clusterer):
            clusterer.buildClusterer(self.removeClass(trainInst))
        trainTimeElapsed = time.time() - trainTimeStart
        outBuff += "\n=== Clustering model (full training set) ===\n\n"
        outBuff += str(clusterer) + '\n'
        outBuff += ("\nTime taken to build model (full training data) : "
                    + Utils.doubleToString(trainTimeElapsed, 2)
                    + " seconds\n\n")
        self.m_History.updateResult(name, outBuff)

        evaluation = ClusterEvaluation()
        evaluation.setClusterer(clusterer)
        if testMode == 0:
            evaluation.evaluateClusterer(trainInst, False)
            plotInstances.setInstances(inst)
            plotInstances.setClusterEvaluation(evaluation)
            outBuff += "=== Model and evaluation on training set ===\n\n"
        elif testMode == 1:
            userTestT = Instances(userTest)
            evaluation.evaluateClusterer(userTestT, False)
            plotInstances.setInstances(userTest)
            plotInstances.setClusterEvaluation(evaluation)
            outBuff += "=== Evaluation on test set ===\n"
        else:
            raise Exception("Test mode not implemented")
        outBuff += evaluation.clusterResultsToString()
        outBuff += '\n'
        self.m_History.updateResult(name, outBuff)

        if plotInstances is not None and plotInstances.canPlot(True):
            visName = name + " (" + inst.relationName() + ")"
            pl2d = plotInstances.getPlotData(name)
            plotInstances.cleanUp()
            vv = []
            trainHeader = Instances(self.m_Instances, 0)
            vv.append(trainHeader)
            self.history_add_visualize_signal.emit(name, vv, visName, pl2d)
    finally:
        # Reset the run state even when the run failed; otherwise the start
        # button would stay disabled.
        self.m_RunThread = None
        self.m_StartBut.setEnabled(True)
        self.m_StopBut.setEnabled(False)
    print("Run Finished")
def paintAxis(self):
    """Set ticks, tick labels and limits of both plot axes.

    A numeric attribute gets three ticks (minimum, midpoint, maximum) with
    formatted labels. Any other attribute gets one tick per attribute value,
    labelled with the value, shortened to three characters when there are
    more than ten values.
    """

    def axisLabel(value):
        # Width and precision are derived from the size of the number.
        magnitude = abs(value)
        whole = int(magnitude)
        nondecimal = int(math.log(whole) / math.log(10)) if whole > 0 else 1
        precision = 1
        if magnitude - whole > 0:
            precision = int(abs(math.log(magnitude) / math.log(10))) + 2
            if precision > 10:
                precision = 1
        return Utils.doubleToString(value, nondecimal + 1 + precision,
                                    precision)

    def valueLabels(attribute, count):
        # Long value lists only show the first three characters per value.
        if count > 10:
            return [attribute.value(k)[:3] for k in range(count)]
        return [attribute.value(k) for k in range(count)]

    maxStringX = axisLabel(self.m_maxX)
    minStringX = axisLabel(self.m_minX)
    maxStringY = axisLabel(self.m_maxY)
    minStringY = axisLabel(self.m_minY)

    xAttribute = self.m_plotInstances.attribute(self.m_xIndex)
    if xAttribute.isNumeric():
        midX = (self.m_minX + self.m_maxX) / 2
        midStringX = axisLabel(midX)
        self.axes.set_xticks([self.m_minX, midX, self.m_maxX])
        self.axes.set_xlim(self.m_minX, self.m_maxX)
        self.axes.set_xticklabels([minStringX, midStringX, maxStringX])
    else:
        numValues = xAttribute.numValues()
        self.axes.set_xticks(np.arange(0, numValues))
        self.axes.set_xticklabels(valueLabels(xAttribute, numValues))
        self.axes.set_xlim(self.m_minX, self.m_maxX)

    yAttribute = self.m_plotInstances.attribute(self.m_yIndex)
    if yAttribute.isNumeric():
        midY = (self.m_minY + self.m_maxY) / 2
        self.axes.set_yticks([self.m_minY, midY, self.m_maxY])
        midStringY = axisLabel(midY)
        self.axes.set_yticklabels([minStringY, midStringY, maxStringY])
        self.axes.set_ylim(self.m_minY, self.m_maxY)
    else:
        numValues = yAttribute.numValues()
        self.axes.set_yticks(np.arange(0, numValues))
        self.axes.set_yticklabels(valueLabels(yAttribute, numValues))
        self.axes.set_ylim(self.m_minY, self.m_maxY)